In [1]:
import pandas as pd
import numpy as np
from utils import sankey_plot_with_labels

RESULTS_FOLDER = 'results/'
CELLTYPIST_HIGH_PATH = 'results/celltypist_labels_Immune_All_High.csv'
CELLTYPIST_LOW_PATH = 'results/celltypist_labels_Immune_All_Low.csv'
CELL_TYPES_PER_CLUSTER_PATH = 'results/celltypes_labels_from_antobody_clusters.csv'
CLUSTER_LABELS_PATH = 'results/cluster_labels.csv'

  from pandas.core import (


Import data:

In [2]:
cluster_labels_df = pd.read_csv(CLUSTER_LABELS_PATH, index_col=0)
cluster_labels_df.head(2)

Unnamed: 0,leiden
AAACCCAAGACTGTTC-1,6
AAACCCAAGGATCATA-1,3


In [3]:
celltypes_per_cluster_df = pd.read_csv(CELL_TYPES_PER_CLUSTER_PATH, index_col=0)
celltypes_per_cluster_df.head(2)

Unnamed: 0,CellType,Literature1,Literature2
0,T cells,CD4 T Cell,CD8 T Cell
1,T cells,CD4 T Cell,CD8 T Cell


Select most predicted labels for each cluster:

In [4]:
celltypist_high_df = pd.read_csv(CELLTYPIST_HIGH_PATH, index_col=0)
celltypist_high_df['antibody_cluster'] = cluster_labels_df['leiden']
celltypist_high_df.head(2)

Unnamed: 0,predicted_labels,over_clustering,majority_voting,antibody_cluster
AAACCCAAGACTGTTC-1,T cells,338,T cells,6
AAACCCAAGGATCATA-1,T cells,28,T cells,3


In [5]:
celltypist_low_df = pd.read_csv(CELLTYPIST_LOW_PATH, index_col=0)
celltypist_low_df['antibody_cluster'] = cluster_labels_df['leiden']
celltypist_low_df.head(2)

Unnamed: 0,predicted_labels,over_clustering,majority_voting,antibody_cluster
AAACCCAAGACTGTTC-1,Regulatory T cells,338,Tem/Trm cytotoxic T cells,6
AAACCCAAGGATCATA-1,Tcm/Naive cytotoxic T cells,28,Tcm/Naive cytotoxic T cells,3


In [6]:
# find the most common cell type in each cluster    
for cluster in range(16):
    cluster_df = celltypist_high_df[celltypist_high_df['antibody_cluster'] == cluster]
    most_common_celltype = cluster_df['predicted_labels'].value_counts().idxmax()
    celltypes_per_cluster_df.loc[cluster, 'cell_typist_high'] = most_common_celltype
    
    cluster_df = celltypist_low_df[celltypist_low_df['antibody_cluster'] == cluster]
    most_common_celltype = cluster_df['predicted_labels'].value_counts().idxmax()
    celltypes_per_cluster_df.loc[cluster, 'cell_typist_low'] = most_common_celltype

celltypes_per_cluster_df

Unnamed: 0,CellType,Literature1,Literature2,cell_typist_high,cell_typist_low
0,T cells,CD4 T Cell,CD8 T Cell,T cells,Tcm/Naive helper T cells
1,T cells,CD4 T Cell,CD8 T Cell,T cells,Tcm/Naive helper T cells
2,B cells,,,B cells,Naive B cells
3,T cells,T Cell,CD8 T Cell,T cells,Tcm/Naive cytotoxic T cells
4,T cells,T Cell,CD8 T Cell,T cells,Tem/Trm cytotoxic T cells
5,ILC,NK Cell,CD8 T Cell,ILC,CD16+ NK cells
6,,,CD8 T Cell,T cells,Tem/Temra cytotoxic T cells
7,,T Cell,CD8 T Cell,T cells,MAIT cells
8,B cells,B Cell,,B cells,Memory B cells
9,B cells,B Cell,Naive T Cell,B cells,Tcm/Naive helper T cells


In [7]:
celltypes_per_cluster_df['cell_typist_high'].value_counts()

cell_typist_high
T cells                     10
B cells                      3
ILC                          1
Monocytes                    1
Megakaryocytes/platelets     1
Name: count, dtype: int64

In [8]:
celltypes_per_cluster_df['cell_typist_low'].value_counts()

cell_typist_low
Tcm/Naive helper T cells       6
Tem/Temra cytotoxic T cells    2
Naive B cells                  1
Tcm/Naive cytotoxic T cells    1
Tem/Trm cytotoxic T cells      1
CD16+ NK cells                 1
MAIT cells                     1
Memory B cells                 1
Classical monocytes            1
Megakaryocytes/platelets       1
Name: count, dtype: int64

In [9]:
display(celltypes_per_cluster_df['CellType'].unique())
display(celltypes_per_cluster_df['Literature1'].unique())
display(celltypes_per_cluster_df['Literature2'].unique())
display(celltypes_per_cluster_df['cell_typist_high'].unique())
display(celltypes_per_cluster_df['cell_typist_low'].unique())

array(['T cells', 'B cells', 'ILC', nan, 'Monocytes', 'HSC/MPP'],
      dtype=object)

array(['CD4 T Cell', nan, 'T Cell', 'NK Cell', 'B Cell', 'Monocytes'],
      dtype=object)

array(['CD8 T Cell', nan, 'Naive T Cell', 'CD4 T Cell'], dtype=object)

array(['T cells', 'B cells', 'ILC', 'Monocytes',
       'Megakaryocytes/platelets'], dtype=object)

array(['Tcm/Naive helper T cells', 'Naive B cells',
       'Tcm/Naive cytotoxic T cells', 'Tem/Trm cytotoxic T cells',
       'CD16+ NK cells', 'Tem/Temra cytotoxic T cells', 'MAIT cells',
       'Memory B cells', 'Classical monocytes',
       'Megakaryocytes/platelets'], dtype=object)

In [10]:
cell_names_mapping = {
    'Tcm/Naive helper T cells': 'CD8 T Cell',
    'Tcm/Naive cytotoxic T cells': 'CD8 T Cell',
    'Classical monocytes': 'Monocytes',
    'CD16+ NK cells': 'NK cells',
}

In [11]:
# change the cell type names to match the literature
celltypes_per_cluster_df['cell_typist_high'] = celltypes_per_cluster_df['cell_typist_high'].replace(cell_names_mapping)
celltypes_per_cluster_df['cell_typist_low'] = celltypes_per_cluster_df['cell_typist_low'].replace(cell_names_mapping)

In [12]:
# Nan as srtring
celltypes_per_cluster_df = celltypes_per_cluster_df.fillna('None')

In [13]:
celltypes_per_cluster_df

Unnamed: 0,CellType,Literature1,Literature2,cell_typist_high,cell_typist_low
0,T cells,CD4 T Cell,CD8 T Cell,T cells,CD8 T Cell
1,T cells,CD4 T Cell,CD8 T Cell,T cells,CD8 T Cell
2,B cells,,,B cells,Naive B cells
3,T cells,T Cell,CD8 T Cell,T cells,CD8 T Cell
4,T cells,T Cell,CD8 T Cell,T cells,Tem/Trm cytotoxic T cells
5,ILC,NK Cell,CD8 T Cell,ILC,NK cells
6,,,CD8 T Cell,T cells,Tem/Temra cytotoxic T cells
7,,T Cell,CD8 T Cell,T cells,MAIT cells
8,B cells,B Cell,,B cells,Memory B cells
9,B cells,B Cell,Naive T Cell,B cells,CD8 T Cell


### Comparison

'CellType' was inferred from the protein clusters.

In [15]:
sankey_plot_with_labels(
    labels=[celltypes_per_cluster_df['CellType'], celltypes_per_cluster_df['Literature1'],
    celltypes_per_cluster_df['Literature2'], celltypes_per_cluster_df['cell_typist_high'],
    celltypes_per_cluster_df['cell_typist_low']],
    labels_titles=['CellType', 'Literature1', 'Literature2', 'Cell Typist High', 'Cell Typist Low'],
    title='Sankey plot of cell types'
    )