In [None]:
import scanpy as sc
import scanpy.external as sce
import numpy as np
import pandas as pd
import warnings, scipy.sparse as sp, matplotlib, matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.pyplot import rc_context
from collections import Counter
import matplotlib.font_manager
import pyreadr
import rpy2
from rpy2.robjects.packages import importr
import rpy2.robjects as robjects
import magic
#import seaborn as sns
import palantir
import loompy
#from scipy.sparse import csgraph

# Global plotting / display configuration for the whole notebook.
# Font type 42 embeds TrueType fonts so text in saved PDF/PS figures stays editable.
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
matplotlib.rcParams['font.family'] = 'sans-serif'
matplotlib.rcParams['font.sans-serif'] = 'Arial'
matplotlib.rc('font', size=14)
import matplotlib.lines as lines

# Show up to 200 rows when a DataFrame is displayed.
pd.set_option('display.max_rows', 200)

# scanpy figure defaults; several later cells call set_figure_params again and override them.
sc.set_figure_params(dpi=80, dpi_save=300, color_map='Spectral_r', vector_friendly=True, transparent=True)
sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()

In [None]:
# preset color palettes and color maps
# Categorical palette (23 entries) passed as `palette=` to the Leiden UMAP plots.
# NOTE(review): '#B5EFB5' appears twice (10th and 21st entry), so two clusters can share a colour.
user_defined_palette =  [ '#F6222E', '#16FF32', '#3283FE', '#FEAF16', '#BDCDFF', '#3B00FB', '#1CFFCE', '#C075A6', '#F8A19F', '#B5EFB5', '#FBE426', '#C4451C', 
                          '#2ED9FF', '#c1c119', '#8b0000', '#FE00FA', '#1CBE4F', '#1C8356', '#0e452b', '#AA0DFE', '#B5EFB5', '#325A9B', '#90AD1C']

# Sequential light-to-dark blue map, and a diverging blue -> pale -> green map.
# Both are created under the same name 'mycmap'; neither is referenced in the visible part of the notebook.
user_defined_cmap_markers = LinearSegmentedColormap.from_list('mycmap', ["#E6E6FF", "#CCCCFF", "#B2B2FF", "#9999FF",  "#6666FF",   "#3333FF", "#0000FF"])
user_defined_cmap_degs = LinearSegmentedColormap.from_list('mycmap', ["#0000FF", "#3333FF", "#6666FF", "#9999FF", "#B2B2FF", "#CCCCFF", "#E6E6FF", "#E6FFE6", "#CCFFCC", "#B2FFB2", "#99FF99", "#66FF66", "#33FF33", "#00FF00"])

In [None]:
# Input: human thymus .h5ad file, addressed relative to the notebook's working directory.
path_to_h5ad = '../data/public/human_thymus/HTA08.v01.A05.Science_human_fig1.h5ad'

In [None]:
# Load the full dataset into memory as an AnnData object.
adata = sc.read_h5ad(path_to_h5ad)

In [None]:
# De-duplicate gene names in place so every gene can be addressed unambiguously by name.
adata.var_names_make_unique()

In [None]:
# Display the dimensions of the loaded object (observations x variables).
adata.shape

In [None]:
# Overview UMAP of the full object, one panel each for 'Age' and the two annotation columns.
overview_keys = ['Age', 'Anno_level_1', 'Anno_level_fig1']
sc.pl.umap(adata, color=overview_keys, ncols=3, wspace=0.5, size=10,
           color_map='Spectral_r', sort_order=False,
           add_outline=True, frameon=False)

### Focus only on the CD45-negative cells (TEC, endothelial cells, fibroblasts) of the postnatal (3m) and aged (35y) samples (>10 cells)

In [None]:
# Keep the CD45-negative compartments (TEC by 'Anno_level_1'; endothelium and
# fibroblasts by 'Anno_level_fig1') from the 3-month and 35-year samples only.
stromal_fig1_labels = ['Endo', 'Fb_1', 'Fb_2', 'Fb_cycling']
is_cd45neg = (adata.obs['Anno_level_1'] == 'TEC') | adata.obs['Anno_level_fig1'].isin(stromal_fig1_labels)
is_selected_age = adata.obs['Age'].isin(['3m', '35y'])
# .copy() turns the subset into an independent AnnData. Without it `cd45neg` is a
# view of `adata`, and the later cells that write to .obs/.uns/.var trigger an
# implicit copy with an ImplicitModificationWarning.
cd45neg = adata[is_cd45neg & is_selected_age].copy()

In [None]:
# Recode 'Age' into labels that sort chronologically; any other age is flagged as 'error'.
stage_of_age = {'3m': '03mo', '35y': '35yr'}
cd45neg.obs['stage'] = [stage_of_age.get(age, 'error') for age in cd45neg.obs['Age']]

In [None]:
# One colour per 'stage' category; the trailing comment keeps an alternative colour pair.
cd45neg.uns['stage_colors'] =  [ '#76D6FF','#FF8072'] # ['#F5B4AE', '#8FD6D9']

In [None]:
# Remove columns with all 0s, i.e. genes detected in no cell of this subset
sc.pp.filter_genes(cd45neg, min_cells=1)

In [None]:
# Flag the 2500 most variable genes (Seurat flavour, 20 expression bins) for the PCA below.
sc.pp.highly_variable_genes(cd45neg, n_top_genes=2500, n_bins=20, flavor='seurat')

In [None]:
# Exploratory PCA with 200 components; its variance profile is inspected next and
# the PCA is recomputed with fewer components afterwards.
rng = np.random.RandomState(42)
sc.tl.pca(cd45neg, n_comps=200, svd_solver='arpack', random_state=rng)

In [None]:
def observe_variance(anndata_object):
    """Plot per-PC and cumulative explained variance of a stored PCA.

    Parameters
    ----------
    anndata_object
        Object whose ``.uns['pca']['variance_ratio']`` holds the fraction of
        variance explained by each principal component.

    Returns
    -------
    None
        The two-panel figure is rendered with ``plt.show()``. The previous
        version bound ``plt.show`` without calling it and returned that
        function object, so the figure was never explicitly shown and the
        cell echoed a function repr instead.
    """
    variance_ratio = anndata_object.uns['pca']['variance_ratio']
    pc_index = range(len(variance_ratio))

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 5))

    # variance per principal component
    ax1.scatter(pc_index, variance_ratio, s=4)
    ax1.set_xlabel('PC')
    ax1.set_ylabel('Fraction of variance explained\n')
    ax1.set_title('Fraction of variance explained per PC\n')

    # cumulative variance explained
    ax2.scatter(pc_index, np.cumsum(variance_ratio), s=4)
    ax2.set_xlabel('PC')
    ax2.set_ylabel('Cumulative fraction of variance\nexplained')
    ax2.set_title('Cumulative fraction of variance\nexplained by PCs')

    fig.tight_layout()
    plt.show()
# Inspect the variance profile of the 200-component PCA to choose how many PCs to keep.
observe_variance(cd45neg)

In [None]:
# Final PCA for this object: 35 components, restricted to the highly variable genes.
rng = np.random.RandomState(42)
sc.tl.pca(cd45neg, n_comps=35, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
# Harmony integration across the 'Age' groups; the neighbours step below reads obsm['X_pca_harmony'].
sce.pp.harmony_integrate(cd45neg, 'Age')

In [None]:
# 15-nearest-neighbour graph on the Harmony-corrected PCs, then the UMAP embedding.
sc.pp.neighbors(cd45neg, n_neighbors=15, use_rep='X_pca_harmony')
sc.tl.umap(cd45neg)

In [None]:
# Three marker genes next to the 'stage' and 'Anno_level_fig1' labels on the integrated embedding.
marker_panels = ['EPCAM', 'PDGFRA', 'PECAM1', 'stage', 'Anno_level_fig1']
sc.pl.umap(cd45neg, color=marker_panels, use_raw=False, color_map='Spectral_r',
           size=15, ncols=5, wspace=0.3, sort_order=False,
           add_outline=True, outline_width=[0.6, 0.05], frameon=False)


In [None]:
# Leiden clustering over a sweep of resolutions; each run is stored in obs['leiden_<resolution>'].
leiden_resolutions = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)
for res in leiden_resolutions:
    sc.tl.leiden(cd45neg, resolution=res, key_added=f'leiden_{res}', random_state=42)

In [None]:
# Side-by-side view of every Leiden resolution, used to pick the clustering for annotation.
sc.set_figure_params(transparent=True, vector_friendly=False, color_map='viridis', dpi_save=300, dpi=80)
leiden_keys = [f'leiden_{res}' for res in (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)]
sc.pl.umap(cd45neg, color=leiden_keys, palette=user_defined_palette,
           color_map='Spectral_r', use_raw=False, ncols=5, wspace=0.3,
           sort_order=False, add_outline=True, outline_width=[0.6, 0.05], frameon=False)

In [None]:
# Collapse the resolution-0.5 Leiden clusters into three lineages; unmapped clusters are flagged 'ERROR'.
clusters_of_lineage = {
    'TEC': ('2', '3', '5', '6', '7'),
    'EC': ('1', '4', '8'),
    'FB': ('0',),
}
lineage_of_cluster = {
    cluster: lineage
    for lineage, clusters in clusters_of_lineage.items()
    for cluster in clusters
}
cd45neg.obs['cell_type'] = [lineage_of_cluster.get(cluster, 'ERROR') for cluster in cd45neg.obs['leiden_0.5']]

In [None]:
# Three colours for the 'cell_type' categories.
# NOTE(review): presumably applied in sorted category order - confirm if an 'ERROR' category is present.
cd45neg.uns['cell_type_colors'] = [ '#ff7f0e','#199919', '#aa40fc']

In [None]:
# Same marker panels as before, now with the derived 'cell_type' lineage label as the last panel.
lineage_panels = ['EPCAM', 'PDGFRA', 'PECAM1', 'stage', 'cell_type']
sc.pl.umap(cd45neg, color=lineage_panels, use_raw=False, color_map='Spectral_r',
           size=15, ncols=5, wspace=0.3, sort_order=False,
           add_outline=True, outline_width=[0.6, 0.05], frameon=False)


### Human ECs

In [None]:
# Endothelial subset. .copy() makes it an independent AnnData: the following cells
# modify it in place (gene filtering, PCA, obs/uns writes), which on a view of
# cd45neg would trigger an implicit copy with an ImplicitModificationWarning.
EC = cd45neg[cd45neg.obs['cell_type'] == 'EC'].copy()

In [None]:
# Remove columns with all 0s, i.e. genes detected in no endothelial cell
sc.pp.filter_genes(EC, min_cells=1)

In [None]:
# Re-select the 2500 most variable genes within the endothelial subset.
sc.pp.highly_variable_genes(EC, n_top_genes=2500, n_bins=20, flavor='seurat')

In [None]:
# Exploratory 200-component PCA; only its variance profile is used (plotted next).
rng = np.random.RandomState(42)
sc.tl.pca(EC, n_comps=200, svd_solver='arpack', random_state=rng)

In [None]:
# Inspect explained variance to choose the number of PCs for the endothelial subset.
observe_variance(EC)

In [None]:
# Final endothelial PCA: 35 components on the highly variable genes.
rng = np.random.RandomState(42)
sc.tl.pca(EC, n_comps=35, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
# Harmony integration across the 'Age' groups; the neighbours step below reads obsm['X_pca_harmony'].
sce.pp.harmony_integrate(EC, 'Age')

In [None]:
# 15-nearest-neighbour graph on the Harmony-corrected PCs, then the UMAP embedding.
sc.pp.neighbors(EC, n_neighbors=15, use_rep='X_pca_harmony')
sc.tl.umap(EC)

In [None]:
# Endothelial marker sets (short lists) and ranked signatures (the *_sign lists; only the
# leading genes of each signature are scored in the next cell).
# NOTE(review): several symbols look like upper-cased mouse gene names - confirm they exist in var_names.
arteries = [
    '8430408G22RIK', 'CLU', 'CRIP1', 'FBLN2', 'GJA4', 'HEY1',
    'MECOM', 'SAT1', 'SEMA3G', 'SOX17', 'TM4SF1', 'TSC22D1',
]
capilaries = [
    'AW112010', 'BC028528', 'CAR4', 'CD200', 'CD300LG',
    'GPIHBP1', 'KDR', 'RGCC', 'SGK1', 'SPARC',
]
veins = [
    'APOE', 'BGN', 'CTLA2A', 'ICAM1', 'IL6ST',
    'PTGS1', 'TMSB10', 'VCAM1', 'VWF',
]
lymphatic = ['PROX1', 'PDPN', 'LYVE1']
artEC_sign = [
    'FBLN5', 'STMN2', 'CLU', 'SEMA3G', 'GLUL', 'TM4SF1', 'EPS8L2', 'COL8A1', 'NEBL', 'VEGFC',
    'ALPL', 'ARL15', 'GJA4', 'EDN1', 'S100A6', 'MAST4', 'SOX17', 'PDGFD', 'FBLN2', 'VIM',
    'LTBP4', 'SLC6A6', 'KLF2', 'HEG1', 'EPAS1', 'FXYD5', 'LY6A', 'CRIP1', 'FN1', 'CD9',
    'MECOM', 'TSC22D1', 'BSG', 'ATOX1', 'PODXL', 'PTPRR', 'EBF1', 'CDK19', 'ICAM2', 'TSPO',
    'CST3', 'ELN', 'AZIN1', 'PCSK5', 'GADD45G', 'TIMP3', 'ACE', 'TANC2', 'RGS10', 'GJA5',
]
capEC_sign = [
    'GPIHBP1', 'RGCC', 'FABP4', 'CD36', 'CAR4', 'MGLL', 'CD300LG', 'TCF15', 'KDR', 'ARHGAP18',
    'IGFBP7', 'KANK3', 'AQP7', 'ETL4', 'TSPAN13', 'CAVIN2', 'HSPB1', 'PPP1R2', 'LIMS2', 'FABP5',
    'NRP1', 'TIMP4', 'GNG11', 'SEPT4', 'LPL', 'GM12002', 'CCDC85A', 'PLPP3', 'ABLIM3', 'SPARC',
    'XDH', 'TMSB4X', 'AW112010', 'CTNNBIP1', 'THRSP', 'ADGRL4', 'CAV2', 'CXCL12', 'DHRS3', 'CD81',
    'EMCN', 'TCIM', 'C1QTNF9', 'SPARCL1', 'CCDC85B', 'PITPNC1', 'RFLNB', 'UBB', 'LY6C1', 'SULT1A1',
]
venEC_sign = [
    'LRG1', 'IL6ST', 'VWF', 'PLVAP', 'MT-CO1', 'SELP', 'SPINT2', 'PCDH7', 'MT-ND4', '2200002D01RIK',
    'TMSB10', 'EEF1A1', 'MT-CO3', 'BGN', 'PDE4D', 'MT-ATP6', 'AQP1', 'TPT1', 'CTLA2A', 'ACKR1',
    'APOE', 'PDLIM1', 'CD9', 'ST3GAL4', 'EHD4', 'FTH1', 'EEF1B2', 'PECAM1', 'THSD7A', 'COL15A1',
    'MAN1A', 'VIM', 'ENPP2', 'RBP1', 'IGFBP4', 'ABCA1', 'CSRP2', 'SLCO2B1', 'ZFP521', 'VCAM1',
    'DPYSL3', 'NR2F2', 'PAM', 'LDB2', 'INSR', 'CD74', 'TMEM176B', 'IL1R1', 'ARRB1', 'ARHGAP26',
]

In [None]:
# Score every endothelial gene set on EC.X (use_raw=False); each score lands in EC.obs[<score name>].
# The ranked *_sign signatures are truncated to their first x genes, the marker sets are used in full.
x = 9
ec_gene_sets = [
    ('arterial', arteries),
    ('capilary', capilaries),
    ('venular', veins),
    ('lymphatic', lymphatic),
    ('artEC_sign', artEC_sign[:x]),
    ('capEC_sign', capEC_sign[:x]),
    ('venEC_sign', venEC_sign[:x]),
]
for score_name, genes in ec_gene_sets:
    sc.tl.score_genes(EC, gene_list=genes, score_name=score_name, use_raw=False)

In [None]:
# Endothelial signature scores on the EC embedding, four panels per row.
sc.set_figure_params(transparent=True, vector_friendly=True, color_map='viridis', dpi_save=300, dpi=80)

ec_score_keys = ['arterial', 'capilary', 'venular', 'lymphatic',
                 'artEC_sign', 'capEC_sign', 'venEC_sign']
sc.pl.umap(EC, color=ec_score_keys, cmap='Spectral_r', ncols=4, wspace=0.1,
           add_outline=True, outline_width=[0.6, 0.05], frameon=False)

In [None]:
# Leiden clustering of the endothelial subset over a sweep of resolutions (obs['leiden_<resolution>']).
leiden_resolutions = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)
for res in leiden_resolutions:
    sc.tl.leiden(EC, resolution=res, key_added=f'leiden_{res}', random_state=42)

In [None]:
# Every endothelial Leiden resolution side by side.
sc.set_figure_params(transparent=True, vector_friendly=False, color_map='viridis', dpi_save=300, dpi=80)
leiden_keys = [f'leiden_{res}' for res in (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)]
sc.pl.umap(EC, color=leiden_keys, palette=user_defined_palette,
           color_map='Spectral_r', use_raw=False, ncols=5, wspace=0.3,
           sort_order=False, add_outline=True, outline_width=[0.6, 0.05], frameon=False)

In [None]:
# Name the resolution-1.0 endothelial clusters; clusters missing from the table become 'ERROR'.
ec_clusters_of_subset = {
    '0:arEC': ('4',),
    '1:capEC': ('5', '8'),
    '2:venEC': ('0', '1', '2', '3', '6', '7', '9', '10', '11'),
}
ec_subset_of_cluster = {
    cluster: subset
    for subset, clusters in ec_clusters_of_subset.items()
    for cluster in clusters
}
EC.obs['cell_type_subset'] = [ec_subset_of_cluster.get(cluster, 'ERROR') for cluster in EC.obs['leiden_1.0']]

In [None]:
# One colour per endothelial subset label (three entries).
EC.uns['cell_type_subset_colors'] = ['#F6222E', '#3283FE', '#16FF32']

In [None]:
# Ranked-signature scores next to the 'stage' and 'cell_type_subset' labels for the endothelial subset.
sc.set_figure_params(transparent=True, vector_friendly=True, color_map='viridis', dpi_save=300, dpi=80)

ec_summary_keys = ['artEC_sign', 'capEC_sign', 'venEC_sign', 'stage', 'cell_type_subset']
sc.pl.umap(EC, color=ec_summary_keys, cmap='Spectral_r', ncols=6, wspace=0.2,
           add_outline=True, outline_width=[0.6, 0.05], frameon=False)

### Human FBs

In [None]:
# Fibroblast subset. .copy() makes it an independent AnnData: the following cells
# modify it in place (gene filtering, PCA, obs/uns writes), which on a view of
# cd45neg would trigger an implicit copy with an ImplicitModificationWarning.
FB = cd45neg[cd45neg.obs['cell_type'] == 'FB'].copy()

In [None]:
# Remove columns with all 0s, i.e. genes detected in no fibroblast
sc.pp.filter_genes(FB, min_cells=1)

In [None]:
# Re-select highly variable genes within the fibroblast subset (3500 here, versus 2500 for cd45neg and EC).
sc.pp.highly_variable_genes(FB, n_top_genes=3500, n_bins=20, flavor='seurat')

In [None]:
# Exploratory 200-component PCA; only its variance profile is used (plotted next).
rng = np.random.RandomState(42)
sc.tl.pca(FB, n_comps=200, svd_solver='arpack', random_state=rng)

In [None]:
# Inspect explained variance to choose the number of PCs for the fibroblast subset.
observe_variance(FB)

In [None]:
# Final fibroblast PCA: 50 components on the highly variable genes.
rng = np.random.RandomState(42)
sc.tl.pca(FB, n_comps=50, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
# Harmony integration across the 'Age' groups; the neighbours step below reads obsm['X_pca_harmony'].
sce.pp.harmony_integrate(FB, 'Age')

In [None]:
# 15-nearest-neighbour graph on the Harmony-corrected PCs, then the UMAP embedding.
sc.pp.neighbors(FB, n_neighbors=15, use_rep='X_pca_harmony')
sc.tl.umap(FB)

In [None]:
# Fibroblast marker sets (scored in full) and ranked signatures (the *_sign lists; only the
# leading genes of each signature are scored in the next cell).
# NOTE(review): several symbols look like upper-cased mouse gene names - confirm they exist in var_names.
capsular = [
    'AKR1C18', 'MRGPRG', 'UPK3B', 'SMPD3', 'SEMA3C', 'DPP4', 'EFHD1', 'PCSK6', 'PI16', 'ACKR3',
    'SFRP2', 'MSLN', 'SFRP4', 'ADGRD1', 'SPON2', 'MFAP5', 'GPC3', 'SAA3', 'OGN', 'MT2',
    'CPXM2', 'LRRN4CL', 'FNDC1', 'ANXA3', 'QPCT', 'CXCL13', 'SMOC2', 'IGFBP6', 'NOV', 'CSRP2',
]
medullary = [
    'GJA4', 'MMP9', 'PDE2A', 'VTN', 'DES', 'CRSC', 'ECSCR', 'C1QTNF5', 'CX3CL1', 'ACTA2',
    'MEOX1', 'SDC3', 'POSTN', 'TAGLN', 'SERPINE2', 'ENPP2', 'SLCO2B1', 'NDUFA4L2', 'SERPINA3G', 'LTBP1',
    'GFRA2', 'MFGE8', 'RASA3',
]
perilobular = [
    'C7', 'DPT', 'PTN', 'PTGDS', 'RBP5', 'RSPO3', 'MFAP4', 'SPARCL1', 'SRPX', 'HSPB6',
    'CCL2', 'FXYD6', 'NR2F1', 'DBI', 'SPRY1', 'LITAF', 'DCN', 'RUNX1T1', 'RWDD1',
]
interlobular = [
    'LUM', 'MGP', 'OGN', 'FN1', 'IGFBP6', 'COL1A2', 'COL14A1', 'COL3A1', 'COL1A1', 'FBN1',
    'DCN', 'ITM2A', 'CCDC80', 'CTSK', 'WISP2', 'ID2', 'SPON2', 'FSTL1', 'FBLN2', 'MFAP5',
]
capsFB_sign = [
    'PI16', 'TIMP2', 'FN1', 'OPCML', 'CD248', 'MFAP5', 'ANXA3', 'PCOLCE2', 'ACKR3', 'FNDC1',
    'IGFBP6', 'SEMA3C', 'CLEC3B', 'CREB5', 'SMPD3', 'TMEM100', 'ADGRD1', 'LY6C1', 'PCSK6', 'NID1',
    'FBN1', 'DPP4', 'PLA1A', 'EFHD1', 'COL14A1', 'EBF2', 'FSTL1', 'METRNL', 'PLPP3', 'SDK1',
    'LIMCH1', 'AXL', 'LOXL1', 'ANXA1', 'EMILIN2', 'TMSB4X', 'TNXB', 'GFPT2', 'LSP1', 'LY6A',
    'EFNA5', 'ADAMTS5', 'IGFBP5', 'UGDH', 'TIMP3', 'OGN', 'HEG1', 'ISLR', 'DDR2', 'PPP1R14B',
]
intFB_sign = [
    'SMOC2', 'GPX3', 'PENK', 'IGF1', 'GAS1', 'INMT', 'PCOLCE', 'LRP1', 'ITM2A', 'GDF10',
    'RBP1', 'FBLN1', 'SFRP1', 'SLIT3', 'SVEP1', 'AUTS2', 'LPL', 'CELF2', 'SERPINF1', 'IL11RA1',
    'ADAMTS12', 'ABCA8A', 'CYGB', 'NFIB', 'DCN', 'SELENOP', 'NTRK2', 'TMEM119', 'MGST1', 'IGFBP3',
    'MMP2', 'AR', 'GSN', 'SRPX', 'COL15A1', 'MT1', 'LUM', 'OLFML3', 'MFAP2', 'COL3A1',
    'FXYD6', 'FST', 'CRYAB', 'TXNIP', 'HTRA3', 'MFAP4', 'ITM2B', 'FBLN2', 'DHRS3', 'COL1A2',
]
medFB_sign = [
    'CSMD1', 'SERPINE2', 'ENPP2', 'PTN', 'TMEM176B', 'TMEM176A', 'B2M', 'CCL19', 'IGFBP7', 'APOD',
    'BGN', 'H2-D1', 'DES', 'LTC4S', 'LHFP', 'IFITM1', 'CD9', 'APOE', 'LSAMP', 'LY6E',
    'H2-K1', 'H2-Q7', 'SPARCL1', 'IL34', 'NDUFA4L2', 'TCF4', 'SPON1', 'COLEC12', 'NRP1', 'MYLK',
    'HSD11B1', 'MFGE8', 'JUN', 'CP', 'MEOX1', 'ECSCR', 'TRPS1', 'ANK2', 'GPM6B', 'CSTB',
    'CD63', 'PDE4B', 'POSTN', 'H3F3B', 'MGP', 'PTEN', 'PTMA', 'LTBP1', 'COL15A1', 'PDE7B',
]

In [None]:
# Score every fibroblast gene set on FB.X (use_raw=False); each score lands in FB.obs[<score name>].
# The ranked *_sign signatures are truncated to their first x genes, the marker sets are used in full.
x = 9
fb_gene_sets = [
    ('perilobular\n(human)', perilobular),
    ('interlobular\n(human)', interlobular),
    ('capsular\n(mouse)', capsular),
    ('medullary\n(mouse)', medullary),
    ('capsFB_sign', capsFB_sign[:x]),
    ('intFB_sign', intFB_sign[:x]),
    ('medFB_sign', medFB_sign[:x]),
]
for score_name, genes in fb_gene_sets:
    sc.tl.score_genes(FB, gene_list=genes, score_name=score_name, use_raw=False)

In [None]:
# Fibroblast signature scores and the 'stage' label on the FB embedding, four panels per row.
sc.set_figure_params(transparent=True, vector_friendly=True, color_map='viridis', dpi_save=300, dpi=80)

fb_score_keys = ['capsular\n(mouse)', 'medullary\n(mouse)',
                 'perilobular\n(human)', 'interlobular\n(human)',
                 'capsFB_sign', 'intFB_sign', 'medFB_sign', 'stage']
sc.pl.umap(FB, color=fb_score_keys, cmap='Spectral_r', ncols=4, wspace=0.1,
           add_outline=True, outline_width=[0.6, 0.05], frameon=False)

In [None]:
# Leiden clustering of the fibroblast subset over a sweep of resolutions (obs['leiden_<resolution>']).
leiden_resolutions = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)
for res in leiden_resolutions:
    sc.tl.leiden(FB, resolution=res, key_added=f'leiden_{res}', random_state=42)

In [None]:
# Every fibroblast Leiden resolution side by side.
sc.set_figure_params(transparent=True, vector_friendly=False, color_map='viridis', dpi_save=300, dpi=80)
leiden_keys = [f'leiden_{res}' for res in (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0)]
sc.pl.umap(FB, color=leiden_keys, palette=user_defined_palette,
           color_map='Spectral_r', use_raw=False, ncols=5, wspace=0.3,
           sort_order=False, add_outline=True, outline_width=[0.6, 0.05], frameon=False)

In [None]:
# Name the resolution-0.8 fibroblast clusters; clusters missing from the table become 'ERROR'.
fb_clusters_of_subset = {
    '3:capsFB': ('6',),
    '4:intFB': ('0', '2', '3', '4', '7'),
    '5:medFB': ('1', '5'),
}
fb_subset_of_cluster = {
    cluster: subset
    for subset, clusters in fb_clusters_of_subset.items()
    for cluster in clusters
}
FB.obs['cell_type_subset'] = [fb_subset_of_cluster.get(cluster, 'ERROR') for cluster in FB.obs['leiden_0.8']]

In [None]:
# One colour per fibroblast subset label (three entries).
FB.uns['cell_type_subset_colors'] = ['#BDCDFF', '#3B00FB', '#1CFFCE']

In [None]:
# Ranked-signature scores next to the 'stage' and 'cell_type_subset' labels for the fibroblast subset.
sc.set_figure_params(transparent=True, vector_friendly=True, color_map='viridis', dpi_save=300, dpi=80)

fb_summary_keys = ['capsFB_sign', 'intFB_sign', 'medFB_sign', 'stage', 'cell_type_subset']
sc.pl.umap(FB, color=fb_summary_keys, cmap='Spectral_r', ncols=6, wspace=0.2,
           add_outline=True, outline_width=[0.6, 0.05], frameon=False)

### Human TECs

In [None]:
# Thymic epithelial subset. .copy() makes it an independent AnnData: the following
# cells modify it in place (gene filtering, PCA, obs/uns writes), which on a view of
# cd45neg would trigger an implicit copy with an ImplicitModificationWarning.
TEC = cd45neg[cd45neg.obs['cell_type'] == 'TEC'].copy()

In [None]:
# Remove columns with all 0s, i.e. genes detected in no TEC
sc.pp.filter_genes(TEC, min_cells=1)

In [None]:
# Re-select the 3500 most variable genes within the TEC subset.
sc.pp.highly_variable_genes(TEC, n_top_genes=3500, n_bins=20, flavor='seurat')

In [None]:
# Exploratory 200-component PCA; only its variance profile is used (plotted next).
rng = np.random.RandomState(42)
sc.tl.pca(TEC, n_comps=200, svd_solver='arpack', random_state=rng)

In [None]:
# Inspect explained variance to choose the number of PCs for the TEC subset.
observe_variance(TEC)

In [None]:
# Final TEC PCA: 45 components on the highly variable genes.
rng = np.random.RandomState(42)
sc.tl.pca(TEC, n_comps=45, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
# Harmony integration across the 'Age' groups; the neighbours step below reads obsm['X_pca_harmony'].
sce.pp.harmony_integrate(TEC, 'Age')

In [None]:
# 15-nearest-neighbour graph on the Harmony-corrected PCs, then the UMAP embedding.
sc.pp.neighbors(TEC, n_neighbors=15, use_rep='X_pca_harmony')
sc.tl.umap(TEC)

In [None]:
# Ranked TEC signatures, 50 genes each; only the leading genes of each are scored in the next cell.
# NOTE(review): several symbols look like upper-cased mouse gene names - confirm they exist in var_names.
aaTEC1_sign = [
    'ZBTB20', 'CD81', 'TRPM3', 'SLC16A11', 'GSTM1', 'BTG1', 'BC006965', 'CLDN3', 'S100A11', 'GSTA4',
    'LY6E', 'CDK19', 'WFDC18', 'SLC9A3R1', 'IFI27L2A', 'ALDOC', 'PHLDA1', 'ANXA2', 'TALDO1', 'SOCS2',
    'FAM107A', 'KRT18', 'PBX1', 'TM4SF1', 'MEIS2', 'GSN', 'ANXA1', 'BMP6', 'EPCAM', 'DBI',
    'SHANK2', 'KIF19', 'ISL1', 'PLIN2', 'LY6A', 'NUPR1', 'ID2', 'RCN1', 'GPC6', 'RDH10',
    'CD24', 'MT-CO1', 'GAB2', 'PDE4B', 'SLC5A8', 'NEDD4L', 'FBXO2', 'TSHZ2', 'ACSS3', 'CD9',
]
aaTEC2_sign = [
    'CSMD1', 'ZEB2', 'MGP', 'GSN', 'ZBTB20', 'NDRG2', 'SERPINE2', 'CD81', 'FXYD1', 'LAPTM4A',
    'GPX3', 'SOCS3', 'IGFBP7', 'AIRN', 'LGALS1', 'FOS', 'PRRX1', 'PRKG1', 'JUNB', 'SPARC',
    'CEBPD', 'APOE', 'GSTM1', 'GADD45G', 'ID3', 'EGR1', 'PTN', 'CD63', 'TM4SF1', 'CCDC80',
    'PHLDA1', 'CAVIN3', 'CALD1', 'SERPING1', 'SPARCL1', 'CCL19', 'CST3', 'CACNA1C', 'JUND', 'FSTL1',
    'TAGLN2', 'KLF9', 'JUN', 'GUCY1A1', 'COL3A1', 'NOTCH3', 'RORA', 'TSHZ2', 'LHFP', 'GNAS',
]
cTEC_sign = [
    'CTSL', 'CSTB', 'CXCL12', 'PRSS16', 'KRT18', 'GAS6', 'PLTP', 'SLC46A2', 'NDUFA11', 'TBATA',
    'CCL25', 'PSMB11', 'TMEM131L', 'NLGN1', 'PRXL2B', 'PAX1', 'WNT4', 'DPP6', 'PSMB9', 'BNIP3L',
    'SNHG11', 'COPZ2', 'AI646519', 'ANK3', 'LAMP2', 'NDRG3', 'SHISA2', 'RGCC', 'KRT8', 'RBFOX1',
    'TSC22D1', 'KCTD1', 'LY75', 'SH2D4B', 'IGFBP5', 'FABP5', 'PLGRKT', 'SPOCK2', 'RYR3', 'SYNGR1',
    'ZFYVE21', 'TENM4', 'TRP63', 'CLIC5', 'GMPR', 'CASTOR1', 'MACF1', 'IFI27', 'LIMCH1', 'KCNK2',
]
mTEC1_sign = [
    'APOE', 'IFITM3', 'RBMS3', 'LY6A', 'IFI27L2A', 'KRT5', 'KRT14', 'CTSL', 'GAS1', 'IFITM2',
    'CCL21A', 'ITM2B', 'GAS6', 'DCN', 'ISG15', 'IGFBP4', 'LAPTM4A', 'EYA4', 'CPNE8', 'MGP',
    'MIR100HG', 'SULT5A1', 'RTP4', 'IIGP1', 'NXN', 'RBP1', 'GAS5', 'ZFP36L1', 'GSN', 'ANK3',
    'CST3', 'PBX1', 'LIFR', 'MEIS2', 'PERP', 'OASL2', 'ANXA2', 'S100A10', 'BOC', 'ATP1A1',
    'CCL11', 'PHLDA3', 'LY6E', 'NEDD4', 'MYL6', 'TRP63', 'ANXA1', 'TPT1', 'HIF1A', 'GPX3',
]
mTECprol_sign = [
    'TPM2', 'KRT17', 'ASCL1', 'TMSB10', 'PTMA', 'RGS5', 'ADM', 'H2AFY2', 'HES6', 'KRT7',
    'CDK4', 'PFN1', 'SOX4', 'SKINT10', 'TUBB6', 'CFL1', 'MAPK13', 'TUBB2B', 'WFDC18', 'CCND2',
    'STMN1', 'MDK', 'CCL19', 'CD82', 'GM49708', 'H3F3B', 'LGALS1', 'PRXL2B', 'YBX1', 'ACTG1',
    'EEF1G', 'NME2', 'GM15943', 'IL11RA1', 'COLQ', 'CALD1', 'KRT5', 'HNRNPA1', 'EIF4A1', 'PPIA',
    'CCL21A', 'TUBB5', 'HSP90AB1', 'ANGPTL2', 'KCNQ3', 'LPO', 'SET', 'HSPA8', 'HMGN1', 'NPM1',
]
mTEC2_sign = [
    'SRGN', 'CYBA', 'UBD', 'CD74', 'S100A14', 'SYT1', 'CD52', 'AIRE', 'H2-AA', 'H2-OA',
    'LRRC42', 'MRPL38', 'H2-EB1', 'HAGH', 'NFKBIA', 'DPP10', 'CDX1', 'PSME2', 'HDC', 'CALCB',
    'H2-DMB2', 'H2-EB2', 'FAM89A', 'GM47938', 'HSPB11', 'FSCN1', 'CSN2', 'BSPRY', 'NDUFC2', 'FEZF2',
    'FABP5', 'ANKRD33B', 'ING1', 'GM48239', 'UTF1', 'IL4I1', 'TXN1', 'CTSS', 'NUP85', 'CIB1',
    'LAPTM5', 'TM4SF5', 'SYNGR2', 'DOCK10', 'DIO1', 'NSMCE2', 'SNX29', 'S100G', 'CD40', 'PTPRQ',
]
mTEC3_sign = [
    'LY6D', 'DAPL1', 'DMKN', 'SPINK5', 'SKINT3', 'FXYD3', 'SFN', 'TACSTD2', 'HSPB1', 'CDKN1A',
    'OIT1', 'PDZK1IP1', 'PRDX5', 'CDKN2A', 'PERP', 'CDKN2B', 'GSTA4', 'ATOX1', 'LYPD3', 'CST6',
    'METRNL', 'SBSN', 'DSTN', 'DSP', 'CALML3', 'RAB11A', 'KRT17', 'KRT23', 'BCL2A1B', 'SERPINB2',
    'H2AFJ', 'JUP', 'AVPI1', '4833423E24RIK', 'IDE', 'CEBPB', 'GSTA2', 'SDC1', 'CLIC3', 'TMEM54',
    '2200002D01RIK', 'DGAT2', 'TMEM45A', 'CLDN4', 'RPTN', 'GLTP', 'NUPR1', 'ARF6', 'TSPAN8', 'IER5',
]
tuft_sign = [
    'MCTP1', 'GNG13', 'AVIL', 'RGS13', 'ESPN', 'LRMP', 'ANXA4', 'LTC4S', 'PTPN18', 'BMX',
    'CYSTM1', 'IVNS1ABP', 'FYB', 'LY6G6F', 'CALM2', 'ETHE1', 'REEP5', 'LIMA1', '1810046K07RIK', 'ABHD2',
    'PIK3R5', 'TRPM5', 'ST18', 'CRIP1', 'DGKI', 'POU2F3', 'VAV1', 'SCAND1', 'CHIL1', 'ALOX5AP',
    'PTPN6', 'PLAC8', 'ALOX5', 'OSTF1', 'OXR1', 'PLK2', 'OCIAD2', 'SH2D6', 'AHNAK2', 'STK38',
    'TMEM245', 'ATP1A2', 'ALDH2', 'CD47', 'FXYD6', 'INPP5D', 'DCLK1', 'GPCPD1', 'SH2D7', 'STRIP2',
]
nTEC_sign = [
    'PTPRN2', 'CACNA2D1', 'CAR8', 'SCG5', 'STXBP5L', 'SNAP25', 'CHGA', 'CD9', 'DNAJC12', 'CCSER1',
    'CAMK2N1', 'SYT7', 'PAM', 'CACNA1A', 'SYT1', 'CACNB2', 'CYSTM1', 'TSHZ2', 'FAM183B', 'CPLX2',
    'CEACAM10', 'BTG2', 'RIMS2', 'RESP18', 'FHL2', 'A230057D06RIK', 'TMEM163', 'RAP1GAP2', 'KCNB2', 'NFASC',
    'PIP5K1B', 'ALCAM', 'CHGB', 'ICA1', 'PCBD1', 'KRT7', 'INSM1', 'SMIM22', 'JUND', '5330417C22RIK',
    'ATF3', 'AC149090.1', 'BEX2', 'PRKN', 'CPN1', 'TOX3', 'NOL4', 'A330076H08RIK', 'AOPEP', 'STARD10',
]
goblet_sign = [
    'WFDC2', 'CYP2F2', 'CXCL17', 'GSTO1', 'NUPR1', 'S100A11', 'ANXA3', 'SORBS2', 'H2-K1', 'SERPINB11',
    'KRT19', 'SLC12A2', 'FXYD3', 'LY6D', 'AQP5', 'ALCAM', 'BSG', 'BACE2', 'LY6E', 'F3',
    'LY6A', 'MECOM', 'AW112010', 'PGLYRP1', 'ATP1B1', 'CP', 'ANXA1', 'TSPAN8', 'IRF7', 'LGALS3BP',
    'TCEAL9', 'NFIB', 'GSTA4', 'CD14', 'MLLT3', 'SLC16A11', 'UPK1B', 'LRRC26', 'FOXA1', 'ECE1',
    'TIMP2', 'IFI27L2A', 'CES1D', 'EPAS1', 'CBR2', 'LMO7', 'SPINK5', 'HSPB1', 'SMIM22', 'KCNJ16',
]
Mlike_sign = [
    'CCL20', 'CCL9', 'SERPINB6A', 'SERPINB1A', 'TMSB4X', '2200002D01RIK', 'NOSTRIN', 'CTSH', 'CCL6', 'AW112010',
    'SPIB', 'BCL2A1D', 'CSN2', 'CYP2A5', 'FABP1', 'HAMP', 'POLD1', 'BCL2A1B', 'ATOX1', 'ISCU',
    'PGLYRP1', 'BCL2A1A', 'CLU', 'KRT20', 'ATP6V1C1', 'H2-M2', 'GJB2', 'CRIP1', 'FABP5', 'UBD',
    'VAMP5', 'MARCKSL1', 'PLB1', 'ARPC1B', '4930520O04RIK', 'TNFRSF11B', 'SEPHS2', 'DSG1A', 'SPINT2', 'FTL1',
    'AA467197', 'SEPT1', 'AIF1', 'GADD45A', 'TNFAIP2', 'VAMP8', 'FABP4', 'HSPE1', 'RAC2', 'AHCYL2',
]

In [None]:
# Score each ranked TEC signature using its first x genes; results land in TEC.obs[<score name>].
# use_raw=False is passed explicitly so the scores are computed on TEC.X, matching the
# EC and FB scoring cells and the use_raw=False plots. Without it sc.tl.score_genes
# falls back to TEC.raw whenever a .raw attribute is present.
x = 29
tec_signatures = {
    'aaTEC1_sign': aaTEC1_sign,
    'aaTEC2_sign': aaTEC2_sign,
    'cTEC_sign': cTEC_sign,
    'mTEC1_sign': mTEC1_sign,
    'mTECprol_sign': mTECprol_sign,
    'mTEC2_sign': mTEC2_sign,
    'mTEC3_sign': mTEC3_sign,
    'tuft_sign': tuft_sign,
    'nTEC_sign': nTEC_sign,
    'goblet_sign': goblet_sign,
    'Mlike_sign': Mlike_sign,
}
for score_name, signature in tec_signatures.items():
    sc.tl.score_genes(TEC, gene_list=signature[:x], score_name=score_name, use_raw=False)

In [None]:
# TEC signature scores on the TEC embedding, four panels per row.
sc.set_figure_params(transparent=True, vector_friendly=True, color_map='viridis', dpi_save=300, dpi=80)

tec_score_keys = ['aaTEC1_sign', 'aaTEC2_sign', 'cTEC_sign', 'mTEC1_sign', 'mTECprol_sign',
                  'mTEC2_sign', 'mTEC3_sign', 'tuft_sign', 'nTEC_sign', 'goblet_sign', 'Mlike_sign']
sc.pl.umap(TEC, color=tec_score_keys, cmap='Spectral_r', ncols=4, wspace=0.1,
           add_outline=True, outline_width=[0.6, 0.05], frameon=False)

In [None]:
# Leiden clustering of the TEC subset over a wider resolution sweep (0.2 to 2.0), stored in obs['leiden_<resolution>'].
leiden_resolutions = (0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0)
for res in leiden_resolutions:
    sc.tl.leiden(TEC, resolution=res, key_added=f'leiden_{res}', random_state=42)

In [None]:
# Every TEC Leiden resolution side by side.
sc.set_figure_params(transparent=True, vector_friendly=False, color_map='viridis', dpi_save=300, dpi=80)
leiden_keys = [f'leiden_{res}' for res in (0.2, 0.4, 0.6, 0.8, 1.0, 1.2, 1.4, 1.6, 1.8, 2.0)]
sc.pl.umap(TEC, color=leiden_keys, palette=user_defined_palette,
           color_map='Spectral_r', use_raw=False, ncols=5, wspace=0.3,
           sort_order=False, add_outline=True, outline_width=[0.6, 0.05], frameon=False)

In [None]:
# Close-up of the resolution-1.2 clustering, the one used for the TEC annotation below.
sc.set_figure_params(transparent=True, vector_friendly=False, color_map='viridis', dpi_save=300, dpi=80)
sc.pl.umap(TEC, color=['leiden_1.2'], palette=user_defined_palette,
           color_map='Spectral_r', use_raw=False, ncols=5, wspace=0.3,
           sort_order=False, add_outline=True, outline_width=[0.6, 0.05], frameon=False)

In [None]:
# Name the resolution-1.2 TEC clusters; clusters missing from the table become 'ERROR'.
#
# The previous chained conditional listed two clusters twice, and the first match won:
#   * '7' was in the mTEC1 list AND had its own "'14:mTEC-prol' if x=='7'" branch,
#     so '14:mTEC-prol' could never be assigned;
#   * '14' was mapped to '10:aaTEC1' AND listed again under '16:mTEC3' (dead entry).
# Each cluster now appears exactly once. '7' is assigned to '14:mTEC-prol' (otherwise
# that label has no cluster at all) and '14' stays '10:aaTEC1' as before.
# NOTE(review): confirm cluster '7' is the proliferating mTEC population; if it is
# mTEC1, move it back and drop the '14:mTEC-prol' entry.
tec_clusters_of_subset = {
    '10:aaTEC1': ('14',),
    '11:aaTEC2': ('8',),
    '12:cTEC': ('9', '13'),
    '13:mTEC1': ('0', '1', '2', '3', '4', '5', '6'),
    '14:mTEC-prol': ('7',),
    '15:mTEC2': ('10', '16', '17'),
    '16:mTEC3': ('11',),
    '18:mimic(neuroendo)': ('12', '18'),
    '20:muscle': ('15',),
}
tec_subset_of_cluster = {}
for subset_label, cluster_ids in tec_clusters_of_subset.items():
    for cluster_id in cluster_ids:
        # Fail loudly instead of silently shadowing a label if a cluster is listed twice again.
        if cluster_id in tec_subset_of_cluster:
            raise ValueError(
                f"Leiden cluster {cluster_id!r} is assigned to both "
                f"{tec_subset_of_cluster[cluster_id]!r} and {subset_label!r}"
            )
        tec_subset_of_cluster[cluster_id] = subset_label
TEC.obs['cell_type_subset'] = [tec_subset_of_cluster.get(cluster, 'ERROR') for cluster in TEC.obs['leiden_1.2']]

In [None]:
# Colours for the TEC subset labels (12 entries).
# NOTE(review): the annotation cell defines fewer than 12 distinct labels - confirm which colour is meant for which label.
TEC.uns['cell_type_subset_colors'] = [ '#2ED9FF', '#c1c119', '#8b0000', '#FE00FA', "#F8A19F", '#1CBE4F',  '#B5EFB5', 'black', '#AA0DFE','#FEAF16', '#325A9B', '#C075A6']

In [None]:
# 'stage' and the annotated 'cell_type_subset' labels on the TEC embedding.
sc.set_figure_params(transparent=True, vector_friendly=True, color_map='viridis', dpi_save=300, dpi=80)

tec_summary_keys = ['stage', 'cell_type_subset']
sc.pl.umap(TEC, color=tec_summary_keys, use_raw=False, cmap='Spectral_r', ncols=5, wspace=0.3,
           add_outline=True, outline_width=[0.6, 0.05], frameon=False)

### Transfer annotation

In [None]:
# Stack the per-lineage subset labels into one Series indexed by cell name.
annotated_subsets = pd.concat([EC.obs['cell_type_subset'], FB.obs['cell_type_subset'],  TEC.obs['cell_type_subset']])

In [None]:
# Start with an empty label for every cell; cells absent from the three subsets keep ''.
cd45neg.obs['cell_type_subset']=''

In [None]:
# Copy each cell's subset label onto cd45neg, matched by cell name (pandas aligns the
# right-hand Series on the index of the selected rows).
# A single .loc assignment replaces the chained `obs[col][mask] = ...` form, which
# raises SettingWithCopyWarning and writes to a temporary (i.e. does nothing) when
# pandas copy-on-write is enabled.
has_subset_label = cd45neg.obs.index.isin(annotated_subsets.index)
cd45neg.obs.loc[has_subset_label, 'cell_type_subset'] = annotated_subsets

In [None]:
# Duplicate the 'stage' label under the column name 'sample'.
cd45neg.obs['sample']=cd45neg.obs['stage']

In [None]:
# Colours for the combined subset labels (17 entries; the first six repeat the EC and FB colour lists).
# NOTE(review): presumably matched to categories in sorted string order, where '10:aaTEC1' sorts
# before '1:capEC' and '' sorts first - confirm the colours land on the intended labels.
cd45neg.uns['cell_type_subset_colors'] = ['#F6222E', '#3283FE', '#16FF32', '#BDCDFF', '#3B00FB', '#1CFFCE', '#2ED9FF', '#c1c119', '#8b0000', '#FE00FA', "#F8A19F", '#1CBE4F', '#FEAF16', 'black','#325A9B', '#C075A6', 'black']

In [None]:
# Final figure: 'stage' and transferred 'cell_type_subset' labels on the cd45neg embedding, also saved to PDF.
final_keys = ['stage', 'cell_type_subset']
sc.pl.umap(
    cd45neg,
    color=final_keys,
    save='_cd45neg_human.pdf',
    use_raw=False,
    color_map='Spectral_r',
    size=15,
    ncols=4,
    wspace=0.3,
    sort_order=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
)

In [None]:
# Output location for the annotated object. This rebinds `path_to_h5ad`, which held the input path until here.
path_to_h5ad = '../../../output/metadata/anndata_objects/Fig1pt3.h5ad'

In [None]:
# Persist the annotated cd45neg object (embedding, clusterings, labels, colours) as .h5ad.
cd45neg.write(path_to_h5ad)