## Notebook setup

In [None]:
import scanpy as sc
import scanpy.external as sce
import numpy as np
import pandas as pd
import warnings, scipy.sparse as sp, matplotlib, matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.pyplot import rc_context
from collections import Counter
import matplotlib.font_manager
import openpyxl
import pyreadr
import rpy2
from rpy2.robjects.packages import importr
import rpy2.robjects as robjects
import magic
#import seaborn as sns
import palantir
import loompy
import feather
import re
#from scipy.sparse import csgraph

# Global figure/export configuration for the whole notebook.
# NOTE(review): fonttype 42 is presumably chosen so text in saved PDF/PS figures is
# embedded as TrueType and stays editable — confirm against the matplotlibrc docs.
matplotlib.rcParams['pdf.fonttype'] = 42
matplotlib.rcParams['ps.fonttype'] = 42
matplotlib.rcParams['font.family'] = 'sans-serif'
matplotlib.rcParams['font.sans-serif'] = 'Arial'
matplotlib.rc('font', size=14)
# `lines` is used further down to draw the zero reference line on the proportions bar plot.
import matplotlib.lines as lines

# Show up to 200 rows when a DataFrame is displayed as cell output.
pd.set_option('display.max_rows', 200)

# Default scanpy figure settings; several plotting cells call set_figure_params again
# with a different default colour map.
sc.set_figure_params(dpi=80, dpi_save=300, color_map='Spectral_r', vector_friendly=True, transparent=True)
sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()

In [None]:
# Shared list of 16 hex colours, passed as `palette=` to the UMAP calls in this notebook.
user_defined_palette =  [ '#F6222E', '#FEAF16','#3283FE','#BDCDFF', '#3B00FB', '#F8A19F', '#1CFFCE',  '#C4451C', 
                          '#2ED9FF', '#c1c119', '#8b0000', '#FE00FA', '#1CBE4F','#B5EFB5', '#0e452b', '#AA0DFE']

In [None]:
# Continuous colour maps interpolated from the listed hex stops.
# Both are created under the same name ('mycmap'); they are only distinguished by the
# Python variables they are bound to.
# Markers: pale blue (#E6E6FF) -> saturated blue (#0000FF).
user_defined_cmap_markers = LinearSegmentedColormap.from_list('mycmap', ["#E6E6FF", "#CCCCFF", "#B2B2FF", "#9999FF",  "#6666FF",   "#3333FF", "#0000FF"])
# DEGs: saturated blue -> pale blue -> pale green -> saturated green (used by the EMT heatmap).
user_defined_cmap_degs = LinearSegmentedColormap.from_list('mycmap', ["#0000FF", "#3333FF", "#6666FF", "#9999FF", "#B2B2FF", "#CCCCFF", "#E6E6FF", "#E6FFE6", "#CCFFCC", "#B2FFB2", "#99FF99", "#66FF66", "#33FF33", "#00FF00"])

In [None]:
%matplotlib inline 

## 1 Load data for Figure 1

In [None]:
path_to_h5ad = '../output/metadata/anndata_objects/Fig1_pt1.h5ad'

In [None]:
# Load the AnnData object for Figure 1 from `path_to_h5ad`.
adata_d0 = sc.read_h5ad(path_to_h5ad)
# NOTE(review): resetting uns['log1p']['base'] looks like the usual workaround for scanpy
# raising KeyError 'base' on objects reloaded from .h5ad — confirm it is still required.
# This line itself raises KeyError if uns has no 'log1p' entry.
adata_d0.uns['log1p']["base"] = None

In [None]:
# Overview UMAP of the full object, one panel each for 'cell_type', 'stage' and 'day'.
sc.set_figure_params(
    dpi=80,
    dpi_save=300,
    color_map='Spectral_r',
    vector_friendly=True,
    transparent=True,
)

sc.pl.umap(
    adata_d0,
    color=['cell_type', 'stage', 'day'],
    color_map='Spectral_r',
    use_raw=False,
    ncols=4,
    wspace=0.3,
    size=15,
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    sort_order=False,
)

In [None]:
# Expression of ten individual genes on the full UMAP (values taken from .X, not .raw).
sc.set_figure_params(
    dpi=80,
    dpi_save=300,
    color_map='Spectral_r',
    vector_friendly=True,
    transparent=True,
)

sc.pl.umap(
    adata_d0,
    color=[
        'Pdgfra', 'Epcam', 'H2-Aa', 'Pecam1', 'Cdh5',
        'Nkain4', 'Upk3b', 'Acta2', 'Myl9', 'S100b',
    ],
    color_map='Spectral_r',
    use_raw=False,
    ncols=5,
    wspace=0.1,
    size=15,
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    sort_order=False,
)

## 2 Analyze TEC, FB and EC and annotate based on public marker genes and signatures

### 2.1 Our ECs

In [None]:
# Subset the cells annotated as 'EC'. Take an explicit copy rather than a view: the cells
# below modify EC_d0 in place (gene filtering, PCA, obs/uns assignments), and doing that
# on a view relies on AnnData's implicit view-to-copy conversion.
EC_d0 = adata_d0[adata_d0.obs['cell_type'] == 'EC'].copy()

In [None]:
# Drop genes that are not detected in any cell of the EC subset
# (min_cells=1 keeps a gene only if at least one cell expresses it).
sc.pp.filter_genes(EC_d0, min_cells=1)

In [None]:
# Flag the 3500 top-ranked variable genes (flavor='seurat', 20 bins);
# the PCA cells below restrict themselves to these via use_highly_variable=True.
sc.pp.highly_variable_genes(EC_d0, n_top_genes=3500, n_bins=20, flavor='seurat')

In [None]:
# Exploratory PCA with 200 components, seeded with 42. Its variance ratios are inspected
# with observe_variance() before the PCA is re-run with fewer components.
rng = np.random.RandomState(42)
sc.tl.pca(EC_d0, n_comps=200, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
def observe_variance(anndata_object):
    """Plot the variance explained by each principal component of a fitted PCA.

    Draws two scatter panels side by side: the fraction of variance explained per
    PC (left) and the cumulative fraction (right). PCs are numbered from 0 on the
    x axis.

    Parameters
    ----------
    anndata_object
        Object whose ``.uns['pca']['variance_ratio']`` holds the per-component
        variance ratios (in this notebook, written by a preceding ``sc.tl.pca`` call).

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``. The previous version returned the
        uncalled ``plt.show`` function, which made the notebook print a function repr.
    """
    variance_ratio = anndata_object.uns['pca']['variance_ratio']
    pc_index = range(len(variance_ratio))

    fig, (ax_per_pc, ax_cumulative) = plt.subplots(1, 2, figsize=(10, 5))

    # variance per principal component
    ax_per_pc.scatter(pc_index, variance_ratio, s=4)
    ax_per_pc.set_xlabel('PC')
    ax_per_pc.set_ylabel('Fraction of variance explained\n')
    ax_per_pc.set_title('Fraction of variance explained per PC\n')

    # cumulative variance explained
    ax_cumulative.scatter(pc_index, np.cumsum(variance_ratio), s=4)
    ax_cumulative.set_xlabel('PC')
    ax_cumulative.set_ylabel('Cumulative fraction of variance\nexplained')
    ax_cumulative.set_title('Cumulative fraction of variance\nexplained by PCs')

    fig.tight_layout()
    plt.show()


observe_variance(EC_d0)

In [None]:
# Re-run the PCA with 30 components (same seed and solver as the 200-component run);
# this is the PCA that the Harmony step below starts from.
rng = np.random.RandomState(42)
sc.tl.pca(EC_d0, n_comps=30, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
# Harmony integration keyed on the 'sample' obs column; the neighbours step below reads
# the corrected embedding from 'X_pca_harmony'.
sce.pp.harmony_integrate(EC_d0, 'sample')

In [None]:
# Build the 15-nearest-neighbour graph on the Harmony-corrected PCs, then embed with UMAP.
sc.pp.neighbors(EC_d0, n_neighbors=15, use_rep='X_pca_harmony')
sc.tl.umap(EC_d0)

### ECs in other tissues

In [None]:
# Marker-gene signatures scored on the EC subset below (one score per list).
# Clone-style symbols are written with their upper-case letters ('8430408G22Rik',
# 'AW112010', 'BC028528'), matching how 'AW112010' is spelled in the other signature
# lists of this notebook; a differently-cased symbol would not match the gene names and
# would be left out of the score.
arteries = ['8430408G22Rik', 'Clu', 'Crip1', 'Fbln2', 'Gja4', 'Hey1', 'Mecom', 'Sat1', 'Sema3g', 'Sox17', 'Tm4sf1', 'Tsc22d1']
capilaries = ['AW112010', 'BC028528', 'Car4', 'Cd200', 'Cd300lg', 'Gpihbp1', 'Kdr', 'Rgcc', 'Sgk1', 'Sparc']
veins = ['Apoe', 'Bgn', 'Ctla2a', 'Icam1', 'Il6st', 'Ptgs1', 'Tmsb10', 'Vcam1', 'Vwf']
lymphatic = ['Prox1', 'Pdpn', 'Lyve1']

In [None]:
# Score each vessel-type signature on the EC subset; every score is stored in .obs under
# the given name and computed from .X (use_raw=False).
for ec_score_name, ec_signature in [
    ('arterial', arteries),
    ('capilary', capilaries),
    ('venular', veins),
    ('lymphatic', lymphatic),
]:
    sc.tl.score_genes(EC_d0, gene_list=ec_signature, score_name=ec_score_name, use_raw=False)

In [None]:
# UMAP of the EC subset coloured by the four signature scores; the colour scale starts at 0.
sc.set_figure_params(
    dpi=80,
    dpi_save=300,
    color_map='Spectral_r',
    vector_friendly=True,
    transparent=True,
)

sc.pl.umap(
    EC_d0,
    color=['arterial', 'capilary', 'venular', 'lymphatic'],
    palette=user_defined_palette,
    color_map='Spectral_r',
    use_raw=False,
    vmin=0,
    ncols=5,
    wspace=0.1,
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    sort_order=False,
)

### ECs clustering and annotation

In [None]:
# Leiden clustering at resolutions 0.1, 0.2, ..., 1.0; each result is stored in .obs as
# 'leiden_<resolution>' (e.g. 'leiden_0.1', 'leiden_1.0').
for resolution_parameter in (tenth / 10 for tenth in range(1, 11)):
    sc.tl.leiden(
        EC_d0,
        resolution=resolution_parameter,
        random_state=42,
        key_added=f'leiden_{resolution_parameter}',
    )

In [None]:
# Compare all ten Leiden resolutions side by side on the EC UMAP.
sc.set_figure_params(
    dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True
)
sc.pl.umap(
    EC_d0,
    color=[f'leiden_{tenth / 10}' for tenth in range(1, 11)],
    palette=user_defined_palette,
    color_map='Spectral_r',
    use_raw=False,
    ncols=5,
    wspace=0.7,
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    sort_order=False,
)

In [None]:
# Resolution 0.1 on its own: this is the clustering the EC annotation below is based on.
sc.set_figure_params(
    dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True
)
sc.pl.umap(
    EC_d0,
    color=['leiden_0.1'],
    palette=user_defined_palette,
    color_map='Spectral_r',
    use_raw=False,
    ncols=5,
    wspace=0.7,
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    sort_order=False,
)

In [None]:
# Translate 'leiden_0.1' cluster ids into EC subset labels.
# Any cluster id not listed here is labelled 'ERROR'.
ec_label_by_cluster = {
    '2': '0:arEC',
    '0': '1:capEC',
    '1': '1:capEC',
    '4': '1:capEC',
    '3': '2:venEC',
}
EC_d0.obs['cell_type_subset'] = [
    ec_label_by_cluster.get(cluster_id, 'ERROR') for cluster_id in EC_d0.obs['leiden_0.1']
]

In [None]:
EC_d0.uns['cell_type_subset_colors'] = ['#F6222E', '#3283FE', '#16FF32']

In [None]:
# EC UMAP coloured by the new subset labels and by the 'stage', 'day' and 'sample' columns.
sc.set_figure_params(
    dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True
)

sc.pl.umap(
    EC_d0,
    color=['cell_type_subset', 'stage', 'day', 'sample'],
    cmap='Spectral_r',
    ncols=6,
    wspace=0.3,
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
)

### Our FB

In [None]:
# Subset the cells annotated as 'FB'. An explicit copy (not a view) is taken because the
# cells below modify FB_d0 in place (gene filtering, PCA, obs/uns assignments).
FB_d0 = adata_d0[adata_d0.obs['cell_type'] == 'FB'].copy()

In [None]:
# Remove columns with all 0s
sc.pp.filter_genes(FB_d0, min_cells=1)

In [None]:
sc.pp.highly_variable_genes(FB_d0, n_top_genes=3500, n_bins=20, flavor='seurat')

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(FB_d0, n_comps=200, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
observe_variance(FB_d0)

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(FB_d0, n_comps=50, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
sce.pp.harmony_integrate(FB_d0, 'sample')

In [None]:
sc.pp.neighbors(FB_d0, n_neighbors=15, use_rep='X_pca_harmony')
sc.tl.umap(FB_d0)

In [None]:
# Leiden clustering of the FB subset at resolutions 0.1, 0.2, ..., 1.0, stored in .obs
# as 'leiden_<resolution>'.
for resolution_parameter in (tenth / 10 for tenth in range(1, 11)):
    sc.tl.leiden(
        FB_d0,
        resolution=resolution_parameter,
        random_state=42,
        key_added=f'leiden_{resolution_parameter}',
    )

In [None]:
# Compare all ten Leiden resolutions on the FB UMAP (note vector_friendly=False here).
sc.set_figure_params(
    dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True
)
sc.pl.umap(
    FB_d0,
    color=[f'leiden_{tenth / 10}' for tenth in range(1, 11)],
    palette=user_defined_palette,
    color_map='Spectral_r',
    use_raw=False,
    ncols=5,
    wspace=0.7,
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    sort_order=False,
)

In [None]:
# Adipoq expression on the FB UMAP (the cluster labelled '9:Fat' is assigned just below).
sc.set_figure_params(
    dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True
)

sc.pl.umap(
    FB_d0,
    color=['Adipoq'],
    cmap='Spectral_r',
    size=15,
    ncols=6,
    wspace=0.3,
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
)

In [None]:
# Translate 'leiden_0.5' cluster ids into FB subset labels.
# Any cluster id not listed here is labelled 'ERROR'.
fb_label_by_cluster = {
    '1': '3:capsFB',
    '2': '4:intFB',
    '3': '4:intFB',
    '4': '4:intFB',
    '0': '5:medFB',
    '5': '9:Fat',
}
FB_d0.obs['cell_type_subset'] = [
    fb_label_by_cluster.get(cluster_id, 'ERROR') for cluster_id in FB_d0.obs['leiden_0.5']
]

In [None]:
FB_d0.uns['cell_type_subset_colors'] = ['#BDCDFF', '#3B00FB', '#1CFFCE', 'grey']

In [None]:
# FB UMAP coloured by the new subset labels.
sc.set_figure_params(
    dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True
)

sc.pl.umap(
    FB_d0,
    color=['cell_type_subset'],
    cmap='Spectral_r',
    ncols=6,
    wspace=0.3,
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
)

In [None]:
# Fibroblasts without the '9:Fat' cluster. An explicit copy (not a view) is taken because
# the cells below re-run gene filtering, HVG selection, PCA and UMAP on it in place.
FB_d0_nofat = FB_d0[FB_d0.obs['cell_type_subset'] != '9:Fat'].copy()

In [None]:
# Remove columns with all 0s
sc.pp.filter_genes(FB_d0_nofat, min_cells=1)

In [None]:
sc.pp.highly_variable_genes(FB_d0_nofat, n_top_genes=3500, n_bins=20, flavor='seurat')

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(FB_d0_nofat, n_comps=200, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
observe_variance(FB_d0_nofat)

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(FB_d0_nofat, n_comps=45, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
sce.pp.harmony_integrate(FB_d0_nofat, 'sample')

In [None]:
sc.pp.neighbors(FB_d0_nofat, n_neighbors=15, use_rep='X_pca_harmony')
sc.tl.umap(FB_d0_nofat)

In [None]:
# Fibroblast signatures scored on FB_d0_nofat below. The scoring cell labels 'capsular'
# and 'medullary' as mouse signatures and 'perilobular' and 'interlobular' as human ones.
# 'Slco2b1' is written in its standard capitalisation (it was 'SlcO2b1'); a
# differently-cased symbol would not match the gene names and would be left out of the score.
capsular = [
    'Akr1c18', 'Mrgprg', 'Upk3b', 'Smpd3', 'Sema3c', 'Dpp4', 'Efhd1', 'Pcsk6', 'Pi16', 'Ackr3',
    'Sfrp2', 'Msln', 'Sfrp4', 'Adgrd1', 'Spon2', 'Mfap5', 'Gpc3', 'Saa3', 'Ogn', 'Mt2',
    'Cpxm2', 'Lrrn4cl', 'Fndc1', 'Anxa3', 'Qpct', 'Cxcl13', 'Smoc2', 'Igfbp6', 'Nov', 'Csrp2',
]
medullary = [
    'Gja4', 'Mmp9', 'Pde2a', 'Vtn', 'Des', 'Crsc', 'Ecscr', 'C1qtnf5', 'Cx3cl1', 'Acta2',
    'Meox1', 'Sdc3', 'Postn', 'Tagln', 'Serpine2', 'Enpp2', 'Slco2b1', 'Ndufa4l2', 'Serpina3g', 'Ltbp1',
    'Gfra2', 'Mfge8', 'Rasa3',
]
perilobular = [
    'C7', 'Dpt', 'Ptn', 'Ptgds', 'Rbp5', 'Rspo3', 'Mfap4', 'Sparcl1', 'Srpx', 'Hspb6',
    'Ccl2', 'Fxyd6', 'Nr2f1', 'Dbi', 'Spry1', 'Litaf', 'Dcn', 'Runx1t1', 'Rwdd1',
]
interlobular = [
    'Lum', 'Mgp', 'Ogn', 'Fn1', 'Igfbp6', 'Col1a2', 'Col14a1', 'Col3a1', 'Col1a1', 'Fbn1',
    'Dcn', 'Itm2a', 'Ccdc80', 'Ctsk', 'Wisp2', 'Id2', 'Spon2', 'Fstl1', 'Fbln2', 'Mfap5',
]

In [None]:
# Score the four fibroblast signatures; the score names contain a line break so that the
# species tag appears on its own line in the plot titles.
for fb_score_name, fb_signature in [
    ('perilobular\n(human)', perilobular),
    ('interlobular\n(human)', interlobular),
    ('capsular\n(mouse)', capsular),
    ('medullary\n(mouse)', medullary),
]:
    sc.tl.score_genes(FB_d0_nofat, gene_list=fb_signature, score_name=fb_score_name, use_raw=False)

In [None]:
# UMAP of the fat-free fibroblasts coloured by the four signature scores (scale starts at 0).
sc.set_figure_params(
    dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True
)

sc.pl.umap(
    FB_d0_nofat,
    color=[
        'capsular\n(mouse)',
        'medullary\n(mouse)',
        'perilobular\n(human)',
        'interlobular\n(human)',
    ],
    palette=user_defined_palette,
    color_map='Spectral_r',
    use_raw=False,
    vmin=0,
    size=15,
    ncols=5,
    wspace=0.1,
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    sort_order=False,
)

In [None]:
# Fat-free fibroblast UMAP coloured by subset label, 'stage', 'day' and 'sample'.
sc.pl.umap(
    FB_d0_nofat,
    color=['cell_type_subset', 'stage', 'day', 'sample'],
    color_map='Spectral_r',
    use_raw=False,
    size=15,
    ncols=4,
    wspace=0.2,
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    sort_order=False,
)

### TEC

In [None]:
# Subset the cells annotated as 'TEC'. An explicit copy (not a view) is taken because the
# cells below modify TEC_d0 in place (gene filtering, PCA, scores, obs/uns assignments).
TEC_d0 = adata_d0[adata_d0.obs['cell_type'] == 'TEC'].copy()

In [None]:
# Remove columns with all 0s
sc.pp.filter_genes(TEC_d0, min_cells=1)

In [None]:
sc.pp.highly_variable_genes(TEC_d0, n_top_genes=3500, n_bins=20, flavor='seurat')

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(TEC_d0, n_comps=200, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
observe_variance(TEC_d0)

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(TEC_d0, n_comps=50, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
sce.pp.harmony_integrate(TEC_d0, 'sample')

In [None]:
sc.pp.neighbors(TEC_d0, n_neighbors=15, use_rep='X_pca_harmony')
sc.tl.umap(TEC_d0)

### Bornstein et al.

In [None]:
# TEC population marker lists for the "Bornstein et al." section; each is turned into a
# score in the next cell.
cTEC = ['Ccl25', 'Psmb11', 'Ly75', 'Prss16', 'Ctsl', 'Pax1']
mTEC1 = ['Itga6', 'Itgb4', 'Sox4', 'Ly6a', 'Ccl21a', 'Krt5', 'Ascl1', 'Ccl21c', 'Apoe']
mTEC2 = ['Aire', 'Fezf2', 'Cd40', 'H2-Aa', 'H2-Ab1', 'Cd74', 'Hdc', 'Ubd', 'Cd52']
mTEC3a = ['Spink5', 'Ly6d']
mTEC3b = ['Pigr', 'Car8']
mTEC3c = ['Ccl20', 'Serpinb6a']
# 'Ptgs1' was listed twice; each gene now appears once.
mTEC4 = ['Lrmp', 'Avil', 'Trpm5', 'Dclk1', 'Gng13', 'Ptgs1', 'L1cam', 'Sox9', 'Il25', 'Pou2f3', 'Gp2', 'Gnb3', 'Il17rb']

In [None]:
# One score per Bornstein et al. marker list, stored in TEC_d0.obs under the given name.
for tec_score_name, tec_signature in [
    ('cTEC', cTEC),
    ('mTEC1', mTEC1),
    ('mTEC2', mTEC2),
    ('mTEC3(a)', mTEC3a),
    ('mTEC3(b)', mTEC3b),
    ('mTEC3(c)', mTEC3c),
    ('mTEC4', mTEC4),
]:
    sc.tl.score_genes(TEC_d0, gene_list=tec_signature, score_name=tec_score_name, use_raw=False)

In [None]:
# TEC UMAP coloured by the Bornstein et al. scores (colour scale starts at 0).
sc.set_figure_params(
    dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True
)

sc.pl.umap(
    TEC_d0,
    color=['cTEC', 'mTEC1', 'mTEC2', 'mTEC3(a)', 'mTEC3(b)', 'mTEC3(c)', 'mTEC4'],
    palette=user_defined_palette,
    color_map='Spectral_r',
    use_raw=False,
    vmin=0,
    size=15,
    ncols=4,
    wspace=0.1,
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    sort_order=False,
)

### Dhalla et al.

In [None]:
# TEC population marker lists for the "Dhalla et al." section; each is turned into a
# score in the next cell.
# Several symbols were mis-capitalised or mistyped and have been corrected so they can
# match the gene names: 'H2-DMb2', 'AW112010', '2210407C18Rik', '2200002D01Rik',
# 'Ivns1abp', '1110017D15Rik', '1700016K19Rik' and 'Tnfaip2' (was 'Tndaip2'). The
# corrected spellings agree with those used in the Michelson et al. lists of this notebook.
preAire1 = ['Krt5', 'Ifi27l2a', 'Ccl21a', 'Krt14', 'Ifitm3', 'Apoe', 'Bst2', 'Ifitm2', 'Isg15', 'Ly6a']
preAire2 = ['Ccl21a', 'Ctsl', 'Ifitm3', 'Ifi27l2a', 'Tpm2', 'Tmsb10', 'Krt5', 'Mdk', 'Krt7', 'Krt17']
proliferating = ['Eif5a', 'Ran', 'Ppia', 'Npm1', 'H2afz', 'Eif4a1', 'Stmn1', 'Phgdh', 'Rps2', 'Utf1']
mature1 = ['Rplp0', 'Hdc', 'Aire', 'H2-DMb2', 'Hagh', 'Calcb', 'S100a14', 'Lrrc42', 'Cd74', 'Srgn']
mature2 = ['Ubd', 'Csn2', 'Malat1', 'H2-Eb1']
mature3 = ['Cd52', 'Npc2', 'H2-Oa', 'H2-Ab1', 'Ubd', 'H2-Aa', 'H2-Eb1', 'Atp6v0e', 'Srgn', 'Alas1']
postAire1 = ['AW112010', 'Lypd8', 'Cldn7', 'Prap1', 'Spink1', 'Ahcyl2', '2200002D01Rik', 'Gpx1', '2210407C18Rik', 'Lgals4']
postAire2 = ['Ly6d', 'Fxyd3', 'Spink5', 'Perp', 'Gsta4', 'Pdzk1ip1', 'Hspb1', 'Prdx5', 'Oit1', 'Dmkn']
postAire3 = ['Cd9', 'Igfbp5', 'Dnajc12', 'Car8', 'Ceacam10', 'Calm2', 'Cd164', 'Btg1', 'Scg5', 'Hepacam2']
tuftLike = ['Gng13', 'Avil', 'Lrmp', 'Reep5', 'Espn', 'Ivns1abp', 'Ptpn18', 'Mctp1', 'Ltc4s', 'Anxa4']
unknown = ['Cd3g', 'Ptprcap', 'Lat', 'Cd3d', 'Lck', 'Rpl13', 'Rpl13a', 'Rps4x', 'Rps18', 'Id3']
fibroblastLike1 = ['Sparc', 'Igfbp7', 'Rarres2', 'Lgals1', 'Serping1', 'Gsn', 'Dcn', 'Cd302', 'Serpinh1', 'Postn']
fibroblastLike2 = ['Ctla2a', 'Pecam1', 'Emcn', 'Gng11', 'Igfbp7', 'Cd34', 'Sparcl1', 'Tm4sf1', 'Plvap', 'Sepp1']
ciliated = ['Dynlrb2', 'Fam183b', 'Tppp3', 'Elof1', 'Chchd10', 'Riiad1', 'Nudc', '1110017D15Rik', 'Tubb4b', '1700016K19Rik']
GP2preferred = ['Serpinb6a', 'Ccl20', 'Ccl9', 'Serpinb1a', 'Ccl6', 'Pglyrp1', '2200002D01Rik', 'Bcl2a1b', 'Tnfaip2', 'Tmsb4x']

In [None]:
# One score per Dhalla et al. marker list, stored in TEC_d0.obs under the given name.
for tec_score_name, tec_signature in [
    ('pre-Aire mTEC1', preAire1),
    ('pre-Aire mTEC2', preAire2),
    ('prol mTEC', proliferating),
    ('mat mTEC1', mature1),
    ('mat mTEC2', mature2),
    ('mat mTEC3', mature3),
    ('post-Aire mTEC1', postAire1),
    ('post-Aire mTEC2', postAire2),
    ('post-Aire mTEC3', postAire3),
    ('GP2-pref mTEC', GP2preferred),
    ('tuft-like mTEC', tuftLike),
    ('unknown TEC', unknown),
    ('fibroblast-like1', fibroblastLike1),
    ('fibroblast-like2', fibroblastLike2),
    ('ciliated TEC', ciliated),
]:
    sc.tl.score_genes(TEC_d0, gene_list=tec_signature, score_name=tec_score_name, use_raw=False)

In [None]:
# TEC UMAP coloured by the Dhalla et al. scores (colour scale starts at 0).
sc.set_figure_params(
    dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True
)

sc.pl.umap(
    TEC_d0,
    color=[
        'pre-Aire mTEC1', 'pre-Aire mTEC2', 'prol mTEC',
        'mat mTEC1', 'mat mTEC2', 'mat mTEC3',
        'post-Aire mTEC1', 'post-Aire mTEC2', 'post-Aire mTEC3',
        'GP2-pref mTEC', 'tuft-like mTEC', 'ciliated TEC', 'unknown TEC',
        'fibroblast-like1', 'fibroblast-like2',
    ],
    palette=user_defined_palette,
    color_map='Spectral_r',
    use_raw=False,
    vmin=0,
    size=15,
    ncols=4,
    wspace=0.1,
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    sort_order=False,
)

### Baran-Gale et al.

In [None]:
# Two marker genes per TEC population for the "Baran-Gale et al." section; each pair is
# turned into a score of the same name in the next cell.
# Note: `per_cTEC` is bound again to a different list in the Michelson et al. section.
per_cTEC = ['Syngr1', 'Gper1']
mat_cTEC = ['Prss16', 'Cxcl12']
int_TEC = ['Ccl21a', 'Krt5']
prol_TEC = ['Ccna2', 'Pbk']
mat_mTEC = ['Aire', 'Cd52']
postAIRE_mTEC = ['Krt80', 'Spink5']
nTEC = ['Cd177', 'Car8']
tuft = ['Avil', 'Trpm5']
sTEC = ['Sod3', 'Dpt']

In [None]:
# One score per Baran-Gale et al. marker pair; each score is stored under the same name as
# the list it is computed from.
for tec_score_name, tec_signature in [
    ('per_cTEC', per_cTEC),
    ('mat_cTEC', mat_cTEC),
    ('int_TEC', int_TEC),
    ('prol_TEC', prol_TEC),
    ('mat_mTEC', mat_mTEC),
    ('postAIRE_mTEC', postAIRE_mTEC),
    ('nTEC', nTEC),
    ('tuft', tuft),
    ('sTEC', sTEC),
]:
    sc.tl.score_genes(TEC_d0, gene_list=tec_signature, score_name=tec_score_name, use_raw=False)

In [None]:
# TEC UMAP coloured by the Baran-Gale et al. scores (colour scale starts at 0).
sc.set_figure_params(
    dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True
)

sc.pl.umap(
    TEC_d0,
    color=[
        'per_cTEC', 'mat_cTEC', 'int_TEC', 'prol_TEC', 'mat_mTEC',
        'postAIRE_mTEC', 'nTEC', 'tuft', 'sTEC',
    ],
    palette=user_defined_palette,
    color_map='Spectral_r',
    use_raw=False,
    vmin=0,
    size=15,
    ncols=4,
    wspace=0.1,
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    sort_order=False,
)

### Nusser et al.

In [None]:
# Progenitor gene signatures for the "Nusser et al." section; scored on TEC_d0 in the next cell.
early_progenitor_genes = ['Ackr4',	'Adamts10',	'Agrn',	'Aldh2',	'Aldh6a1',	'Amotl1',	'Amotl2',	'Antxr1',	'Apoe',	'Ar',	'Bcam',	'Bcl11a',	'Bcl2',	'Bmp4',	'Btg2',	'Cbx6',	'Ccdc80',	'Cdh11',	'Cldn8',	'Clec11a',	'Clstn1',	'Col18a1',	'Cpne8',	'Cthrc1',	'Cyp1b1',	'Dcn',	'Ddr1',	'Dhrs3',	'Dlk2',	'Dnajc13',	'Dpp6',	'Dpysl2',	'Dsc3',	'Egr1',	'Eid1',	'Fam129a',	'Fam19a5',	'Fkbp9',	'Fmod',	'Fos',	'Fosb',	'Frmd6',	'Fstl1',	'Ogn',	'Gas1',	'Pak3',	'Gbp2',	'Palld',	'Gnaq',	'Pdpn',	'Gpm6b',	'Penk',	'Gprasp1',	'Plxdc2',	'Gstm2',	'Pmp22',	'H2-Dma',	'Prelp',	'Hes6',	'Prrg3',	'Hic1',	'Prss23',	'Hsd17b10',	'Ptprz1',	'Igfbp2',	'Ptrf',	'Igfbp3',	'Pygb',	'Igfbp5',	'Rbp1',	'Igfbp7',	'Rnase4',	'Iigp1',	'Scn1a',	'Il33',	'Serpinf1',	'Irgm1',	'Serpinh1',	'Isl1',	'Shisa2',	'Itm2c',	'Slc2a13',	'Kazald1',	'Sord',	'Lamb1',	'Sparc',	'Laptm4a',	'Spon2',	'Limch1',	'Spry1',	'Ltbp3',	'Tcn2',	'Maged1',	'Tgfbr2',	'Megf6',	'Tgfbr3',	'Meis1',	'Thbd',	'Mgll',	'Thbs1',	'Mgp',	'Timp2',	'Myl9',	'Tinagl1',	'Mylk',	'Tnfrsf19',	'Nbl1',	'Tns1',	'Nell2',	'Tns3',	'Nfia',	'Trim29',	'Nfib',	'Trp63',	'Nfix',	'Tspan9',	'Nr2f1',	'Twsg1',	'Nr4a1',	'Txnip',	'Nrtn',	'Unc119',	'Ntrk3',	'Vmac',	'Oat',	'Wls',	'Wscd1',	'Xdh',	'Zfp36']
postnatal_progenitor_genes = ['Acta2',	'Apoe',	'Ascl1',	'Boc',	'C1s1',	'C3',	'Cald1',	'Ccl11',	'Ccl21a',	'Clca3a1',	'Col6a1',	'Col6a2',	'Cyr61',	'Ddx60',	'Dpysl3',	'Dst',	'Emp2',	'Fam101b',	'Flna',	'Fst',	'Fzd2',	'Gas1',	'Glul',	'Gpx3',	'Gsn',	'Hpgd',	'Htra1',	'Id1',	'Ifi27l2a',	'Igfbp4',	'Igfbp5',	'Irf7',	'Isg15',	'Itga6',	'Itgb4',	'Krt14',	'Krt5',	'Krt7',	'Lamb3',	'Lars2',	'Lifr',	'Mgp',	'Myl9',	'Nrbp2',	'S1pr3',	'Slc4a11',	'Sox4',	'Stat2',	'Sult5a1',	'Tagln',	'Tgfbi',	'Tpm2',	'Wfikkn2']

In [None]:
# Score both progenitor signatures; the line break in each score name splits the plot title.
sc.tl.score_genes(
    TEC_d0,
    gene_list=early_progenitor_genes,
    score_name='early progenitor\nsignature',
    use_raw=False,
)
sc.tl.score_genes(
    TEC_d0,
    gene_list=postnatal_progenitor_genes,
    score_name='postnatal progenitor\nsignature',
    use_raw=False,
)

In [None]:
# TEC UMAP coloured by the two progenitor signature scores (colour scale starts at 0).
sc.set_figure_params(
    dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True
)

sc.pl.umap(
    TEC_d0,
    color=['early progenitor\nsignature', 'postnatal progenitor\nsignature'],
    palette=user_defined_palette,
    color_map='Spectral_r',
    use_raw=False,
    vmin=0,
    size=15,
    ncols=4,
    wspace=0.1,
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    sort_order=False,
)

### Michelson et al., 2022

In [None]:
# TEC population gene signatures for the "Michelson et al., 2022" section; each is turned
# into a score of the same name in the next cell.
# Note: `per_cTEC` rebinds the two-gene list of the same name from the Baran-Gale et al. section.
per_cTEC = ['Prss16',	'Ctsl',	'Fabp5',	'Cxcl12',	'Cstb',	'Pltp',	'Psmb11',	'Hes6',	'Pax1',	'Crip3',	'Castor1',	'Nqo1',	'Mdk',	'1810058I24Rik',	'Ndufa11',	'Kcnk2']
adult_cTEC = ['Prss16',	'Tbata',	'Ccl25',	'Ctsl',	'Psmb11',	'Cxcl12',	'Fxyd2',	'Cstb',	'Crip3',	'Ly75',	'Isg20',	'Tmem131l',	'Prxl2b',	'Synm',	'Pax1',	'Cd83',	'Pltp',	'Ndrg3',	'Fn1',	'Gas6',	'Sgpl1']
transit_ampl = ['Hmgb2',	'H2az1',	'Stmn1',	'Ran',	'Mki67',	'Cenpf',	'Cks2',	'Hist1h2ap',	'Top2a']
immature = ['Rgs5',	'Ccl21a',	'Fcgbp',	'Tagln',	'Col6a1',	'Krt14',	'Gm8113',	'Lifr',	'Krt5',	'Flna']
aire = ['S100a14',	'Cd52',	'Cyba',	'Srgn',	'H2-Ab1',	'H2-Aa',	'H2-Eb1',	'Ubd',	'Cd74',	'Aire',	'Ifi30',	'Hdc',	'Calcb',	'Csn2',	'Cxcl9',	'S100a8',	'Ccl5',	'Lyz2',	'S100a9',	'Nts',	'Pyy']
enteropato = ['Reg3g',	'Lypd8',	'Spink1',	'Saa3',	'Prap1',	'Muc13',	'Pigr',	'Lgals4',	'Saa1',	'AW112010',	'Cd52',	'2210407C18Rik',	'Ttr',	'Apoc3',	'Aldob',	'Apoa4',	'Klk1b26',	'S100g',	'Reg3b',	'Fabp9',	'Apoa1',	'Tff3',	'Reg1',	'Clca1']
microfold = ['Ccl20',	'Pglyrp1',	'Serpinb6a',	'Hamp',	'Ccl6',	'Serpinb1a',	'Ccl9',	'Wfdc17',	'Lyz1',	'Rnase1',	'Fabp1',	'Gp2',	'Tnfaip2',	'Msln',	'Cyp2a5',	'Wfdc18',	'Csn2',	'2200002D01Rik',	'Reg3g',	'Ctsh',	'Prg2',	'Gucy1b2',	'Tnfrsf11b',	'Cr2',	'Gjb2',	'Clu',	'Bcl2a1b',	'AW112010',	'H2-M2',	'Fabp5',	'Apoa1',	'Tmsb4x',	'Krt20',	'Spink4',	'Apoa4',	'Nostrin',	'Gpx1',	'Anxa5',	'Spib',	'Plb1',	'Abca13',	'Gm50069',	'Sncg',	'Sox8',	'Bcl2a1a',	'Iapp',	'Bcl2a1d',	'Atp6v1c1',	'Ubd']
neuroendocrine = ['Clca3b',	'Klk1',	'Cd177',	'Resp18',	'Chgb',	'Cacna2d1',	'Chga',	'Ceacam10',	'Rbp4',	'Cd9',	'Iglc3',	'Iapp',	'Ghrl',	'Fam183b',	'Scg2',	'Scg5',	'Itln1',	'Krt7',	'Insm1',	'Fmo3',	'Fmo2',	'Cplx2',	'AW112010']
ptf1a_pancreatic = ['Prss2',	'Clps',	'Spink1',	'Klk1',	'Ptf1a',	'Chga',	'AW112010',	'Try5',	'Nupr1',	'Reg3g',	'Fetub',	'Pigr',	'Car8',	'Ceacam10']
muscle = ['Cdkn1c',	'Meg3',	'Mylpf',	'Acta1',	'Myog',	'Myl1',	'Actc1',	'Sln',	'Ckb',	'Ckm',	'Ttn',	'C430049B03Rik',	'Spg21',	'Mymx',	'Ppp1r14b',	'Dlk1',	'Neb',	'5430431A17Rik',	'Hes6',	'Rian',	'Tnnc2',	'Des',	'Lsm6',	'Arl6ip5',	'Rbm24']
tuft1 = ['Fyb',	'Dclk1',	'Ptpn18',	'Atp1a2',	'Sh2d6',	'Plcb2',	'Nrgn',	'Trpm5',	'Agt',	'Ahnak2',	'Ly6g6f']
tuft2 = ['Gnat3',	'Gnb3',	'Plac8',	'Gng13',	'Lrmp',	'Rgs13',	'Cited2',	'Krt19',	'Ethe1',	'Fxyd6',	'Cystm1']
goblet = ['Ltf',	'Wfdc2',	'Gsto1',	'Ecm1',	'Bpifa1',	'Anxa1',	'Lcn2',	'Nupr1',	'Trf',	'Gsta4',	'Serpinb11',	'Pigr',	'Ly6d',	'Scgb3a2',	'Spink5',	'Ifitm1',	'Cxcl17',	'Serpinb2',	'Psca',	'Ly6a',	'Krt6a',	'Krt6b',	'Prss27',	'Scgb3a1',	'Hspb1',	'S100a6',	'Sftpd',	'Cxcl5',	'Cyp2f2',	'Sat1',	'Fxyd3',	'Cbr2',	'Clu',	'Aqp4',	'Cnfn',	'Rptn',	'Plet1',	'Lgals3']

In [None]:
# One score per Michelson et al. signature, stored under the same name as its gene list.
# Note: 'per_cTEC' is also the score name used in the Baran-Gale et al. section, so that
# obs column is replaced here by the Michelson-based score.
for tec_score_name, tec_signature in [
    ('per_cTEC', per_cTEC),
    ('adult_cTEC', adult_cTEC),
    ('transit_ampl', transit_ampl),
    ('immature', immature),
    ('aire', aire),
    ('enteropato', enteropato),
    ('microfold', microfold),
    ('neuroendocrine', neuroendocrine),
    ('ptf1a_pancreatic', ptf1a_pancreatic),
    ('muscle', muscle),
    ('tuft1', tuft1),
    ('tuft2', tuft2),
    ('goblet', goblet),
]:
    sc.tl.score_genes(TEC_d0, tec_signature, score_name=tec_score_name, use_raw=False)

In [None]:
# TEC UMAP coloured by the Michelson et al. scores (colour scale starts at 0).
sc.set_figure_params(
    dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True
)

sc.pl.umap(
    TEC_d0,
    color=[
        'per_cTEC', 'adult_cTEC', 'transit_ampl', 'immature', 'aire',
        'enteropato', 'microfold', 'neuroendocrine', 'ptf1a_pancreatic',
        'muscle', 'tuft1', 'tuft2', 'goblet',
    ],
    palette=user_defined_palette,
    color_map='Spectral_r',
    use_raw=False,
    vmin=0,
    size=15,
    ncols=4,
    wspace=0.1,
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    sort_order=False,
)

### TECs clustering and annotation

In [None]:
# Leiden clustering of the TEC subset at resolutions 0.1, 0.2, ..., 1.0, stored in .obs
# as 'leiden_<resolution>'.
for resolution_parameter in (tenth / 10 for tenth in range(1, 11)):
    sc.tl.leiden(
        TEC_d0,
        resolution=resolution_parameter,
        random_state=42,
        key_added=f'leiden_{resolution_parameter}',
    )

In [None]:
# Compare all ten Leiden resolutions on the TEC UMAP (note vector_friendly=False here).
sc.set_figure_params(
    dpi=80, dpi_save=300, color_map='viridis', vector_friendly=False, transparent=True
)
sc.pl.umap(
    TEC_d0,
    color=[f'leiden_{tenth / 10}' for tenth in range(1, 11)],
    palette=user_defined_palette,
    color_map='Spectral_r',
    use_raw=False,
    ncols=5,
    wspace=0.7,
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    sort_order=False,
)

In [None]:
# Resolution 1.0 on its own: this is the clustering the TEC annotation below is based on.
sc.set_figure_params(
    dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True
)
sc.pl.umap(
    TEC_d0,
    color=['leiden_1.0'],
    palette=user_defined_palette,
    color_map='Spectral_r',
    use_raw=False,
    size=15,
    ncols=6,
    wspace=0.2,
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    sort_order=False,
)

In [None]:
# Translate 'leiden_1.0' cluster ids into TEC subset labels.
# Any cluster id not listed here is labelled 'ERROR'.
tec_label_by_cluster = {
    '4': '10:aaTEC1',
    '10': '10:aaTEC1',
    '11': '11:aaTEC2',
    '8': '12:cTEC',
    '0': '13:mTEC1',
    '5': '13:mTEC1',
    '7': '13:mTEC1',
    '9': '14:mTEC-prol',
    '1': '15:mTEC2',
    '2': '15:mTEC2',
    '3': '15:mTEC2',
    '13': '16:mTEC3',
    '6': '17:mimic(tuft)',
    '12': '18:mimic(neuroendo)',
    '14': '19:mimic(goblet)',
    '15': '20:mimic(microfold)',
}
TEC_d0.obs['cell_type_subset'] = [
    tec_label_by_cluster.get(cluster_id, 'ERROR') for cluster_id in TEC_d0.obs['leiden_1.0']
]

In [None]:
TEC_d0.uns['cell_type_subset_colors'] = [ '#2ED9FF', '#c1c119', '#8b0000', '#FE00FA', "#F8A19F", '#1CBE4F','#B5EFB5',  '#AA0DFE','#FEAF16', '#325A9B', '#C075A6']

In [None]:
# TEC UMAP coloured by the new subset labels and by the 'stage', 'day' and 'sample' columns.
sc.set_figure_params(
    dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True
)

sc.pl.umap(
    TEC_d0,
    color=['cell_type_subset', 'stage', 'day', 'sample'],
    cmap='Spectral_r',
    use_raw=False,
    ncols=6,
    wspace=0.5,
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
)

In [None]:
# Krt8, Krt5 and Krt14 expression on the TEC UMAP.
sc.pl.umap(
    TEC_d0,
    color=['Krt8', 'Krt5', 'Krt14'],
    color_map='Spectral_r',
    use_raw=False,
    size=15,
    ncols=5,
    wspace=0.2,
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    sort_order=False,
)

In [None]:
markers =['Epcam', 'H2-Aa', 'Cd74', 'Ccl21a', 'Krt8',  'Krt18', 'Krt5', 'Krt14',  'Foxn1', 'Aire', 'Ikbkb', 'Chuk']

In [None]:
subset_order = [ '12:cTEC', '13:mTEC1', '14:mTEC-prol', '15:mTEC2', '16:mTEC3', '17:mimic(tuft)', '18:mimic(neuroendo)', '19:mimic(goblet)', '20:mimic(microfold)', '10:aaTEC1', '11:aaTEC2']


In [None]:
# Dot plot of `markers` across TEC subsets, restricted to cells with stage '18mo';
# expression is scaled per gene (standard_scale='var') and genes are placed on the rows.
axes = sc.pl.dotplot(
    TEC_d0[TEC_d0.obs['stage'] == '18mo'],
    markers,
    groupby='cell_type_subset',
    categories_order=subset_order,
    standard_scale='var',
    cmap='Reds',
    swap_axes=True,
)

In [None]:
markers = ['Fgfr1', 'Fgfr2','Bmpr1a',  'Bmpr2',  'Bmpr1b', 'Egfr', 'Ltbr', 'Tnfrsf11a','Lrp1',  'Cdh11',  'Cadm1',  'Cd44',  'Sdc4',  'Itgav', 'Itgb1']

In [None]:
# Dot plot of the second `markers` list across TEC subsets for stage '18mo' cells;
# unlike the previous dot plot, values are not scaled per gene but capped at vmax=2.
axes = sc.pl.dotplot(
    TEC_d0[TEC_d0.obs['stage'] == '18mo'],
    markers,
    groupby='cell_type_subset',
    categories_order=subset_order,
    vmax=2,
    cmap='Reds',
    swap_axes=True,
)

In [None]:
# Fraction of each TEC subset within every stage (each row sums to 1), followed by the
# normalised difference between the '18mo' and '02mo' rows, expressed in percent.
crosstb_TEC = pd.crosstab(
    index=TEC_d0.obs['stage'],
    columns=TEC_d0.obs['cell_type_subset'],
    normalize='index',
)
diffcrosstb_TEC = (
    (crosstb_TEC.loc["18mo"] - crosstb_TEC.loc["02mo"])
    / (crosstb_TEC.loc["18mo"] + crosstb_TEC.loc["02mo"])
) * 100
crosstb_TEC

In [None]:
# Fraction of each fibroblast subset (fat cluster excluded) within every stage, followed by
# the normalised difference between the '18mo' and '02mo' rows, expressed in percent.
crosstb_FB = pd.crosstab(
    index=FB_d0_nofat.obs['stage'],
    columns=FB_d0_nofat.obs['cell_type_subset'],
    normalize='index',
)
diffcrosstb_FB = (
    (crosstb_FB.loc["18mo"] - crosstb_FB.loc["02mo"])
    / (crosstb_FB.loc["18mo"] + crosstb_FB.loc["02mo"])
) * 100
crosstb_FB

In [None]:
# Fraction of each EC subset within every stage, followed by the normalised difference
# between the '18mo' and '02mo' rows, expressed in percent.
crosstb_EC = pd.crosstab(
    index=EC_d0.obs['stage'],
    columns=EC_d0.obs['cell_type_subset'],
    normalize='index',
)
diffcrosstb_EC = (
    (crosstb_EC.loc["18mo"] - crosstb_EC.loc["02mo"])
    / (crosstb_EC.loc["18mo"] + crosstb_EC.loc["02mo"])
) * 100
crosstb_EC

In [None]:
diffcrosstb = pd.concat([diffcrosstb_EC, diffcrosstb_FB,  diffcrosstb_TEC])

In [None]:
# Horizontal bar plot of the per-subset change between stages, sorted by value.
# NOTE: the colours are applied to the bars by position after sorting, so this (reversed)
# list is tied to the current ordering of the values rather than to subset names.
subset_palette = ['#2ED9FF', '#c1c119', '#8b0000', '#FE00FA',  '#1CFFCE', '#325A9B', '#3283FE', '#FEAF16', '#3B00FB', '#F6222E', '#16FF32', '#BDCDFF',  '#C075A6',  '#AA0DFE', "#F8A19F", '#1CBE4F','#B5EFB5'][::-1]
with rc_context({'figure.figsize': (3, 7)}):
    ax = diffcrosstb.sort_values(ascending=True).plot(kind="barh", stacked=True, edgecolor="black", color=subset_palette)
    ax.grid(False)
    # Dashed reference line at zero change, spanning the full height of the axes.
    # (The previous hand-built Line2D started at y=0 and so missed the lower half of the
    # first bar; the trailing `ax.plot(legend=None)` call drew nothing and was removed.)
    ax.axvline(0, color='black', lw=1, ls='--')
    #plt.savefig('proportions.pdf')

### Transfer annotation

In [None]:
# Gather one fine-grained label per cell: the subset labels assigned in the EC, FB and TEC
# objects, plus the unchanged 'cell_type' label for the '6:MEC', '7:vSMC/PC' and '8:nmSC' cells.
annotated_subsets = pd.concat(
    [
        EC_d0.obs['cell_type_subset'],
        FB_d0.obs['cell_type_subset'],
        TEC_d0.obs['cell_type_subset'],
        adata_d0[
            adata_d0.obs['cell_type'].isin(['6:MEC', '7:vSMC/PC', '8:nmSC'])
        ].obs['cell_type'],
    ]
)

In [None]:
adata_d0.obs['cell_type_subset']=''

In [None]:
# Write the collected labels into the full object for every cell that has one; the values
# are aligned on the cell index. A single .loc assignment is used instead of chained
# indexing (obs[col][mask] = ...), which raises SettingWithCopyWarning and does not
# modify the frame when pandas copy-on-write is active.
has_annotation = adata_d0.obs.index.isin(annotated_subsets.index)
adata_d0.obs.loc[has_annotation, 'cell_type_subset'] = annotated_subsets

In [None]:
# Full-object UMAP, one panel each for 'cell_type', 'stage' and 'day'.
sc.pl.umap(
    adata_d0,
    color=['cell_type', 'stage', 'day'],
    color_map='Spectral_r',
    use_raw=False,
    size=15,
    ncols=4,
    wspace=0.3,
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    sort_order=False,
)

In [None]:
# Full-object UMAP coloured by the transferred subset labels, using a dedicated 22-colour
# list (this rebinds `subset_palette`, which was used above for the proportions bar plot).
subset_palette = [
    '#F6222E', '#3283FE', '#16FF32', '#BDCDFF', '#3B00FB', '#1CFFCE',
    '#d62728', '#19c9b3', '#FFA5D2', 'grey',
    '#2ED9FF', '#c1c119', '#8b0000', '#FE00FA', "#F8A19F", '#1CBE4F',
    '#B5EFB5', '#AA0DFE', '#FEAF16', '#325A9B', '#C075A6', 'black',
]

sc.pl.umap(
    adata_d0,
    color=['cell_type_subset'],
    palette=subset_palette,
    color_map='Spectral_r',
    use_raw=False,
    size=15,
    ncols=4,
    wspace=0.3,
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    sort_order=False,
)

In [None]:
path_to_h5ad = '../output/metadata/anndata_objects/Fig1_pt1_annotated.h5ad'

In [None]:
adata_d0.write(path_to_h5ad)

In [None]:
# Reload the annotated object that was just written to `path_to_h5ad`.
adata_d0 = sc.read_h5ad(path_to_h5ad)
# NOTE(review): same uns['log1p']['base'] reset as after the first load — presumably the
# workaround for scanpy's KeyError 'base' on reloaded objects; confirm it is still required.
adata_d0.uns['log1p']["base"] = None

### EMT signature

In [None]:
# Combined label '<stage>_<cell_type_subset>' for every cell.
# Built with vectorised string concatenation: the earlier placeholder assignment of '' was
# immediately overwritten, and the row-wise agg('_'.join, axis=1) called Python once per cell.
adata_d0.obs['stage_and_subset'] = (
    adata_d0.obs['stage'].astype(str) + '_' + adata_d0.obs['cell_type_subset'].astype(str)
)

In [None]:
# Cells with stage '18mo' from four TEC subsets plus the '5:medFB' fibroblast subset.
TEC_d0_emt = adata_d0[
    adata_d0.obs['cell_type_subset'].isin(
        ['10:aaTEC1', '11:aaTEC2', '12:cTEC', '13:mTEC1', '5:medFB']
    )
    & (adata_d0.obs['stage'] == '18mo')
]

In [None]:
# All TEC cells plus the '5:medFB' fibroblasts, restricted to stage '18mo'.
# An explicit copy (not a view) is taken because the next cell filters genes on this
# object in place.
adata_d0_emt = adata_d0[
    ((adata_d0.obs['cell_type'] == 'TEC') | (adata_d0.obs['cell_type_subset'] == '5:medFB'))
    & (adata_d0.obs['stage'] == '18mo')
].copy()

In [None]:
sc.pp.filter_genes(adata_d0_emt, min_cells=1)

In [None]:
import magic

In [None]:
emt_genes = pd.read_excel('../data/public/mmc2.xlsx')

In [None]:
# Genes present both in the dataset and in the EMT gene table read from Excel.
# Index.intersection is explicit; the `&` operator is no longer a set operation on
# pandas Index objects in recent pandas versions.
emt_genes_common = adata_d0_emt.var.index.intersection(emt_genes.gene.values)

In [None]:
magic_op = magic.MAGIC()

In [None]:
magic_op.set_params(knn=5, t=4)

In [None]:
emt_genes = ['Epcam', 'Pax4', 'Egr1',
             'Jun', 'Junb', 'Atf4', 'Sox4', 'Klf6', 'Cd44', 'Fn1', 'Tgm2', 'Foxc2', 'Prrx1', 'Pou5f1', 'Tcf4', 'Cdh1', 'Esrp1', 'Grhl2', 'Esrp2', 'Ovol1', 'Ovol2', 'Snai1', 'Rbfox2', 'Vim', 'Zeb2', 'Zeb1', 'Twist1','Cdh1', 'Zeb2', 'Vim', 'Spp1', 'Krt14', 'Ar', 'Sparc', 'Esrp1', 'Twist1', 'Trpm3','Cldn3',	'Slc16a11',	'Slc9a3r1','Isl1',	'Cdk19',	'Ptprd',	'Taldo1',	'BC006965', 'Aldoc',	'Socs2',	'Fam107a','Fbxo2',	'Nedd4l',	'Shank2','Kif19a',	'Bmp6',	'Id2',	'Arl4c', 'Des',	'Slc5a8',	'Kcnk1',	'Kirrel3',		'Prxl2a',	'Thsd4',	'Rdh10',	'Rhov',	'Plet1',	'Cited4',	'Galm',	'Rtl4',	'Gna14',	'Dclk2',	'Nipal2',
            'Gata6', 'Fap', 'Fn1', 'Gas1', 'Notch2', 'Pdgfra', 'Pdgfrb', 'Plin2', 'Ndrg2', 'Prrx1', 'Olfm2',
                'Notch1', 'Notch3', 'Il6',  'Snai2', 'Spp1', 'Tgfb1', 'Tgfbr3', 'Thy1', 'Timp1', 'Timp3']

In [None]:
# Run MAGIC on the EMT subset, restricted to the genes in `emt_genes_common`.
# NOTE(review): `emt_genes_common` was computed from the Excel gene table before `emt_genes`
# was rebound to the hand-written list in the preceding cell, so that list is not what MAGIC
# receives here. Confirm this is intended.
adata_d0_emt_magic = magic_op.fit_transform(adata_d0_emt, genes=emt_genes_common)

In [None]:
# Per-cell Cdh1 value from the MAGIC output, stored in obs so it can serve as a sort key.
# obs_vector pulls the single gene column directly instead of materialising the whole
# expression matrix as a DataFrame.
adata_d0_emt_magic.obs['cdh1_time'] = adata_d0_emt_magic.obs_vector('Cdh1')

In [None]:
# Per-cell Vim value from the MAGIC output, stored in obs.
# obs_vector pulls the single gene column directly instead of materialising the whole
# expression matrix as a DataFrame.
adata_d0_emt_magic.obs['vim_time'] = adata_d0_emt_magic.obs_vector('Vim')

In [None]:
import scvelo as scv
scv.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True, format='pdf')

In [None]:
import random

len(emt_genes_common)

In [None]:
# Plot at most 2000 randomly chosen EMT genes, ordered by the Cdh1 value of each cell.
# A dedicated seeded generator keeps the selection reproducible across runs, and the cap
# avoids a ValueError when fewer than 2000 genes are available.
heatmap_genes = random.Random(42).sample(list(emt_genes_common), min(2000, len(emt_genes_common)))
scv.pl.heatmap(adata_d0_emt_magic, var_names=heatmap_genes, sortby='cdh1_time', color_map=user_defined_cmap_degs, col_color='cell_type_subset', n_convolve=100)#, save='vim-sorted_EMT_genes.pdf')

In [None]:
adata_d0_emt_magic.uns['cell_type_subset_colors'] = adata_d0_emt.uns['cell_type_subset_colors'] 

In [None]:
sc.pl.scatter(adata_d0_emt_magic, x='Vim',  y='Cdh1', color='cell_type_subset', palette=adata_d0_emt.uns['cell_type_subset_colors'], projection='2d', size=100, color_map='inferno')#, save='_EMT_subset.pdf')


In [None]:
# Genes overlaid as colour on the Vim-vs-Cdh1 scatter of the MAGIC output.
# One PDF is saved per gene, in this order, as '_EMT_<gene>.pdf'.
emt_scatter_genes = ['Epcam', 'Esrp1', 'Esrp2', 'Spp1', 'Zeb2', 'Grhl2', 'Ovol1', 'Ovol2', 'Rbfox2',
                     'Plin2', 'Ndrg2', 'Prrx1', 'Olfm2', 'Notch1', 'Notch3', 'Tgfbr3', 'Zeb1', 'Il6',
                     'Snai1', 'Snai2', 'Thy1', 'Tgfb1', 'Twist1', 'Des', 'Pdgfra']

for gene in emt_scatter_genes:
    sc.pl.scatter(adata_d0_emt_magic, x='Vim', y='Cdh1', color=gene, projection='2d', size=100, color_map='inferno', save=f'_EMT_{gene}.pdf')

### Save average expression per gene, per group

In [None]:
# Composite "<stage>_<subset>" label per cell; this is the grouping key for the per-group means below.
# Built with vectorised string concatenation; the former empty-string initialisation was
# overwritten immediately and the row-wise agg is slow on large obs tables.
adata_d0.obs['stage_and_subset'] = (
    adata_d0.obs['stage'].astype(str) + '_' + adata_d0.obs['cell_type_subset'].astype(str)
)

In [None]:
def grouped_obs_mean(adata, group_key, layer=None, gene_symbols=None):
    """Average every gene over the observations of each group.

    Parameters
    ----------
    adata
        AnnData-like object exposing ``obs``, ``var``, ``var_names``, ``shape``,
        ``X`` / ``layers`` and positional row indexing (``adata[idx]``).
    group_key
        Column of ``adata.obs`` to group the observations by.
    layer
        Name of the layer to average; ``adata.X`` is used when ``None``.
    gene_symbols
        Column of ``adata.var`` whose values label the rows of the result;
        ``adata.var_names`` is used when ``None``.

    Returns
    -------
    pandas.DataFrame
        Table with one row per gene and one column per group, holding float64 means.
    """
    def get_matrix(subset):
        # Pick the requested layer, falling back to the main matrix.
        return subset.X if layer is None else subset.layers[layer]

    # Row labels of the result. The original code read an undefined name here, so any
    # call with gene_symbols failed, and the computed labels were never used.
    if gene_symbols is not None:
        row_labels = pd.Index(adata.var[gene_symbols])
    else:
        row_labels = adata.var_names

    grouped = adata.obs.groupby(group_key)
    out = pd.DataFrame(
        np.zeros((adata.shape[1], len(grouped)), dtype=np.float64),
        columns=list(grouped.groups.keys()),
        index=row_labels,
    )

    # grouped.indices maps each group to the positional row indices of its observations.
    for group, idx in grouped.indices.items():
        X = get_matrix(adata[idx])
        # ravel flattens the (1, n_genes) result that matrix-like inputs return.
        out[group] = np.ravel(X.mean(axis=0, dtype=np.float64))
    return out

In [None]:
grouped_obs_mean(adata_d0, group_key='stage_and_subset').to_excel('adata_d0_exprs.xlsx', sheet_name='Sheet1')

## Include looms

In [None]:
import scvelo as scv
scv.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True, format='pdf')

In [None]:
from pathlib import Path

# Velocyto loom files to merge. The order matters: the per-file cell counts collected in
# `ds_list` are consumed positionally when sample labels are assigned to the merged cells.
loom_path = ['../data/10xGenomics/LJ078/LJ078-Baseline_1_1/cr-results/velocyto/cr-results.loom',
             '../data/10xGenomics/LJ078/ECs/cr-results/velocyto/cr-results.loom',
             '../data/10xGenomics/LJ078/LJ078-Baseline_1_2/cr-results/velocyto/cr-results.loom',
             '../data/10xGenomics/LJ078/FBs/cr-results/velocyto/cr-results.loom',
             '../data/10xGenomics/LJ129/LJ129_Baseline_CD45negative_b1/cr-results/velocyto/cr-results.loom',
             '../data/10xGenomics/LJ129/LJ129_Baseline_CD45negative_b2/cr-results/velocyto/cr-results.loom',
             '../data/10xGenomics/LJ129/LJ129_Baseline_EC/cr-results/velocyto/cr-results.loom',
             '../data/10xGenomics/LJ129/LJ129_Baseline_FB/cr-results/velocyto/cr-results.loom'
            ]

# Number of cells copied from each loom file, in `loom_path` order.
ds_list = []

with loompy.new('loom_merged_d0.loom') as dsout:  # Create a new, empty, loom file
    for f in loom_path:
        with loompy.connect(f) as ds:
            totals = ds.map([np.sum], axis=1)[0]
            cells = np.where(totals > 0)[0]  # Keep only cells with a non-zero total count
            # Count the cells that are actually copied, not every column of the file;
            # otherwise the counts stop matching the merged file once a cell is dropped.
            ds_list.append(len(cells))
            for (ix, selection, view) in ds.scan(items=cells, axis=1):
                dsout.add_columns(view.layers, col_attrs=view.ca, row_attrs=view.ra)

In [None]:
ldata = scv.read('loom_merged_d0.loom')

In [None]:
# Recompute the per-file cell counts without rewriting the merged loom file.
# The order of `loom_path` matters: the counts in `ds_list` are consumed positionally.
loom_path = ['../data/10xGenomics/LJ078/LJ078-Baseline_1_1/cr-results/velocyto/cr-results.loom',
             '../data/10xGenomics/LJ078/ECs/cr-results/velocyto/cr-results.loom',
             '../data/10xGenomics/LJ078/LJ078-Baseline_1_2/cr-results/velocyto/cr-results.loom',
             '../data/10xGenomics/LJ078/FBs/cr-results/velocyto/cr-results.loom',
             '../data/10xGenomics/LJ129/LJ129_Baseline_CD45negative_b1/cr-results/velocyto/cr-results.loom',
             '../data/10xGenomics/LJ129/LJ129_Baseline_CD45negative_b2/cr-results/velocyto/cr-results.loom',
             '../data/10xGenomics/LJ129/LJ129_Baseline_EC/cr-results/velocyto/cr-results.loom',
             '../data/10xGenomics/LJ129/LJ129_Baseline_FB/cr-results/velocyto/cr-results.loom'
            ]

# Number of cells with a non-zero total count in each loom file, in `loom_path` order.
ds_list = []

for f in loom_path:
    with loompy.connect(f) as ds:
        totals = ds.map([np.sum], axis=1)[0]
        cells = np.where(totals > 0)[0]  # Same selection as used when merging the files
        # Count the selected cells rather than every column of the file, so the counts
        # match what the merge step copies.
        ds_list.append(len(cells))
           

In [None]:
# One sample suffix per loom file, each repeated as many times as the matching entry of ds_list.
# The labels are positional, so their order has to follow the order of loom_path.
# NOTE(review): presumably the LJ078 files are the 02mo samples and the LJ129 files the 18mo ones — confirm.
x = np.repeat(np.array([ '-1@mo02_CD45neg1_d0', '-1@mo02_EC_d0', '-1@mo02_CD45neg2_d0', '-1@mo02_FB_d0',
                         '-1@mo18_CD45neg1_d0', '-1@mo18_CD45neg2_d0', '-1@mo18_EC_d0', '-1@mo18_FB_d0']), ds_list, axis=0)

In [None]:
ldata.obs['name_match'] = x

In [None]:
# The barcode is the text between the first ':' and the last 'x' of each loom observation name.
start = ':'
end = 'x'

barcodes = [
    str(obs_name[obs_name.find(start) + len(start):obs_name.rfind(end)])
    for obs_name in ldata.obs_names
]
ldata.obs['barcodes'] = barcodes

In [None]:
ldata.obs_names = ldata.obs[['barcodes','name_match']].agg(''.join, axis=1)

In [None]:
from collections import Counter
[k for k, v in Counter(ldata.obs_names).items() if v > 1]

In [None]:
adata_d0_loom = scv.utils.merge(adata_d0, ldata)

In [None]:
adata_d0_loom.shape

In [None]:
# Genes of interest: Ar, Grin2a, 'Notch3', 'Heyl', 'Trpm3', 'Csmd1', 'Aldoc', 'Kif19a', Bmp6, 'Bmp6', 'Bmp7', 'Tgfb1', 'Tgfb2', 'Ndrg2'

### RNA velocity - only TECs

In [None]:
# TEC subsets kept for RNA velocity (subsets 10-16, i.e. without the mimic subsets).
tec_subsets_nomimics = ['10:aaTEC1', '11:aaTEC2', '12:cTEC', '13:mTEC1',
                        '14:mTEC-prol', '15:mTEC2', '16:mTEC3']

# .copy() makes the subset independent of adata_d0_loom: the following cells filter genes,
# add obs columns and compute embeddings in place, which should not be done on a view.
TEC_d0_loom_nomimics = adata_d0_loom[adata_d0_loom.obs['cell_type_subset'].isin(tec_subsets_nomimics)].copy()

In [None]:
# Remove columns with all 0s
sc.pp.filter_genes(TEC_d0_loom_nomimics, min_cells=1)

In [None]:
# Composite "<stage> <subset>" label per cell (space-separated).
# Built with vectorised string concatenation; the former empty-string initialisation was
# overwritten immediately and the row-wise agg is slow on large obs tables.
TEC_d0_loom_nomimics.obs['distinct'] = (
    TEC_d0_loom_nomimics.obs['stage'].astype(str) + ' ' + TEC_d0_loom_nomimics.obs['cell_type_subset'].astype(str)
)

In [None]:
sc.pp.highly_variable_genes(TEC_d0_loom_nomimics, n_top_genes=3500, n_bins=20, flavor='seurat', inplace=True)

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(TEC_d0_loom_nomimics, n_comps=20, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
sce.pp.harmony_integrate(TEC_d0_loom_nomimics, 'sample')

In [None]:
sc.pp.neighbors(TEC_d0_loom_nomimics, n_neighbors=15, use_rep='X_pca_harmony')
sc.tl.umap(TEC_d0_loom_nomimics)

In [None]:
scv.tl.velocity(TEC_d0_loom_nomimics, mode='stochastic')

In [None]:
scv.tl.velocity_graph(TEC_d0_loom_nomimics)

In [None]:
scv.pl.velocity_embedding_stream(TEC_d0_loom_nomimics, basis='umap')

In [None]:
# also scv.pl.velocity_embedding_grid
scv.pl.velocity_embedding_stream(TEC_d0_loom_nomimics, basis='umap', color=['cell_type_subset'],alpha=0.7, legend_loc='right')

In [None]:
# Composite "<stage> <subset>" label per cell (space-separated), used to colour and
# select groups in the per-stage velocity plots.
# Built with vectorised string concatenation; the former empty-string initialisation was
# overwritten immediately and the row-wise agg is slow on large obs tables.
TEC_d0_loom_nomimics.obs['distinct'] = (
    TEC_d0_loom_nomimics.obs['stage'].astype(str) + ' ' + TEC_d0_loom_nomimics.obs['cell_type_subset'].astype(str)
)

In [None]:
TEC_d0_loom_nomimics.uns['distinct_colors'] = ['#2ed9ff', '#c1c119', '#8b0000', '#fe00fa', '#f8a19f', '#1cbe4f', '#b5efb5',
                                               '#2ed9ff', '#c1c119', '#8b0000', '#fe00fa', '#f8a19f', '#1cbe4f', '#b5efb5']

In [None]:
# One velocity stream plot per stage, highlighting only that stage's TEC subsets.
# Group labels follow the "<stage> <subset>" format of the 'distinct' obs column.
for stage in ['02mo', '18mo']:
    stage_groups = [f'{stage} {subset}' for subset in
                    ['10:aaTEC1', '11:aaTEC2', '12:cTEC', '13:mTEC1', '14:mTEC-prol', '15:mTEC2', '16:mTEC3']]
    scv.pl.velocity_embedding_stream(TEC_d0_loom_nomimics, basis='umap', groups=stage_groups, color='distinct', size=100, alpha=0.7, legend_loc='right', save=f'{stage}_bystage.svg')


### Violin plots of markers for EC, FB, TEC with scanpy v1.4.6

In [None]:
#import sys
#!{sys.executable} -m pip install scanpy==1.4.6

In [None]:
import scanpy as sc
import pandas as pd
pd.set_option('display.max_rows', 200)

sc.set_figure_params(dpi=80, dpi_save=300, color_map='Spectral_r', vector_friendly=True, transparent=True)
sc.settings.verbosity = 3 # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_versions()

In [None]:
markers = ['Pdgfra',  'Epcam', 'H2-Aa',  'Pecam1', 'Cdh5',  'Nkain4', 'Upk3b', 'Acta2', 'Myl9', 'S100b']

In [None]:
axes = sc.pl.stacked_violin(adata_d0, markers, groupby='cell_type', palette=['#199919', '#aa40fc', '#ff7f0e',                                                                            '#d62728', '#19c9b3', '#FFA5D2'], standard_scale='var', cut=3, use_raw=False, figsize=(3,5), order = ['FB', 'TEC', 'EC', '6:MEC', '7:vSMC/PC', '8:nmSC'], swap_axes=True, save='_d0.pdf')

In [None]:
EC_markers = ['Pecam1', 'Vwf', 'Vcam1', 'Glul', 'Cldn5', 'Aqp7', 'Car4', 'Kdr', 'Plvap',  'Selp', 'Bmp4']

In [None]:
# Endothelial cells only. .copy() makes this an independent object rather than a view of
# adata_d0, because differential-expression results are later written into EC_d0.uns.
EC_d0 = adata_d0[adata_d0.obs['cell_type']=='EC'].copy()

In [None]:
# Endothelial cells, sliced once and used for both the plotted data and the palette lookup.
ec_cells = adata_d0[adata_d0.obs['cell_type']=='EC']
axes = sc.pl.stacked_violin(ec_cells, EC_markers, groupby='cell_type_subset', palette=ec_cells.uns['cell_type_subset_colors'], standard_scale='var', cut=3, use_raw=False, figsize=(1.75,6), swap_axes=True, save='_EC.pdf')

In [None]:
FB_markers = ['Pdgfra', 'Dpp4', 'Fn1',  'Pi16', 'Gpx3', 'Ar', 'Atf3', 'Inmt', 'Bmp4', 'Ptn' , 'Postn']

In [None]:
# Fibroblasts without the '9:Fat' subset, sliced once and used for both the plotted data and the palette lookup.
fb_cells_nofat = adata_d0[(adata_d0.obs['cell_type']=='FB') & (adata_d0.obs['cell_type_subset']!='9:Fat')]
axes = sc.pl.stacked_violin(fb_cells_nofat, FB_markers, groupby='cell_type_subset', palette=fb_cells_nofat.uns['cell_type_subset_colors'], standard_scale='var', cut=3, use_raw=False, figsize=(1.75,6), swap_axes=True, save='_FB.pdf')

In [None]:
TEC_markers =['Epcam', 'H2-Aa',  'Prss16', 'Ccl21a', 'Ccnd2', 'Aire', 'Ly6d',  'Wfdc2', 'Ccl20', 'Car8',  'Avil']

In [None]:
subset_order = ['12:cTEC','13:mTEC1', '14:mTEC-prol',   '15:mTEC2', '16:mTEC3', '19:mimic(goblet)', '20:mimic(microfold)', '18:mimic(neuroendo)', '17:mimic(tuft)',  '10:aaTEC1', '11:aaTEC2']

In [None]:
axes = sc.pl.stacked_violin(adata_d0[adata_d0.obs['cell_type']=='TEC'], TEC_markers, groupby='cell_type_subset', palette=[ '#8b0000', '#fe00fa', '#f8a19f', '#1cbe4f',
       '#b5efb5','#325a9b','#c075a6', '#feaf16',  '#aa0dfe',  '#2ed9ff', '#c1c119'], standard_scale='var', cut=3, order=subset_order, use_raw=False, figsize=(6.4,6), swap_axes=True, save='_TEC.pdf')

## Differential expression analysis

### Wilcoxon test

### EC subset markers

In [None]:
sc.tl.rank_genes_groups(EC_d0, 'cell_type_subset', method='wilcoxon', use_raw=False)

In [None]:
# Export the complete Wilcoxon ranking of the EC subsets. Each group contributes four
# columns, suffixed _n (names), _s (scores), _l (logfoldchanges) and _p (pvals_adj).
result = EC_d0.uns['rank_genes_groups']
groups = result['names'].dtype.names
ec_wilcox_table = pd.DataFrame(
    {group + '_' + key[:1]: result[key][group]
     for group in groups for key in ['names', 'scores', 'logfoldchanges', 'pvals_adj']})

# The context manager saves and closes the workbook; ExcelWriter.save() is not available
# in pandas >= 2.0.
with pd.ExcelWriter('EC_d0_wilcox.xlsx', engine='xlsxwriter') as writer:
    ec_wilcox_table.to_excel(writer)

The scanpy plotting function below visualizes the results.

In [None]:
sc.pl.rank_genes_groups_dotplot(EC_d0, n_genes=20, standard_scale='var', cmap=user_defined_cmap_degs)

### FB subset markers

In [None]:
sc.tl.rank_genes_groups(FB_d0_nofat, 'cell_type_subset', method='wilcoxon', use_raw=False)

In [None]:
# Export the complete Wilcoxon ranking of the FB subsets. Each group contributes four
# columns, suffixed _n (names), _s (scores), _l (logfoldchanges) and _p (pvals_adj).
result = FB_d0_nofat.uns['rank_genes_groups']
groups = result['names'].dtype.names
fb_wilcox_table = pd.DataFrame(
    {group + '_' + key[:1]: result[key][group]
     for group in groups for key in ['names', 'scores', 'logfoldchanges', 'pvals_adj']})

# The context manager saves and closes the workbook; ExcelWriter.save() is not available
# in pandas >= 2.0.
with pd.ExcelWriter('FB_d0_wilcox.xlsx', engine='xlsxwriter') as writer:
    fb_wilcox_table.to_excel(writer)

The scanpy plotting function below visualizes the results.

In [None]:
sc.pl.rank_genes_groups_dotplot(FB_d0_nofat, n_genes=20, standard_scale='var', cmap=user_defined_cmap_degs)

### TEC subset markers

In [None]:
sc.tl.rank_genes_groups(TEC_d0, 'cell_type_subset', method='wilcoxon', use_raw=False)

In [None]:
# Export the complete Wilcoxon ranking of the TEC subsets. Each group contributes four
# columns, suffixed _n (names), _s (scores), _l (logfoldchanges) and _p (pvals_adj).
result = TEC_d0.uns['rank_genes_groups']
groups = result['names'].dtype.names
tec_wilcox_table = pd.DataFrame(
    {group + '_' + key[:1]: result[key][group]
     for group in groups for key in ['names', 'scores', 'logfoldchanges', 'pvals_adj']})

# The context manager saves and closes the workbook; ExcelWriter.save() is not available
# in pandas >= 2.0.
with pd.ExcelWriter('TEC_d0_wilcox.xlsx', engine='xlsxwriter') as writer:
    tec_wilcox_table.to_excel(writer)

The scanpy plotting function below visualizes the results.

In [None]:
sc.pl.rank_genes_groups_dotplot(TEC_d0, n_genes=20, standard_scale='var', cmap=user_defined_cmap_degs)

### All subset markers

In [None]:
sc.tl.rank_genes_groups(adata_d0, 'cell_type_subset', method='wilcoxon', use_raw=False)

In [None]:
# Export the complete Wilcoxon ranking of all subsets. Each group contributes four
# columns, suffixed _n (names), _s (scores), _l (logfoldchanges) and _p (pvals_adj).
result = adata_d0.uns['rank_genes_groups']
groups = result['names'].dtype.names
all_wilcox_table = pd.DataFrame(
    {group + '_' + key[:1]: result[key][group]
     for group in groups for key in ['names', 'scores', 'logfoldchanges', 'pvals_adj']})

# The context manager saves and closes the workbook; ExcelWriter.save() is not available
# in pandas >= 2.0.
with pd.ExcelWriter('adata_d0_wilcox.xlsx', engine='xlsxwriter') as writer:
    all_wilcox_table.to_excel(writer)

The scanpy plotting function below visualizes the results.

In [None]:
sc.pl.rank_genes_groups_dotplot(adata_d0, n_genes=5, standard_scale='var', cmap=user_defined_cmap_degs)

In [None]:
sc.pl.rank_genes_groups_dotplot(adata_d0, n_genes=25, vmax=5, cmap=user_defined_cmap_degs)

In [None]:
artEC_sign = ['Tm4sf1',	'Cldn5',	'Ly6c1',	'Ptprb',	'Pecam1',	'Egfl7',	'Ptprm',	'Flt1',	'Cav1',	'Epas1',	'Mecom',	'Fabp4',	'Slc9a3r2',	'Icam2',	'Esam',	'Sox17',	'Cdh5',	'Fbln5',	'Id1',	'Eng',	'Cyyr1',	'Ecscr',	'Adgrf5',	'Arhgap31',	'Cst3',	'Ly6e',	'Podxl',	'Plcb1',	'Pdgfd',	'Cd36',	'Cd200',	'Calm1',	'Stmn2',	'Prex2',	'Klf2',	'Serinc3',	'Mmrn2',	'Sema3g',	'S1pr1',	'Tspan13',	'Cdh13',	'Tinagl1',	'Ldb2',	'Arl15',	'Cav2',	'Crip2',	'Gnai2',	'Cavin2',	'Atox1',	'Hspb1']
capEC_sign = ['Fabp4',	'Gpihbp1',	'Cd36',	'Ly6c1',	'Rgcc',	'Egfl7',	'Tmsb4x',	'Flt1',	'Mgll',	'Pecam1',	'Cav1',	'Cd300lg',	'Cavin2',	'Cdh5',	'Gng11',	'Tspan13',	'Esam',	'Hspb1',	'Emcn',	'Adgrl4',	'Cldn5',	'Tcf15',	'Calm1',	'Ptprm',	'Cyyr1',	'Cd200',	'Tm4sf1',	'Ptprb',	'Etl4',	'Kdr',	'Cav2',	'Arhgap31',	'Kank3',	'Lims2',	'Ctla2a',	'Scarb1',	'Ushbp1',	'Gimap6',	'C1qtnf9',	'Eng',	'Slc9a3r2',	'Car4',	'Fli1',	'Id1',	'Adgrf5',	'Cdh13',	'Aqp1',	'Serinc3',	'Nrp1',	'Ablim3']
venEC_sign = ['Aqp1',	'Pecam1',	'Plvap',	'Ctla2a',	'Egfl7',	'Eng',	'Fabp4',	'Lrg1',	'Ptprb',	'Flt1',	'Tspan7',	'Tmsb4x',	'Mmrn2',	'Vwf',	'Emcn',	'Cdh5',	'Mecom',	'Il6st',	'Ldb2',	'Esam',	'Fli1',	'Gnai2',	'Scarb1',	'Gng11',	'Prkch',	'Ecscr',	'Ablim1',	'Cyyr1',	'Epas1',	'Adgrf5',	'Gimap6',	'Myh9',	'Grrp1',	'Cav1',	'Selp',	'Ptprm',	'Ehd4',	'Fry',	'Abcg2',	'Adgrl4',	'Arhgap31',	'Pdlim1',	'Cd93',	'Tm4sf1',	'2200002D01Rik',	'Ece1',	'St6galnac3',	'Ndrg1',	'Cd200',	'Cd36']
capsFB_sign = ['Pi16',	'Timp2',	'Fn1',	'Mfap5',	'Clec3b',	'Cd248',	'Opcml',	'Nid1',	'Pcolce2',	'Igfbp6',	'Fbn1',	'Tmem100',	'Fndc1',	'Ackr3',	'Fstl1',	'Ebf2',	'Creb5',	'Col14a1',	'Sema3c',	'Anxa3',	'Pla1a',	'Adgrd1',	'Pcsk6',	'Smpd3',	'Dpt',	'Ddr2',	'Axl',	'Tnxb',	'Ogn',	'Loxl1',	'Dpp4',	'Adamts5',	'Islr',	'Plpp3',	'Scara5',	'Ly6c1',	'Lsp1',	'Efhd1',	'Cd34',	'Gfpt2',	'Tppp3',	'Col1a1',	'Sdk1',	'Col3a1',	'Rnase4',	'Emilin2',	'Metrnl',	'Sparc',	'Col1a2',	'Tgfbr2']
intFB_sign = ['Gsn',	'Dcn',	'Serpinf1',	'Smoc2',	'Lrp1',	'Lum',	'Htra3',	'Pcolce',	'Col3a1',	'Gpx3',	'Col1a2',	'Abca8a',	'Lpl',	'Celf2',	'Cygb',	'Fbln1',	'Selenop',	'Slit3',	'Gas1',	'Col1a1',	'Mmp2',	'Dpep1',	'Penk',	'Col15a1',	'Inmt',	'Serpinh1',	'Clec3b',	'Rbp1',	'Igfbp4',	'Dpt',	'Pdgfra',	'Mgst1',	'Igf1',	'Ftl1',	'Lama2',	'Serping1',	'Rnase4',	'Il11ra1',	'Mfap5',	'Ogn',	'Tmsb10',	'Cd302',	'Svep1',	'Aebp1',	'Bgn',	'Plxdc2',	'Nbl1',	'Ifitm2',	'Itm2a',	'Adamts12']
medFB_sign = ['Serpine2',	'Bgn',	'Csmd1',	'Enpp2',	'Ptn',	'Apod',	'Igfbp7',	'Lhfp',	'Lsamp',	'Hsd11b1',	'Ifitm1',	'Ltc4s',	'Ccl19',	'Cd63',	'Colec12',	'Des',	'Col15a1',	'Mgp',	'Ank2',	'Sparcl1',	'Cp',	'Spon1',	'Gpm6b',	'Il34',	'Tmem176a',	'Tmem176b',	'Apoe',	'C3',	'Ltbp1',	'Lama2',	'Cygb',	'Tcf4',	'Ndufa4l2',	'Cd9',	'Lum',	'Col6a2',	'Abi3bp',	'Crispld2',	'Dcn',	'Laptm4a',	'Ifitm2',	'G0s2',	'Jun',	'Mfge8',	'Cd302',	'Il1r1',	'Nrp1',	'Pde7b',	'Prelp',	'Cd81']
MEC_sign = ['Gpm6a',	'Nkain4',	'Upk3b',	'Clu',	'Gm12840',	'Igfbp6',	'Abi1',	'Aebp1',	'Trf',	'Crip1',	'Krt19',	'Igfbp5',	'Rspo1',	'Wt1',	'Mpp6',	'Upk1b',	'Rarres2',	'Msln',	'Il1rapl1',	'Plxna4',	'C2',	'Efemp1',	'Adamtsl1',	'Sntg1',	'Pkhd1l1',	'Sox6',	'Aldh1a2',	'C4b',	'Gpc3',	'Kcnd2',	'Lgals7',	'Gas6',	'Cldn15',	'Lrrn4',	'Mgp',	'Hspb1',	'Csrp2',	'Cav1',	'Bicd1',	'Tmem151a',	'Wdr17',	'Cldn10',	'Cavin2',	'Efna5',	'Ptgis',	'Flrt2',	'Ezr',	'1010001N08Rik',	'C3',	'Muc16']
PCvSMC_sign = ['Myl9',	'Cald1',	'Gm13889',	'Tpm2',	'Acta2',	'Prkg1',	'Tagln',	'Tpm1',	'Rgs5',	'Ctnna3',	'Tinagl1',	'Cacna1c',	'Crip1',	'Myh11',	'Notch3',	'Mylk',	'Dmd',	'Myl6',	'Sncg',	'Mustn1',	'Ndufa4l2',	'Sparcl1',	'Dgkb',	'Gng11',	'Kcnab1',	'Ppp1r12a',	'Higd1b',	'Des',	'Pde3a',	'Pcp4l1',	'Rcan2',	'Bcam',	'Calm2',	'Gucy1a1',	'Mef2c',	'Ptp4a3',	'Aspn',	'Cacnb2',	'Cpe',	'Rgs4',	'Igfbp7',	'Csrp1',	'Gucy1b1',	'Lmod1',	'Cox4i2',	'Mfge8',	'Timp3',	'Tm4sf1',	'Ppp1r14a',	'Malat1']
nmSC_sign = ['Csmd1',	'Kcna1',	'Fxyd1',	'Plp1',	'Dlgap1',	'Prnp',	'Cd9',	'Gfra3',	'Stard13',	'Vwa1',	'Cdh19',	'Malat1',	'Dbi',	'Slc35f1',	'Aspa',	'Scn7a',	'Cd59a',	'Zeb2',	'S100b',	'Cadm2',	'Matn2',	'Nkain2',	'Cryab',	'Gpm6b',	'Gpr37l1',	'Art3',	'Apoe',	'Cnp',	'Vim',	'Sox10',	'Zfp536',	'Lgi4',	'Nrn1',	'Lgals3',	'Sgcd',	'Fcgr2b',	'Ncam1',	'Frmd4a',	'Sat1',	'Chl1',	'Cadm1',	'S100a6',	'Prkca',	'Gatm',	'Fign',	'Pmp22',	'Kcna2',	'Sfrp5',	'Fgl2',	'Csrp1']
fat_sign = ['Mgst1',	'Lpl',	'Dbi',	'Apoe',	'Hp',	'Car3',	'Aoc3',	'Mpc2',	'Ndufa4',	'G0s2',	'Cebpa',	'Fabp4',	'Rarres2',	'Rbp4',	'Plin2',	'Vim',	'mt-Atp6',	'mt-Co1',	'mt-Co3',	'mt-Nd1',	'Angptl4',	'Pparg',	'Eif4ebp1',	'Adam12',	'Adipoq',	'Ndufb9',	'Cox7b',	'Mmd',	'Etfb',	'Nnmt',	'Slc1a5',	'Slc24a3',	'Gsn',	'Selenbp1',	'Fcor',	'mt-Nd4',	'Uqcr11',	'Steap4',	'Adhfe1',	'mt-Cytb',	'Arhgap24',	'Prkar2b',	'Chchd2',	'Chpt1',	'Plin1',	'Atp5g3',	'Hadh',	'Zeb2',	'Aldh2',	'Col15a1']
aaTEC1_sign = ['Krt18',	'Epcam',	'Trpm3',	'Cd24a',	'Wfdc18',	'Krt8',	'Spint2',	'Ifi27l2a',	'Meis2',	'Cldn3',	'Ly6e',	'Slc16a11',	'Slc9a3r1',	'S100a11',	'Btg1',	'Perp',	'Gsta4',	'Isl1',	'Fxyd3',	'Pbx1',	'Cdk19',	'Ptprd',	'mt-Co1',	'Taldo1',	'BC006965',	'Apobec3',	'S100a1',	'Isg15',	'Pde4b',	'Oasl2',	'Anxa2',	'Aldoc',	'Eya1',	'Socs2',	'Stat1',	'Sdc4',	'Rtp4',	'2610307P16Rik',	'Fam107a',	'Mif',	'mt-Co3',	'Cdh1',	'Cd74',	'Atp1a1',	'Fbxo2',	'Nedd4l',	'Shank2',	'Eno1',	'Marcksl1',	'mt-Nd1']
aaTEC2_sign = ['Csmd1',	'Ndrg2',	'Ccl19',	'Trpm3',	'Cd74',	'Nav2',	'Gadd45g',	'H2-Ab1',	'Atp1b1',	'Pde4b',	'Csrp1',	'H2-Eb1',	'Btg1',	'Meis2',	'Mir100hg',	'Ccl21a',	'Socs2',	'Iigp1',	'Tagln',	'Socs3',	'H2-Aa',	'Kirrel3',	'Ptprd',	'Notch3',	'Tagln2',	'Heyl',	'Gucy1a1',	'Bcl2',	'Gm48742',	'Gpx3',	'Eya4',	'Cnn3',	'Ncam1',	'Art3',	'Cd9',	'Eya1',	'Apoe',	'Pbx1',	'Ar',	'Nhs',	'Olfm2',	'Cul1',	'Fos',	'Palld',	'Phlda1',	'Jmjd1c',	'Sncaip',	'Cacna1c',	'Sdc4',	'Arid5b']
cTEC_sign = ['Krt18',	'Cstb',	'Krt8',	'Ctsl',	'Ank3',	'Ndufa11',	'Tbata',	'H2-Ab1',	'Slc46a2',	'Prxl2b',	'H2-Aa',	'Prss16',	'Cd74',	'Gas6',	'Fabp5',	'Nlgn1',	'Pax1',	'H2-Eb1',	'Pltp',	'Ccl25',	'Wnt4',	'Tmem131l',	'Rbfox1',	'Psmb9',	'Dpp6',	'Shisa2',	'AI646519',	'Psmb11',	'H2-DMa',	'Bnip3l',	'Krt5',	'Sfn',	'H2-DMb2',	'Tsc22d1',	'Snhg11',	'Krt17',	'Trp63',	'Ctnnd2',	'Nav2',	'Atpif1',	'Kctd1',	'Ccl21a',	'Sh2d4b',	'Perp',	'Tenm4',	'Dsp',	'Spint2',	'Ociad2',	'Ndrg3',	'Apobec3']
mTEC1_sign = ['H2-Ab1',	'Krt5',	'H2-Eb1',	'H2-Aa',	'Ifi27l2a',	'Ank3',	'Cd74',	'Ccl21a',	'Krt14',	'Ifitm3',	'Krt18',	'Perp',	'Fxyd3',	'Eya4',	'Epcam',	'Spint2',	'Isg15',	'Oasl2',	'Sfn',	'Meis2',	'Mif',	'Krt17',	'H2-DMa',	'Atp1a1',	'Rbfox1',	'Ly6e',	'Krt8',	'Nxn',	'Ctnnd2',	'Rtp4',	'Kcnma1',	'H3f3b',	'Apobec3',	'Ptprd',	'Pbx1',	'Apoe',	'Atp1b1',	'Trp63',	'Atpif1',	'Gas6',	'Eya1',	'Urah',	'Itga6',	'Dsp',	'H2-DMb1',	'Marcksl1',	'Csrp1',	'Fcgbp',	'B2m',	'Cdh1']
mTECprol_sign = ['Krt17',	'Tpm2',	'Ascl1',	'Ccl21a',	'Ptma',	'Krt7',	'Spint2',	'Krt5',	'Cd74',	'Marcksl1',	'H3f3b',	'H2-Eb1',	'Wfdc18',	'Sox4',	'H2-Aa',	'H2-Ab1',	'Hes6',	'Epcam',	'Ubd',	'Skint10',	'Krt8',	'Krt14',	'Tubb2b',	'Pfn1',	'Actg1',	'Prxl2b',	'Fcgbp',	'Mapk13',	'H2afy2',	'Rgs5',	'Mif',	'Cd82',	'Mdk',	'H2-DMb2',	'Gm49708',	'Kcnq3',	'Ppia',	'Srgn',	'Cdk4',	'Adm',	'Hagh',	'Fezf2',	'Hsp90ab1',	'S100a14',	'Hnrnpa1',	'Cib1',	'H2-Q7',	'Ank3',	'Perp',	'Slc25a5']
mTEC2_sign = ['Cd74',	'Srgn',	'Ubd',	'H2-Aa',	'H2-Eb1',	'Syt1',	'S100a14',	'H2-Ab1',	'H2-DMb2',	'Cyba',	'H2-Oa',	'Mrpl38',	'Lrrc42',	'Marcksl1',	'Hagh',	'Fabp5',	'Cd52',	'Fezf2',	'Aire',	'Cib1',	'Bspry',	'Krt17',	'Hspb11',	'Plb1',	'Krt8',	'Fcgbp',	'Cdx1',	'Psme2',	'Dpp10',	'Dio1',	'Spint2',	'Ctss',	'H2-Eb2',	'Skint10',	'Txn1',	'Ankrd33b',	'Tnfrsf11a',	'Fam89a',	'Syngr2',	'Calcb',	'Nfkbia',	'Ndufc2',	'Laptm5',	'Utf1',	'Cox17',	'Il4i1',	'Hdc',	'Ing1',	'Gm47938',	'Csn2']
mTEC3_sign = ['Ly6d',	'Fxyd3',	'Sfn',	'Perp',	'Dapl1',	'Krt17',	'Dsp',	'Tacstd2',	'Dmkn',	'Skint3',	'Spink5',	'H2afj',	'Dstn',	'Epcam',	'H2-K1',	'Oit1',	'Cdh1',	'Cst6',	'Gsta4',	'Hspb1',	'Fabp5',	'Pdzk1ip1',	'Cdkn2b',	'Urah',	'S100a14',	'Cdkn2a',	'H2-Q7',	'Spint2',	'Bcl2a1b',	'Cldn4',	'Atox1',	'Vamp8',	'Calml3',	'Lypd3',	'Avpi1',	'Ldhb',	'Rab11a',	'Jup',	'4833423E24Rik',	'Prdx5',	'Krt23',	'Atp1b1',	'Cdkn1a',	'Serpinb2',	'Fcgbp',	'Rbm47',	'Sdc1',	'Trim29',	'Gsta2',	'Dgat2']
tuft_sign = ['Mctp1',	'Gng13',	'Avil',	'Rgs13',	'Espn',	'Cystm1',	'Anxa4',	'Lrmp',	'Ptpn18',	'Ivns1abp',	'Bmx',	'Fyb',	'Calm2',	'Ly6g6f',	'Ethe1',	'Reep5',	'1810046K07Rik',	'Smim22',	'Ociad2',	'Ptpn6',	'Abhd2',	'Cd24a',	'Ehf',	'Pde4d',	'Chil1',	'St18',	'Dgki',	'Pou2f3',	'Pik3r5',	'Trpm5',	'Lima1',	'Vav1',	'Alox5ap',	'Alox5',	'Krt8',	'Scand1',	'Tnc',	'Cox8a',	'Oxr1',	'Krt18',	'H2afj',	'Rab25',	'Gpcpd1',	'Strip2',	'Cox17',	'Sh2d6',	'Rassf6',	'Macrod2',	'Cldn7',	'Plk2']
nTEC_sign = ['Ptprn2',	'Cacna2d1',	'Scg5',	'Syt1',	'Krt8',	'Car8',	'Stxbp5l',	'Chga',	'Ccser1',	'Snap25',	'Cystm1',	'Krt7',	'Smim22',	'Cd9',	'Alcam',	'Krt18',	'Dnajc12',	'Pip5k1b',	'Epcam',	'Cldn7',	'Syt7',	'Kcnb2',	'Insm1',	'Cd24a',	'Cdh1',	'Cadps',	'Ceacam10',	'Resp18',	'Tsc22d1',	'Rims2',	'Cacna1a',	'Pcbd1',	'Cacnb2',	'Cplx2',	'Fam183b',	'Emb',	'AC149090.1',	'Rap1gap2',	'Aopep',	'Nol4',	'Stard10',	'Tmem163',	'5330417C22Rik',	'Pam',	'Btg2',	'Fhl2',	'Spint2',	'Actg1',	'A230057D06Rik',	'Guk1']
goblet_sign = ['Wfdc2',	'Fxyd3',	'H2-K1',	'Cyp2f2',	'Spint2',	'Atp1b1',	'S100a11',	'Gsto1',	'Cxcl17',	'Alcam',	'Krt18',	'Ly6e',	'Ly6d',	'Epcam',	'Ifi27l2a',	'Cbr2',	'Sorbs2',	'Smim22',	'Pglyrp1',	'H2-Q7',	'Serpinb11',	'Krt8',	'Krt19',	'Vamp8',	'Slc12a2',	'Cd24a',	'Irf7',	'Cldn3',	'Slc16a11',	'Gsta4',	'Cldn7',	'B2m',	'Elf3',	'Oasl2',	'Tst',	'Atp1a1',	'Aqp5',	'Sfn',	'Cdh1',	'Ezr',	'Tspan8',	'Tacstd2',	'Perp',	'Runx1',	'Eya2',	'Lmo7',	'Cd74',	'Rbm47',	'Chchd10',	'Bace2']
Mlike_sign = ['Ccl20',	'Ccl9',	'Serpinb6a',	'Serpinb1a',	'Tmsb4x',	'2200002D01Rik',	'Nostrin',	'Ubd',	'Csn2',	'Ctsh',	'Pold1',	'Bcl2a1d',	'Spib',	'Plb1',	'Fabp5',	'Marcksl1',	'Ccl6',	'Spint2',	'Cyp2a5',	'Clu',	'Bcl2a1b',	'Pglyrp1',	'Fabp1',	'Hamp',	'AW112010',	'Atox1',	'H2-M2',	'Atp6v1c1',	'Bcl2a1a',	'Krt20',	'Gjb2',	'Epcam',	'Iscu',	'Vamp8',	'Sephs2',	'4930520O04Rik',	'Mrpl38',	'Cldn7',	'Rac2',	'AA467197',	'Hspe1',	'Cib1',	'Dsg1a',	'Ahcyl2',	'Fxyd3',	'Tnfrsf11b',	'Srgn',	'Sept1',	'Fcgbp',	'Hist1h2bc']

In [None]:
aaTEC1_sign = ['Krt18',	'Epcam',	'Trpm3',	'Cd24a',	'Wfdc18',	'Krt8',	'Spint2',	'Ifi27l2a',	'Meis2',	'Cldn3',	'Ly6e',	'Slc16a11',	'Slc9a3r1',	'S100a11',	'Btg1',	'Perp',	'Gsta4',	'Isl1',	'Fxyd3',	'Pbx1',	'Cdk19',	'Ptprd',	'mt-Co1',	'Taldo1',	'BC006965',	'Apobec3',	'S100a1',	'Isg15',	'Pde4b',	'Oasl2',	'Anxa2',	'Aldoc',	'Eya1',	'Socs2',	'Stat1',	'Sdc4',	'Rtp4',	'2610307P16Rik',	'Fam107a',	'Mif',	'mt-Co3',	'Cdh1',	'Cd74',	'Atp1a1',	'Fbxo2',	'Nedd4l',	'Shank2',	'Eno1',	'Marcksl1',	'mt-Nd1',	'Sntb1',	'Auts2',	'Kif19a',	'Bmp6',	'Dbi',	'Pkp4',	'Sfn',	'Id2',	'Ehf',	'Arl4c',	'Chchd10',	'H2-Ab1',	'Slc5a8',	'H2-Aa',	'Skp1a',	'Nav2',	'Sox9',	'Kcnk1',	'Etv6',	'Kirrel3',	'Mir100hg',	'Prxl2a',	'Thsd4',	'Cnn3',	'Rbm47',	'Mcc',	'Rdh10',	'Rhov',	'Plet1',	'Six1',	'mt-Cytb',	'Cited4',	'Ank3',	'Atp6v1g1',	'Atp1b1',	'Pax9',	'Galm',	'Cracr2b',	'Eya4',	'Fgf13',	'H2-Eb1',	'Rtl4',	'Cd9',	'Strbp',	'Gna14',	'Igkc',	'Dclk2',	'Vamp8',	'Nipal2',	'Tenm4']

In [None]:
aaTEC1_sign_cherrypicked = ['Trpm3','Cldn3',	'Slc16a11',	'Slc9a3r1','Isl1',	'Cdk19',	'Ptprd',	'Taldo1',	'BC006965', 'Aldoc',	'Socs2',	'Fam107a','Fbxo2',	'Nedd4l',	'Shank2','Kif19a',	'Bmp6',	'Id2',	'Arl4c',	'Slc5a8',	'Kcnk1',	'Kirrel3',		'Prxl2a',	'Thsd4',	'Rdh10',	'Rhov',	'Plet1',	'Cited4',	'Galm',	'Rtl4',	'Gna14',	'Dclk2',	'Nipal2']

In [None]:
sc.pl.dotplot(adata_d0, aaTEC1_sign,  groupby='cell_type_subset',vmax=3, cmap=user_defined_cmap_degs)

In [None]:
aaTEC2_sign = ['Csmd1',	'Ndrg2',	'Ccl19',	'Trpm3',	'Cd74',	'Nav2',	'Gadd45g',	'H2-Ab1',	'Atp1b1',	'Pde4b',	'Csrp1',	'H2-Eb1',	'Btg1',	'Meis2',	'Mir100hg',	'Ccl21a',	'Socs2',	'Iigp1',	'Tagln',	'Socs3',	'H2-Aa',	'Kirrel3',	'Ptprd',	'Notch3',	'Tagln2',	'Heyl',	'Gucy1a1',	'Bcl2',	'Gm48742',	'Gpx3',	'Eya4',	'Cnn3',	'Ncam1',	'Art3',	'Cd9',	'Eya1',	'Apoe',	'Pbx1',	'Ar',	'Nhs',	'Olfm2',	'Cul1',	'Fos',	'Palld',	'Phlda1',	'Jmjd1c',	'Sncaip',	'Cacna1c',	'Sdc4',	'Arid5b',	'Gm4951',	'Bex3',	'Pcdh17',	'Esr1',	'Nfia',	'H3f3b',	'Cacnb2',	'Btg2',	'Cd200',	'Pgf',	'Mif',	'Frmpd4',	'Zeb2',	'Malat1',	'Rgs2',	'Zfp36l1',	'Grin2a',	'2610307P16Rik',	'Sbno2',	'Junb',	'Igkc',	'Lmna',	'Jun',	'Serpine2',	'Mgp',	'H2-DMa',	'Itm2b',	'Dnajc6',	'Lpar1',	'Runx1',	'Isl1',	'Tceal9',	'Dnm3',	'Mdk',	'Xist',	'Ptn',	'Isg15',	'Pth',	'Laptm4a',	'Clu',	'Fmnl2',	'Zbtb20',	'4632427E13Rik',	'Egr1',	'Tpm4',	'Cirbp',	'Nr4a1',	'Ptpn1',	'Deptor',	'Sema3a']

In [None]:
# NOTE(review): this list is identical, gene for gene, to aaTEC1_sign_cherrypicked defined
# above, which looks like a copy-paste leftover — confirm the intended aaTEC2 selection.
aaTEC2_sign_cherrypicked = ['Trpm3','Cldn3',	'Slc16a11',	'Slc9a3r1','Isl1',	'Cdk19',	'Ptprd',	'Taldo1',	'BC006965', 'Aldoc',	'Socs2',	'Fam107a','Fbxo2',	'Nedd4l',	'Shank2','Kif19a',	'Bmp6',	'Id2',	'Arl4c',	'Slc5a8',	'Kcnk1',	'Kirrel3',		'Prxl2a',	'Thsd4',	'Rdh10',	'Rhov',	'Plet1',	'Cited4',	'Galm',	'Rtl4',	'Gna14',	'Dclk2',	'Nipal2']

In [None]:
sc.pl.dotplot(adata_d0, aaTEC2_sign,  groupby='cell_type_subset', vmax=2, cmap=user_defined_cmap_degs)

In [None]:
sc.pl.dotplot(adata_d0, aaTEC2_sign,  groupby='stage', vmax=2, cmap=user_defined_cmap_degs)

In [None]:
# UMAP grid: 'stage' followed by a panel of individual genes.
# 'Bmpr2' and 'Bmp7' were listed twice in the original call, which rendered the
# same two panels a second time; each key now appears exactly once.
sc.pl.umap(
    adata_d0,
    color=[
        'stage', 'Mmp9', 'Il6', 'Bmp7', 'Ppp1r15a', 'Smurf1',
        'Bmpr1a', 'Bmpr1b', 'Bmpr2', 'Fst', 'Fgf8', 'Nog', 'Grem1',
        'Prkce', 'Xbp1', 'Trpm3', 'Aldoc', 'Fbxo2', 'Shank2', 'Kif19a',
        'Bmp6', 'Tgfbr1', 'Smad1', 'Smad5', 'Tgfbr2', 'Tgfbr3',
        'Acvr2a', 'Acvr2b', 'Cd44', 'Smad2', 'Smad3',
    ],
    color_map='Spectral_r',
    use_raw=False,
    ncols=5,
    wspace=0.5,
    outline_width=[0.6, 0.05],
    size=15,
    frameon=False,
    add_outline=True,
    sort_order=False,  # draw cells in stored order rather than sorted by value
)

#### 18mo vs 02mo

In [None]:
# Per-cell "<stage>_<cell_type_subset>" label.
# Built with vectorised string concatenation instead of a row-wise join; the
# earlier empty-string pre-assignment was a dead store and has been removed.
adata_d0.obs['stage_and_subset'] = (
    adata_d0.obs['stage'].astype(str)
    + '_'
    + adata_d0.obs['cell_type_subset'].astype(str)
)

In [None]:
# Wilcoxon rank-sum test of 18mo vs 02mo cells within each subset; one Excel
# sheet per subset with names / scores / logfoldchanges / pvals_adj columns
# (column suffixes _n, _s, _l, _p come from the first letter of each key).
#
# The writer is used as a context manager: `ExcelWriter.save()` no longer exists
# in pandas >= 2.0, and the `with` block also closes the file if a test raises.
# NOTE(review): the workbook is written to the working directory, while a later
# R cell reads '../output/metadata/wilcox_items/adata_18vs02mo_d0_wilcox.xlsx' --
# confirm the file is moved there or align the two paths.
with pd.ExcelWriter('adata_18vs02mo_d0_wilcox.xlsx', engine='xlsxwriter') as writer:
    for subset in ["0:arEC", "1:capEC", "2:venEC", "3:capsFB", "4:intFB", "5:medFB", "6:MEC", "7:vSMC/PC", "8:nmSC", "9:Fat", "10:aaTEC1", "11:aaTEC2",
                   "12:cTEC", "13:mTEC1", "14:mTEC-prol", "15:mTEC2", "16:mTEC3", "17:mimic(tuft)", "18:mimic(neuroendo)", "19:mimic(goblet)", "20:mimic(microfold)"]:
        sc.tl.rank_genes_groups(adata_d0, 'stage_and_subset', groups=['18mo_'+subset], reference='02mo_'+subset, method='wilcoxon', use_raw=False)
        result = adata_d0.uns['rank_genes_groups']
        groups = result['names'].dtype.names
        # Sheet name is the part after the "<n>:" prefix; '/' is replaced by '-'.
        sheet_name = re.search('.*:(.+)', subset).group(1).replace('/', '-')
        pd.DataFrame(
            {group + '_' + key[:1]: result[key][group]
             for group in groups for key in ['names', 'scores', 'logfoldchanges', 'pvals_adj']}
        ).to_excel(writer, sheet_name=sheet_name)

In [None]:
%load_ext rpy2.ipython

In [None]:
%R if (!require("pacman")) install.packages("pacman")
%R pacman::p_load(MAST, scales, data.table, openxlsx, ggplot2, ggpubr, RColorBrewer, dichromat, readxl, ggpubr, pheatmap, dplyr, arrow, feather, DelayedArray, HDF5Array, stringr, parallel)

#### Convert Wilcoxon results to GSEA ranks

In [None]:
%%R 

# For every sheet of the Wilcoxon workbook, keep columns 2 and 3, drop NA rows,
# and sort by the second kept column in decreasing order to obtain a rank table.
# NOTE(review): this reads 'adata_d0_wilcox.xlsx', not the
# 'adata_18vs02mo_d0_wilcox.xlsx' workbook written earlier in this section --
# confirm which file is intended.
# NOTE(review): the write.table() call is commented out, so nothing is saved.

# read in all available excel sheet names 
wilcox_results = excel_sheets('adata_d0_wilcox.xlsx')

rnk_items_list = NULL

for (item in wilcox_results) {
      wilcox_result <- read_excel('adata_d0_wilcox.xlsx', sheet = item)
      rnk_item = na.omit(wilcox_result[,c(2,3)])
      # read_excel() returns a tibble, so `rnk_item[,2]` is still a one-column
      # tibble; `[[2]]` extracts the plain vector that order() expects.
      rnk_item_sorted = rnk_item[order(rnk_item[[2]], decreasing = TRUE),]
      colnames(rnk_item_sorted)[1] = '#primerid' # comment out header
      colnames(rnk_item_sorted)[2] = '#rank_score' # comment out header
      #rnk_items_list[[item]] = rnk_item_sorted
     # write.table(rnk_item_sorted, file = paste0('../output/metadata/gsea_items/input_ranks/wilcox_result_', item, '.rnk'), sep='\t', row.names = FALSE, quote = FALSE)
}

### Dot chart using score and FDR from the Wilcoxon test

In [None]:
%%R 

# Stack the per-subset Wilcoxon sheets into one long table, `wilcox_results_combined`.
# Each sheet gets fixed column names, a -log10(p_adj) column and a `subset` column
# holding the sheet name; only rows with p_adj <= 0.05 are kept.
wilcox_results = excel_sheets('../output/metadata/wilcox_items/adata_18vs02mo_d0_wilcox.xlsx')
wilcox_results_combined = NULL
for (item in wilcox_results) {
    wilcox_result <- read_excel('../output/metadata/wilcox_items/adata_18vs02mo_d0_wilcox.xlsx', sheet = item)
    colnames(wilcox_result) <- c('index', 'name', 'score', 'log2_fc', 'p_adj')
    # Replace exact zeros with the smallest positive p_adj in the sheet so the log transform stays finite.
    wilcox_result$p_adj[wilcox_result$p_adj == 0] <- min(wilcox_result$p_adj[wilcox_result$p_adj>0])
    wilcox_result$`-log10(p_adj)` = -log(wilcox_result$p_adj, 10)
    wilcox_result$subset = item
    # Highest score first.
    wilcox_result_sorted = wilcox_result[order(wilcox_result$score, decreasing = TRUE),]
    wilcox_result_sorted = wilcox_result_sorted[wilcox_result_sorted$p_adj<=0.05,]
    wilcox_results_combined = bind_rows(wilcox_results_combined, wilcox_result_sorted) # append all significant rows of this sheet (no top-N cut is applied here)
}


In [None]:
%%R

# Gene symbols for the Cytoscape EMT set (141 entries, original order kept).
# NOTE(review): 'Angptl4', 'Fn1' and 'Cadm1' each occur twice; harmless for
# %in% lookups, but confirm the repeats are not placeholders for other genes.
cytoscape_emt_genes = c(
  'Igf1', 'Lgals9', 'Fn1', 'Fasl', 'Dspp', 'Chad', 'Dmp1', 'Cdh1', 'Cdh2', 'Cd40lg',
  'Fcer2a', 'Cadm1', 'Ibsp', 'Angpt2', 'Angptl2', 'Angptl3', 'Angptl4', 'Ptn', 'Mdk', 'Angptl4',
  'Mgp', 'Serpine2', 'Lgals1', 'Sparc', 'Jun', 'Col3a1', 'Lum', 'Bgn', 'Serpinh1', 'Vim',
  'Col6a2', 'Fbn1', 'Dcn', 'Ccn1', 'Spp1', 'Col1a2', 'Igfbp3', 'Col1a1', 'Matn2', 'Abi3bp',
  'Htra1', 'Pmp22', 'Fbln1', 'Col5a2', 'Pcolce', 'Ecm1', 'Efemp2', 'Mylk', 'Lox', 'Postn',
  'Il6', 'Basp1', 'Col5a3', 'Timp3', 'Plaur', 'Tgm2', 'Col4a1', 'Serpine1', 'Gadd45a', 'Lama2',
  'Loxl1', 'Col6a3', 'Itgav', 'Timp1', 'Lamc1', 'Qsox1', 'Vegfc', 'Fap', 'Ppib', 'Mmp2',
  'Tgfbr3', 'Inhba', 'Thy1', 'Plod1', 'Dab2', 'Fbn2', 'Nnmt', 'Vcan', 'Col4a2', 'Thbs2',
  'Ecm2', 'Sfrp4', 'Itgb1', 'Ptx3', 'Col5a1', 'Fmod', 'Col16a1', 'Bmp1', 'Flna', 'Fgf2',
  'Wnt5a', 'Fbln2', 'Pvr', 'Grem1', 'Comp', 'P3h1', 'Itga2', 'Vegfa', 'Cxcl12', 'Col8a2',
  'Matn3', 'Tnfaip3', 'Col11a1', 'Cxcl15', 'Oxtr', 'Foxc2', 'Dkk1', 'Nt5e', 'Col7a1', 'Plod3',
  'Snai2', 'Cxcl5', 'Nid2', 'Itgb3', 'Slit2', 'Eln', 'Pthlh', 'Thbs1', 'Fn1', 'Lrrc15',
  'Scg2', 'Fbln5', 'Mcm7', 'Lama1', 'Pcolce2', 'Eno2', 'Mfap5', 'Copa', 'Bdnf', 'Fas',
  'Tgfb1', 'Tgfbi', 'Lama3', 'Col12a1', 'Gpc1', 'Slc6a8', 'Sfrp1', 'Cadm1', 'Gja1', 'Lamc2',
  'Tnc'
)

In [None]:
%%R

# First definition of the ligand list `cellchat_emt_L` (original order kept).
# NOTE(review): the next cell assigns `cellchat_emt_L` again, so this version is
# overwritten before any use visible in this part of the notebook.
cellchat_emt_L = c(
  'Angpt2', 'Angptl2', 'Angptl3', 'Angptl4', 'Ibsp', 'Cadm1', 'Fcer2a', 'Cd40lg', 'Cd96',
  'Cdh1', 'Cdh2', 'Chad', 'Dmp1', 'Dspp', 'Fasl', 'Fn1', 'Lgals9', 'Igf1',
  'Jam2', 'L1cam',
  'Lama1', 'Lama2', 'Lama3', 'Lama4', 'Lama5',
  'Lamb1', 'Lamb2', 'Lamb3',
  'Lamc1', 'Lamc2', 'Lamc3',
  'Mdk', 'Wnt5a', 'Nectin3', 'Dlk1', 'Dll3', 'Dll4', 'Jag2', 'Dll1', 'Nrg1', 'Oxt',
  'Pdgfa', 'Pdgfb', 'Pdgfd', 'Postn', 'Ptn', 'Tnfsf11', 'Sele', 'Spp1',
  'Tnr', 'Tnc', 'Tnxb',
  'Col1a1', 'Col1a2', 'Col2a1',
  'Col4a1', 'Col4a2', 'Col4a3', 'Col4a4', 'Col4a5', 'Col4a6',
  'Col6a1', 'Col6a2', 'Col6a3', 'Col6a4', 'Col6a5', 'Col6a6',
  'Col9a1', 'Col9a2', 'Col9a3',
  'Thbs1', 'Thbs2', 'Thbs3', 'Thbs4',
  'Comp', 'Thy1', 'Tigit', 'Tnfsf12', 'Nampt', 'Vtn', 'Vwf'
)

In [None]:
%%R

# Ligand list used to filter the combined Wilcoxon table (34 symbols, original order kept).
cellchat_emt_L = c(
  'Cxcl12', 'Ccl19', 'Ccl21a', 'Ccl25',
  'Fgf1', 'Fgf2', 'Fgf7', 'Fgf10', 'Fgf18', 'Fgf21',
  'Bmp4', 'Bmp7', 'Flt3l', 'Kitl', 'Il7',
  'Nrg1', 'Lama1', 'Igf1', 'Col4a5', 'Lama2', 'Cadm1', 'Mdk', 'Ptn', 'Vtn',
  'Thbs1', 'Tnxb', 'Angptl4', 'Il6', 'Lgals1', 'Ptx3', 'Serpine1', 'Tgfbi',
  'Mgp', 'Tgfbr3'
)



In [None]:
%%R
# Keep only the rows of the combined Wilcoxon table whose gene is in cellchat_emt_L.
# NOTE(review): the following cell recomputes `tt` with the same expression.
tt = wilcox_results_combined[
  wilcox_results_combined$name %in% cellchat_emt_L,
]


In [None]:
%%R 
#pdf("dotplot_Zscore_d0.pdf", width=4, height=4.5)

# Rows of the combined Wilcoxon table for the selected ligands.
tt = wilcox_results_combined[wilcox_results_combined$name %in% cellchat_emt_L, ]

# Fixed gene order for the chart; rev() reverses the level order.
tt$name <- factor(
  tt$name,
  levels = rev(c(
    'Cxcl12', 'Ccl19', 'Ccl21a', 'Ccl25',
    'Fgf7', 'Fgf1', 'Fgf2', 'Fgf10', 'Fgf18', 'Fgf21',
    'Bmp4', 'Bmp7', 'Flt3l', 'Kitl', 'Il7',
    'Lgals1', 'Ptx3', 'Angptl4', 'Il6', 'Tnxb', 'Thbs1', 'Vtn', 'Ptn', 'Mdk',
    'Cadm1', 'Col4a5', 'Serpine1', 'Lama2', 'Tgfbr3', 'Tgfbi', 'Mgp',
    'Igf1', 'Lama1', 'Nrg1'
  ))
)

# Subsets shown, in display order.
# NOTE(review): subsets not listed here become NA in this factor -- confirm
# whether such rows should be dropped before plotting.
tt$subset <- factor(
  tt$subset,
  levels = c('capsFB', 'intFB', 'medFB', 'capEC', 'cTEC', 'mTEC1', 'mTEC-prol', 'mTEC2', 'MEC')
)

# Clamp the Wilcoxon score to [-3, 3] for the colour scale.
tt$`squished Z-score` = squish(tt$score, range = c(-3, 3), only.finite = TRUE)


In [None]:
%%R -w 10.5 -h 15.75 -u cm

#pdf("dotplot_Zscore_d0_emtL-up.pdf", width=4.15, height=6.05)
# Inline dot chart: genes vs subsets, colour = clamped score, size = -log10(p_adj).
# Colours use the BluetoGreen.14 scheme from dichromat.
print(
  ggdotchart(
    tt,
    x = 'name',
    y = 'subset',
    group = 'subset',
    color = 'squished Z-score',
    size = '-log10(p_adj)',
    rotate = TRUE,
    sorting = 'none',
    xlab = "",
    ylab = ""
  ) +
    scale_color_gradientn(colours = dichromat::colorschemes$BluetoGreen.14) +
    theme_pubr() +
    theme(
      legend.position = 'right',
      axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
    )
)

#dev.off() #

In [None]:
%%R -w 11.25 -h 16.5 -u cm

# Same dot chart as the inline version, written to a PDF file.
# Colours use the BluetoGreen.14 scheme from dichromat.
pdf("dotplot_Zscore_d0_emtL-up.pdf", width=4.3, height=6.55)
print(
  ggdotchart(
    tt,
    x = 'name',
    y = 'subset',
    group = 'subset',
    color = 'squished Z-score',
    size = '-log10(p_adj)',
    rotate = TRUE,
    sorting = 'none',
    xlab = "",
    ylab = ""
  ) +
    scale_color_gradientn(colours = dichromat::colorschemes$BluetoGreen.14) +
    theme_pubr() +
    theme(
      legend.position = 'right',
      axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
    )
)

dev.off() #

### Cytoscape summarized results

In [None]:
%%R

# Reshape the annotated Cytoscape/EnrichmentMap export from wide (one NES and one
# FDR column per subset) to long (one row per gene set x subset) for a dot chart.
fromNetwork <- read_excel("../output/metadata/cytoscape_items/18vs02mo_m5_annotated.xlsx", sheet="Sheet2")
# Drop every column whose name contains one of these fragments.
fromNetwork_clean <- select(fromNetwork, -contains(c("::Dataset_Chart", "::Genes", "::GS_DESCR", "::Name", "shared name", "selected", 
                                                     "::GS_Type", "Colouring", "::ES", "::fwer_qvalue", "::pvalue",
                                                     "aaTEC1", "aaTEC2", "Fat", "MEC", "nmSC", "PCvSMC" )))
#fromNetwork_clean = fromNetwork_clean  %>% group_by(Group) %>% slice_min(order_by = minFDR, n = 5)

# Repeat four per-gene-set columns once per fdr_qvalue column.
# NOTE(review): the columns are picked by position (16, 32, 33, 34); they are
# renamed below to gs_size / name / group / minFDR -- verify the positions still
# match if the spreadsheet layout changes.
forDotplot <- as_tibble(lapply(fromNetwork_clean[,c(16, 32, 33,34)], rep, ncol(select(fromNetwork_clean,starts_with("EnrichmentMap::fdr_qvalue")))))


# Accumulators for the loops below (tmp_subset and subset are not used afterwards in this cell).
tmpFDR = NULL
FDR=NULL
tmpNES = NULL
NES=NULL
tmp_subset = NULL
subset=NULL

# append (in order) all FDR columns and all NES columns
for (i in c(1:ncol(select(fromNetwork_clean ,starts_with("EnrichmentMap::fdr_qvalue"))))) {
  tmpFDR = select(fromNetwork_clean,starts_with("EnrichmentMap::fdr_qvalue"))[i]
  colnames(tmpFDR) = "FDR"
  FDR = rbind(FDR, tmpFDR)
}

for (i in c(1:ncol(select(fromNetwork_clean,starts_with("EnrichmentMap::NES"))))) {
  tmpNES = select(fromNetwork_clean,starts_with("EnrichmentMap::NES"))[i]
  colnames(tmpNES) = "NES"
  NES = rbind(NES, tmpNES) 
}

forDotplot$NES = NES$NES
forDotplot$FDR = FDR$FDR

# Subset label = NES column name without its "EnrichmentMap::NES " prefix, repeated once per gene set.
forDotplot$subset = rep(str_replace(colnames(select(fromNetwork_clean,starts_with("EnrichmentMap::NES"))), "EnrichmentMap::NES ", ""), each=nrow(fromNetwork_clean))
colnames(forDotplot) <- c("gs_size", "name", "group", "minFDR", "NES", "FDR", "subset")
# Mask entries above the 0.05 FDR cut-off.
forDotplot$FDR[forDotplot$FDR>0.05] <- NA
# -log10 of the FDR, offset by 1e-5 so an FDR of 0 stays finite.
forDotplot$FDRtr = -log(forDotplot$FDR+0.00001,10)
# Fixed subset order for the chart.
forDotplot$subset <- factor(forDotplot$subset, levels = c("(capsFB)", "(intFB)", "(medFB)", "(artEC)", "(capEC)", "(venEC)", "(cTEC)", "(mTEC1)", "(mTECprol)", "(mTEC2)", "(mTEC3)", "(tuft)", "(nTEC)", "(goblet)", "(Mlike)"))

In [None]:
%%R -w 22 -h 20 -u cm 

# Pathway dot chart written to PDF: colour = NES, size = transformed FDR.
pdf("dotplot_pathways_d0_top5.pdf", width=8.5, height=7)

print(
  ggdotchart(
    forDotplot,
    x = 'name',
    y = 'subset',
    group = 'group',
    color = 'NES',
    size = 'FDRtr',
    rotate = TRUE,
    sorting = "none",
    xlab = "",
    ylab = ""
  ) +
    scale_colour_gradientn(colours = dichromat::colorschemes$BluetoGreen.14) +
    theme_pubclean() +
    theme(
      axis.text.x = element_text(face = "bold", angle = 90),
      axis.text.y = element_text(size = 6)
    ) +
    theme(
      legend.position = "right",
      panel.background = element_rect(colour = "black", size = 1, linetype = "solid")
    )
) #+ 
dev.off() 

In [None]:
%%R

# Same wide-to-long reshaping as the previous Cytoscape cell, applied to the
# "New Top5 (JD)" sheet with an FDR cut-off of 0.1 and no minFDR column.
fromNetwork <- read_excel("../output/metadata/cytoscape_items/18vs02mo_m5_FDR01_annotated_JD2.xlsx", sheet="New Top5 (JD)")

# Drop every column whose name contains one of these fragments.
fromNetwork_clean <- select(fromNetwork, -contains(c("::Dataset_Chart", "::Genes", "::GS_DESCR", "::Name", "shared name", "selected", 
                                                     "::GS_Type", "Colouring", "::ES", "::fwer_qvalue", "::pvalue",
                                                     "aaTEC1", "aaTEC2", "Fat", "MEC", "nmSC", "PCvSMC" )))
# Repeat three per-gene-set columns once per fdr_qvalue column.
# NOTE(review): the columns are picked by position (16, 32, 33) and renamed below
# to gs_size / name / group -- verify the positions against the sheet layout.
forDotplot <- as_tibble(lapply(fromNetwork_clean[,c(16, 32, 33)], rep, ncol(select(fromNetwork_clean,starts_with("EnrichmentMap::fdr_qvalue")))))

# Accumulators for the loops below (tmp_subset and subset are not used afterwards in this cell).
tmpFDR = NULL
FDR=NULL
tmpNES = NULL
NES=NULL
tmp_subset = NULL
subset=NULL

# append (in order) all FDR columns and all NES columns
for (i in c(1:ncol(select(fromNetwork_clean ,starts_with("EnrichmentMap::fdr_qvalue"))))) {
  tmpFDR = select(fromNetwork_clean,starts_with("EnrichmentMap::fdr_qvalue"))[i]
  colnames(tmpFDR) = "FDR"
  FDR = rbind(FDR, tmpFDR)
}

for (i in c(1:ncol(select(fromNetwork_clean,starts_with("EnrichmentMap::NES"))))) {
  tmpNES = select(fromNetwork_clean,starts_with("EnrichmentMap::NES"))[i]
  colnames(tmpNES) = "NES"
  NES = rbind(NES, tmpNES) 
}


forDotplot$NES = NES$NES
forDotplot$FDR = FDR$FDR

# Subset label = NES column name without its "EnrichmentMap::NES " prefix, repeated once per gene set.
forDotplot$subset = rep(str_replace(colnames(select(fromNetwork_clean,starts_with("EnrichmentMap::NES"))), "EnrichmentMap::NES ", ""), each=nrow(fromNetwork_clean))
# After this rename the grouping column is called "group".
colnames(forDotplot) <- c("gs_size", "name", "group", "NES", "FDR", "subset")

# Mask entries above the 0.1 FDR cut-off.
forDotplot$FDR[forDotplot$FDR>0.1] <- NA
# -log10 of the FDR, offset by 1e-5 so an FDR of 0 stays finite.
forDotplot$FDRtr = -log(forDotplot$FDR+0.00001,10)
# Fixed subset order for the chart.
forDotplot$subset <- factor(forDotplot$subset, levels = c("(capsFB)", "(intFB)", "(medFB)", "(artEC)", "(capEC)", "(venEC)", "(cTEC)", "(mTEC1)", "(mTECprol)", "(mTEC2)", "(mTEC3)", "(tuft)", "(nTEC)", "(goblet)", "(Mlike)"))

In [None]:
%%R -w 26 -h 26 -u cm 

# Pathway dot chart for the "New Top5 (JD)" sheet, written to PDF:
# colour = NES, size = transformed FDR.
pdf("dotplot_pathways_d0_top5_NES_JD.pdf", width=9.5, height=13.5)

# `group` must name a column of forDotplot. The preceding cell renames the columns
# to gs_size / name / group / NES / FDR / subset, so the pre-rename spreadsheet
# header 'group...136' no longer exists; use 'group', as the Sheet2 chart does.
print(ggdotchart(forDotplot,  x='name', y='subset', group = 'group',  size='FDRtr', rotate=TRUE, color = 'NES', xlab = "", ylab = "", sorting = "none") +
  scale_colour_gradientn(colours = dichromat::colorschemes$BluetoGreen.14) +
  theme_pubclean() + theme(axis.text.x = element_text(face ="bold", angle = 90), axis.text.y = element_text(size =6)) +
  theme(legend.position = "right", panel.background = element_rect(colour = "black",size = 1, linetype = "solid"))) #+ 
dev.off() 

## Load lineage tracing

In [None]:
path_to_h5ad = '../output/metadata/anndata_objects/Fig1pt2.h5ad'

In [None]:
lineage = sc.read_h5ad(path_to_h5ad)
lineage.uns['log1p']["base"] = None

In [None]:
# Stack our day-0 cells and the lineage-tracing cells into a single object.
# join='outer' keeps the union of genes; index_unique='@' appends the key to
# each cell barcode. `keys` must follow the same order as the list of objects.
with_lineage = sc.concat(
    [adata_d0, lineage],
    keys=['ours', 'Foxn1'],
    label='dataset',
    join='outer',
    index_unique='@',
)

In [None]:
def _dataset_from_sample(sample_name):
    """Map a sample name to 'lineage', 'lineage_wt' or 'ours' ('Foxn1' is checked first)."""
    if 'Foxn1' in sample_name:
        return 'lineage'
    if 'wt' in sample_name:
        return 'lineage_wt'
    return 'ours'


# Overwrite the concat-generated 'dataset' label with one derived from the sample name.
with_lineage.obs['dataset'] = [_dataset_from_sample(name) for name in with_lineage.obs['sample']]

In [None]:
with_lineage.uns['dataset_colors']=['blue', 'grey', 'grey80']

In [None]:
with_lineage.uns['stage_colors']= ['#76D6FF', '#FF8072', '#FF8072']

In [None]:
with_lineage.uns['cell_type_colors'] = ['#d62728','#19c9b3', '#FFA5D2', '#ff7f0e','#199919', '#aa40fc']

In [None]:
with_lineage.uns['cell_type_subset_colors'] = ['#F6222E', '#3283FE', '#16FF32', '#BDCDFF', '#3B00FB', '#1CFFCE', '#d62728', '#19c9b3','#FFA5D2',   'grey', '#2ED9FF', '#c1c119', '#8b0000', '#FE00FA', "#F8A19F", '#1CBE4F','#B5EFB5',  '#AA0DFE','#FEAF16', '#325A9B', '#C075A6']

In [None]:
sc.pp.highly_variable_genes(with_lineage, n_top_genes=3500, n_bins=20, flavor='seurat',  inplace=True)

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(with_lineage, n_comps=200, svd_solver='arpack', random_state=rng)

In [None]:
observe_variance(with_lineage)

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(with_lineage, n_comps=40, svd_solver='arpack', random_state=rng)

### Integrating data using harmony

In [None]:
sce.pp.harmony_integrate(with_lineage, 'sample')

In [None]:
sc.pp.neighbors(with_lineage, n_neighbors=15, use_rep='X_pca_harmony')
sc.tl.umap(with_lineage)

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

# Overview UMAP of the integrated object.
# NOTE(review): the original comment here read "PC 75", but the PCA above is run
# with n_comps=40 -- the note looks stale.
_with_lineage_umap_opts = dict(
    color_map='Spectral_r',
    use_raw=False,
    ncols=4,
    wspace=0.2,
    outline_width=[0.6, 0.05],
    size=15,
    na_color='white',
    frameon=False,
    add_outline=True,
    sort_order=False,
)
sc.pl.umap(
    with_lineage,
    color=['dataset', 'stage', 'cell_type', 'cell_type_subset', 'Trpa1'],
    **_with_lineage_umap_opts,
)

In [None]:
### Isolate TEC_d0 + lineage

In [None]:
# Keep only TEC cells. `.copy()` turns the AnnData view into an independent
# object: the following cells modify it in place (gene filtering, HVG, PCA),
# which on a view triggers an implicit copy plus an ImplicitModificationWarning.
TEC_with_lineage = with_lineage[with_lineage.obs['cell_type'] == 'TEC'].copy()

In [None]:
# Remove columns with all 0s
sc.pp.filter_genes(TEC_with_lineage, min_cells=1)

In [None]:
sc.pp.highly_variable_genes(TEC_with_lineage, n_top_genes=3500, n_bins=20, flavor='seurat')

In [None]:
# Seeded PCA on the highly variable genes, harmony correction by sample, neighbour graph, UMAP.
rng = np.random.RandomState(seed=42)
sc.tl.pca(
    TEC_with_lineage,
    n_comps=45,
    svd_solver='arpack',
    use_highly_variable=True,
    random_state=rng,
)
sce.pp.harmony_integrate(TEC_with_lineage, key='sample')
sc.pp.neighbors(TEC_with_lineage, use_rep='X_pca_harmony', n_neighbors=15)
sc.tl.umap(TEC_with_lineage)

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

# All TEC cells (both datasets), coloured by the two transgene features and the
# main annotations; saved with the suffix 'lineage+ours.pdf'.
_tec_lineage_panels = ['tdT-WPRE_trans', 'CreER-WPRE_trans', 'stage', 'dataset', 'cell_type_subset']
sc.pl.umap(
    TEC_with_lineage,
    color=_tec_lineage_panels,
    save='lineage+ours.pdf',
    color_map='Spectral_r',
    use_raw=False,
    ncols=5,
    wspace=0.3,
    size=15,
    na_color='white',
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    sort_order=False,
)

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

# Same panels, restricted to cells labelled dataset == 'lineage'; saved with the suffix 'lineage.pdf'.
_lineage_only = TEC_with_lineage[TEC_with_lineage.obs['dataset'] == 'lineage']
sc.pl.umap(
    _lineage_only,
    color=['tdT-WPRE_trans', 'CreER-WPRE_trans', 'stage', 'dataset', 'cell_type_subset'],
    save='lineage.pdf',
    color_map='Spectral_r',
    use_raw=False,
    ncols=5,
    wspace=0.3,
    size=15,
    na_color='white',
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    sort_order=False,
)

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

# Same panels, restricted to cells labelled dataset == 'ours'; saved with the suffix 'ours.pdf'.
_ours_only = TEC_with_lineage[TEC_with_lineage.obs['dataset'] == 'ours']
sc.pl.umap(
    _ours_only,
    color=['tdT-WPRE_trans', 'CreER-WPRE_trans', 'stage', 'dataset', 'cell_type_subset'],
    save='ours.pdf',
    color_map='Spectral_r',
    use_raw=False,
    ncols=5,
    wspace=0.3,
    size=15,
    na_color='white',
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    sort_order=False,
)

## Load human data

In [None]:
#import sys
#!{sys.executable} -m pip install mousipy

In [None]:
from mousipy import translate

In [None]:
path_to_h5ad = '../output/metadata/anndata_objects/fig1_pt3.h5ad'

In [None]:
human_cd45neg = sc.read_h5ad(path_to_h5ad)

In [None]:
humanized_adata_d0 = translate(adata_d0)

In [None]:
# Stack the translated mouse object and the human object.
# join='inner' keeps only the genes present in both; index_unique='@' appends
# the key to each cell barcode. `keys` must follow the order of the object list.
in_human = sc.concat(
    [humanized_adata_d0, human_cd45neg],
    keys=['ours', 'human'],
    label='dataset',
    join='inner',
    index_unique='@',
)

In [None]:
# Colour palettes for the categorical annotations of the combined mouse/human object.
in_human.uns['dataset_colors'] = ['blue', 'grey80']
in_human.uns['stage_colors'] = ['#76D6FF', '#76D6FF', '#FF8072', '#FF8072']
in_human.uns['cell_type_colors'] = [
    '#d62728', '#19c9b3', '#FFA5D2',
    '#ff7f0e', '#199919', '#aa40fc',
]
# 22 entries: the 21 used for the lineage object plus 'black'.
in_human.uns['cell_type_subset_colors'] = [
    '#F6222E', '#3283FE', '#16FF32', '#BDCDFF', '#3B00FB', '#1CFFCE', '#d62728',
    '#19c9b3', '#FFA5D2', 'grey', '#2ED9FF', '#c1c119', '#8b0000', '#FE00FA',
    '#F8A19F', '#1CBE4F', '#B5EFB5', '#AA0DFE', '#FEAF16', '#325A9B', '#C075A6',
    'black',
]

In [None]:
sc.pp.highly_variable_genes(in_human, n_bins=20, flavor='seurat', inplace=True)

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(in_human, n_comps=200, svd_solver='arpack', random_state=rng)

In [None]:
observe_variance(in_human)

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(in_human, n_comps=60, svd_solver='arpack', random_state=rng)

### Integrating data using harmony

In [None]:
sce.pp.harmony_integrate(in_human, 'dataset')

In [None]:
sc.pp.neighbors(in_human, n_neighbors=15, use_rep='X_pca_harmony')
sc.tl.umap(in_human)

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

# Overview UMAP of the combined mouse/human object.
# NOTE(review): the original comment here read "PC 75", but the PCA above is run
# with n_comps=60 -- the note looks stale.
_in_human_umap_opts = dict(
    color_map='Spectral_r',
    use_raw=False,
    ncols=4,
    wspace=0.2,
    outline_width=[0.6, 0.05],
    na_color='white',
    size=15,
    frameon=False,
    add_outline=True,
    sort_order=False,
)
sc.pl.umap(
    in_human,
    color=['dataset', 'stage', 'cell_type', 'cell_type_subset'],
    **_in_human_umap_opts,
)

In [None]:
### Isolate TEC_d0 + human TEC

In [None]:
# Keep only TEC cells. `.copy()` turns the AnnData view into an independent
# object: the following cells modify it in place (gene filtering, HVG, PCA),
# which on a view triggers an implicit copy plus an ImplicitModificationWarning.
TEC_with_humanTEC = in_human[in_human.obs['cell_type'] == 'TEC'].copy()

In [None]:
# Remove columns with all 0s
sc.pp.filter_genes(TEC_with_humanTEC, min_cells=1)

In [None]:
sc.pp.highly_variable_genes(TEC_with_humanTEC, n_top_genes=500, n_bins=20, flavor='seurat', batch_key='dataset')

In [None]:
# Seeded PCA on the highly variable genes, harmony correction by dataset, neighbour graph, UMAP.
rng = np.random.RandomState(seed=42)
sc.tl.pca(
    TEC_with_humanTEC,
    n_comps=45,
    svd_solver='arpack',
    use_highly_variable=True,
    random_state=rng,
)
sce.pp.harmony_integrate(TEC_with_humanTEC, key='dataset')
sc.pp.neighbors(TEC_with_humanTEC, use_rep='X_pca_harmony', n_neighbors=15)
sc.tl.umap(TEC_with_humanTEC)

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

# TEC-only UMAP: annotations first, then individual genes and the 'leiden_1.0' clustering.
_human_tec_panels = [
    'dataset', 'stage', 'cell_type', 'cell_type_subset',
    'HES6', 'ASCL1', 'KRT5', 'CHGA', 'ALDOC', 'CD24', 'KIF19', 'TRPM3', 'CCL21', 'SCG5',
    'leiden_1.0',
]
sc.pl.umap(
    TEC_with_humanTEC,
    color=_human_tec_panels,
    color_map='Spectral_r',
    use_raw=False,
    ncols=5,
    wspace=0.3,
    na_color='white',
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    sort_order=False,
)

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

# TEC-only UMAP: annotations first, then APP, four keratin genes and IL6R.
_human_tec_keratin_panels = [
    'dataset', 'stage', 'cell_type', 'cell_type_subset',
    'APP', 'KRT14', 'KRT5', 'KRT8', 'KRT10', 'IL6R',
]
sc.pl.umap(
    TEC_with_humanTEC,
    color=_human_tec_keratin_panels,
    color_map='Spectral_r',
    use_raw=False,
    ncols=4,
    wspace=0.3,
    na_color='white',
    frameon=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    sort_order=False,
)