## Notebook setup

In [None]:
import scanpy as sc
import scanpy.external as sce
import numpy as np
import pandas as pd
import warnings, scipy.sparse as sp, matplotlib, matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.pyplot import rc_context
from collections import Counter
import matplotlib.font_manager
import pyreadr
import rpy2
from rpy2.robjects.packages import importr
import rpy2.robjects as robjects
import magic
#import seaborn as sns
import palantir
import loompy
#from scipy.sparse import csgraph

# Font handling for exported figures: font type 42 for PDF/PS output
# (TrueType embedding per matplotlib's rcParams documentation) and
# Arial as the sans-serif face at 14 pt.
matplotlib.rcParams.update({
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'font.family': 'sans-serif',
    'font.sans-serif': 'Arial',
})
matplotlib.rcParams['font.size'] = 14
import matplotlib.lines as lines

# Let pandas display up to 200 rows before truncating.
pd.options.display.max_rows = 200

# Notebook-wide scanpy figure defaults (individual plotting cells re-apply
# their own settings before drawing).
sc.set_figure_params(
    dpi=80,
    dpi_save=300,
    color_map='Spectral_r',
    vector_friendly=True,
    transparent=True,
)
# verbosity: errors (0), warnings (1), info (2), hints (3)
sc.settings.verbosity = 0
sc.logging.print_header()

In [None]:
# 16 hex colours passed as `palette=` to the UMAP calls further down.
user_defined_palette = [
    '#F6222E', '#FEAF16', '#3283FE', '#BDCDFF',
    '#3B00FB', '#F8A19F', '#1CFFCE', '#C4451C',
    '#2ED9FF', '#c1c119', '#8b0000', '#FE00FA',
    '#1CBE4F', '#B5EFB5', '#0e452b', '#AA0DFE',
]

In [None]:
# Sequential colormap: pale lavender (#E6E6FF) up to pure blue (#0000FF).
user_defined_cmap_markers = LinearSegmentedColormap.from_list(
    'mycmap',
    [
        "#E6E6FF", "#CCCCFF", "#B2B2FF", "#9999FF",
        "#6666FF", "#3333FF", "#0000FF",
    ],
)
# Two-sided colormap: pure blue -> pale blue -> pale green -> pure green.
# Both colormaps are created under the same name, 'mycmap'.
user_defined_cmap_degs = LinearSegmentedColormap.from_list(
    'mycmap',
    [
        "#0000FF", "#3333FF", "#6666FF", "#9999FF", "#B2B2FF", "#CCCCFF", "#E6E6FF",
        "#E6FFE6", "#CCFFCC", "#B2FFB2", "#99FF99", "#66FF66", "#33FF33", "#00FF00",
    ],
)

In [None]:
%matplotlib inline 

## Load data for Extended Data Figure 5

In [None]:
path_to_h5ad = '../output/metadata/anndata_objects/ext_fig5.h5ad'

In [None]:
adata_d147 = sc.read_h5ad(path_to_h5ad)
adata_d147.uns['log1p']["base"] = None

In [None]:
# UMAP of the full object, one panel per metadata column.
sc.pl.umap(
    adata_d147,
    color=['cell_type', 'stage', 'day'],
    use_raw=False,
    color_map='Spectral_r',
    sort_order=False,
    size=15,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    ncols=4,
    wspace=0.3,
)

In [None]:
sc.set_figure_params(
    dpi=80,
    dpi_save=300,
    color_map='Spectral_r',
    vector_friendly=True,
    transparent=True,
)

# Expression of the ten marker genes on the full-object UMAP
# (five panels per row); the figure is also saved to PDF.
sc.pl.umap(
    adata_d147,
    color=[
        'Pdgfra', 'Epcam', 'H2-Aa', 'Pecam1', 'Cdh5',
        'Nkain4', 'Upk3b', 'Acta2', 'Myl9', 'S100b',
    ],
    use_raw=False,
    color_map='Spectral_r',
    sort_order=False,
    size=15,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    ncols=5,
    wspace=0.1,
    save='_canonical_d147_S5.pdf',
)

## Analyze TEC, FB and EC and annotate based on public marker genes and signatures

### Our EC

In [None]:
EC_d147 = adata_d147[(adata_d147.obs['cell_type']=='EC')]

In [None]:
# Remove columns with all 0s
sc.pp.filter_genes(EC_d147, min_cells=1)

#### Identify highly variable genes (all samples but day 1) and replot the data

In [None]:
EC_d47 = EC_d147[EC_d147.obs['day']!='d1']

In [None]:
# Remove genes that are not expressed in any cells (remove columns with all 0s)
sc.pp.filter_genes(EC_d47, min_cells=1)

In [None]:
sc.pp.highly_variable_genes(EC_d47, n_top_genes=3500, flavor='seurat')

In [None]:
hvgs = EC_d47.var[EC_d47.var['highly_variable']==True].index

In [None]:
EC_d147.var['highly_variable'] = ''

In [None]:
EC_d147.var['highly_variable'] = [True if x in hvgs else False for x in EC_d147.var['highly_variable'].index]

In [None]:
# Exploratory PCA with 200 components on the flagged genes; its variance
# ratios are inspected in the next cell.
rng = np.random.RandomState(seed=42)
sc.tl.pca(
    EC_d147,
    n_comps=200,
    use_highly_variable=True,
    svd_solver='arpack',
    random_state=rng,
)

In [None]:
def observe_variance(anndata_object):
    """Plot per-PC and cumulative explained variance of a computed PCA.

    Reads ``anndata_object.uns['pca']['variance_ratio']`` and draws two
    scatter panels side by side: the fraction of variance explained by each
    principal component (left) and its cumulative sum (right).

    Parameters
    ----------
    anndata_object
        Object exposing ``.uns['pca']['variance_ratio']``.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``. The previous version
        returned the ``plt.show`` function itself without calling it, which
        only made the notebook print a function repr.
    """
    variance_ratio = anndata_object.uns['pca']['variance_ratio']
    # PCs are numbered from 0, matching their position in `variance_ratio`.
    pc_index = range(len(variance_ratio))

    fig = plt.figure(figsize=(10,5))
    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122)

    # variance per principal component
    ax1.scatter(pc_index, variance_ratio, s=4)
    ax1.set_xlabel('PC')
    ax1.set_ylabel('Fraction of variance explained\n')
    ax1.set_title('Fraction of variance explained per PC\n')

    # cumulative variance explained
    ax2.scatter(pc_index, np.cumsum(variance_ratio), s=4)
    ax2.set_xlabel('PC')
    ax2.set_ylabel('Cumulative fraction of variance\nexplained')
    ax2.set_title('Cumulative fraction of variance\nexplained by PCs')

    fig.tight_layout()
    plt.show()
observe_variance(EC_d147)

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(EC_d147, n_comps=35, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
sce.pp.harmony_integrate(EC_d147, 'sample')

In [None]:
sc.pp.neighbors(EC_d147, n_neighbors=15, use_rep='X_pca_harmony')
sc.tl.umap(EC_d147)

In [None]:
sc.set_figure_params(
    dpi=80,
    dpi_save=300,
    color_map='viridis',
    vector_friendly=True,
    transparent=True,
)

# EC-only UMAP coloured by four metadata columns.
sc.pl.umap(
    EC_d147,
    color=['cell_type', 'stage', 'day', 'sample'],
    cmap='Spectral_r',
    size=15,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    wspace=0.3,
    ncols=6,
)

### EC signatures at steady state

In [None]:
# Gene signatures (50 genes each) for the three EC subsets scored below.
artEC_sign = [
    'Fbln5', 'Stmn2', 'Clu', 'Sema3g', 'Glul', 'Tm4sf1', 'Eps8l2', 'Col8a1', 'Nebl', 'Vegfc',
    'Alpl', 'Arl15', 'Gja4', 'Edn1', 'S100a6', 'Mast4', 'Sox17', 'Pdgfd', 'Fbln2', 'Vim',
    'Ltbp4', 'Slc6a6', 'Klf2', 'Heg1', 'Epas1', 'Fxyd5', 'Ly6a', 'Crip1', 'Fn1', 'Cd9',
    'Mecom', 'Tsc22d1', 'Bsg', 'Atox1', 'Podxl', 'Ptprr', 'Ebf1', 'Cdk19', 'Icam2', 'Tspo',
    'Cst3', 'Eln', 'Azin1', 'Pcsk5', 'Gadd45g', 'Timp3', 'Ace', 'Tanc2', 'Rgs10', 'Gja5',
]
capEC_sign = [
    'Gpihbp1', 'Rgcc', 'Fabp4', 'Cd36', 'Car4', 'Mgll', 'Cd300lg', 'Tcf15', 'Kdr', 'Arhgap18',
    'Igfbp7', 'Kank3', 'Aqp7', 'Etl4', 'Tspan13', 'Cavin2', 'Hspb1', 'Ppp1r2', 'Lims2', 'Fabp5',
    'Nrp1', 'Timp4', 'Gng11', 'Sept4', 'Lpl', 'Gm12002', 'Ccdc85a', 'Plpp3', 'Ablim3', 'Sparc',
    'Xdh', 'Tmsb4x', 'AW112010', 'Ctnnbip1', 'Thrsp', 'Adgrl4', 'Cav2', 'Cxcl12', 'Dhrs3', 'Cd81',
    'Emcn', 'Tcim', 'C1qtnf9', 'Sparcl1', 'Ccdc85b', 'Pitpnc1', 'Rflnb', 'Ubb', 'Ly6c1', 'Sult1a1',
]
venEC_sign = [
    'Lrg1', 'Il6st', 'Vwf', 'Plvap', 'mt-Co1', 'Selp', 'Spint2', 'Pcdh7', 'mt-Nd4', '2200002D01Rik',
    'Tmsb10', 'Eef1a1', 'mt-Co3', 'Bgn', 'Pde4d', 'mt-Atp6', 'Aqp1', 'Tpt1', 'Ctla2a', 'Ackr1',
    'Apoe', 'Pdlim1', 'Cd9', 'St3gal4', 'Ehd4', 'Fth1', 'Eef1b2', 'Pecam1', 'Thsd7a', 'Col15a1',
    'Man1a', 'Vim', 'Enpp2', 'Rbp1', 'Igfbp4', 'Abca1', 'Csrp2', 'Slco2b1', 'Zfp521', 'Vcam1',
    'Dpysl3', 'Nr2f2', 'Pam', 'Ldb2', 'Insr', 'Cd74', 'Tmem176b', 'Il1r1', 'Arrb1', 'Arhgap26',
]

In [None]:
# Only the first `x` genes of each signature are used for scoring.
x = 9
for score_name, signature in (
    ('artEC_sign', artEC_sign),
    ('capEC_sign', capEC_sign),
    ('venEC_sign', venEC_sign),
):
    # Writes one score column per signature into EC_d147.obs.
    sc.tl.score_genes(
        EC_d147,
        gene_list=signature[:x],
        score_name=score_name,
        use_raw=False,
    )


In [None]:
sc.set_figure_params(
    dpi=80,
    dpi_save=300,
    color_map='viridis',
    vector_friendly=True,
    transparent=True,
)

# Signature scores on the EC UMAP; the colour scale starts at 0 (vmin=0).
sc.pl.umap(
    EC_d147,
    color=['artEC_sign', 'capEC_sign', 'venEC_sign'],
    use_raw=False,
    palette=user_defined_palette,
    color_map='Spectral_r',
    vmin=0,
    sort_order=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    ncols=5,
    wspace=0.1,
    save='_signEC_d147_S5.pdf',
)

### EC clustering and annotation

In [None]:
# Leiden clustering at resolutions 0.1 ... 1.0; each run is stored in
# EC_d147.obs under 'leiden_<resolution>'.
for resolution_parameter in (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0):
    sc.tl.leiden(
        EC_d147,
        resolution=resolution_parameter,
        key_added=f'leiden_{resolution_parameter}',
        random_state=42,
    )

In [None]:
sc.set_figure_params(
    dpi=80,
    dpi_save=300,
    color_map='viridis',
    vector_friendly=False,
    transparent=True,
)
# One panel per clustering resolution: 'leiden_0.1' ... 'leiden_1.0'.
sc.pl.umap(
    EC_d147,
    color=[f'leiden_{tenth / 10}' for tenth in range(1, 11)],
    use_raw=False,
    palette=user_defined_palette,
    color_map='Spectral_r',
    sort_order=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    ncols=5,
    wspace=0.7,
)

In [None]:
EC_d147.obs['cell_type_subset'] = ['0:arEC' if (x=='3') else 
                                 '1:capEC' if (x=='0' or x=='1') else
                                 '2:venEC' if (x=='2') else 'ERROR' for x in EC_d147.obs['leiden_0.2']] 

In [None]:
EC_d147.uns['cell_type_subset_colors'] = ['#F6222E', '#3283FE', '#16FF32', 'grey']

In [None]:
sc.set_figure_params(
    dpi=80,
    dpi_save=300,
    color_map='viridis',
    vector_friendly=True,
    transparent=True,
)

# EC UMAP with the chosen clustering and the derived subset labels; saved to PDF.
sc.pl.umap(
    EC_d147,
    color=['day', 'stage', 'leiden_0.2', 'cell_type_subset'],
    cmap='Spectral_r',
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    wspace=0.3,
    ncols=6,
    save='_subsetEC_d147_S5.pdf',
)

### Our FB

In [None]:
FB_d147 = adata_d147[(adata_d147.obs['cell_type']=='FB')]

In [None]:
# Remove columns with all 0s
sc.pp.filter_genes(FB_d147, min_cells=1)

#### Identify highly variable genes (all samples but day 1) and replot the data

In [None]:
FB_d47 = FB_d147[FB_d147.obs['day']!='d1']

In [None]:
# Remove genes that are not expressed in any cells (remove columns with all 0s)
sc.pp.filter_genes(FB_d47, min_cells=1)

In [None]:
sc.pp.highly_variable_genes(FB_d47, n_top_genes=3500, flavor='seurat')

In [None]:
hvgs = FB_d47.var[FB_d47.var['highly_variable']==True].index

In [None]:
FB_d147.var['highly_variable'] = ''

In [None]:
FB_d147.var['highly_variable'] = [True if x in hvgs else False for x in FB_d147.var['highly_variable'].index]

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(FB_d147, n_comps=200, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
observe_variance(FB_d147)

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(FB_d147, n_comps=30, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
sce.pp.harmony_integrate(FB_d147, 'sample')

In [None]:
sc.pp.neighbors(FB_d147, n_neighbors=15, use_rep='X_pca_harmony')
sc.tl.umap(FB_d147)

In [None]:
sc.set_figure_params(
    dpi=80,
    dpi_save=300,
    color_map='viridis',
    vector_friendly=True,
    transparent=True,
)

# Adipoq expression on the FB UMAP.
sc.pl.umap(
    FB_d147,
    color=['Adipoq'],
    cmap='Spectral_r',
    size=15,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    wspace=0.3,
    ncols=6,
)

### FB clustering and annotation

In [None]:
# Leiden clustering at resolutions 0.1 ... 1.0; each run is stored in
# FB_d147.obs under 'leiden_<resolution>'.
for resolution_parameter in (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0):
    sc.tl.leiden(
        FB_d147,
        resolution=resolution_parameter,
        key_added=f'leiden_{resolution_parameter}',
        random_state=42,
    )

In [None]:
sc.set_figure_params(
    dpi=80,
    dpi_save=300,
    color_map='viridis',
    vector_friendly=False,
    transparent=True,
)
# One panel per clustering resolution: 'leiden_0.1' ... 'leiden_1.0'.
sc.pl.umap(
    FB_d147,
    color=[f'leiden_{tenth / 10}' for tenth in range(1, 11)],
    use_raw=False,
    palette=user_defined_palette,
    color_map='Spectral_r',
    sort_order=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    ncols=5,
    wspace=0.7,
)

In [None]:
FB_d147.obs['cell_type_subset'] = ['3:capsFB' if (x=='1') else 
                                   '4:intFB' if (x=='2' or x=='3' or x=='4') else
                                   '5:medFB' if (x=='0') else
                                   '9:Fat' if (x=='5') else 'ERROR' for x in FB_d147.obs['leiden_0.4']] 

In [None]:
FB_d147.uns['cell_type_subset_colors'] = ['#BDCDFF', '#3B00FB', '#1CFFCE', 'grey']

In [None]:
sc.set_figure_params(
    dpi=80,
    dpi_save=300,
    color_map='viridis',
    vector_friendly=True,
    transparent=True,
)

# FB UMAP coloured by the subset labels (still including '9:Fat').
sc.pl.umap(
    FB_d147,
    color=['cell_type_subset'],
    cmap='Spectral_r',
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    wspace=0.3,
    ncols=6,
)

In [None]:
FB_d147_nofat = FB_d147[(FB_d147.obs['cell_type_subset']!='9:Fat')]

In [None]:
# Remove columns with all 0s
sc.pp.filter_genes(FB_d147_nofat, min_cells=1)

In [None]:
sc.pp.highly_variable_genes(FB_d147_nofat, n_top_genes=3500, n_bins=20, flavor='seurat')

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(FB_d147_nofat, n_comps=200, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
observe_variance(FB_d147_nofat)

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(FB_d147_nofat, n_comps=30, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
sce.pp.harmony_integrate(FB_d147_nofat, 'sample')

In [None]:
sc.pp.neighbors(FB_d147_nofat, n_neighbors=15, use_rep='X_pca_harmony')
sc.tl.umap(FB_d147_nofat)

### FB signatures at steady state

In [None]:
# Gene signatures (50 genes each) for the three FB subsets scored below.
capsFB_sign = [
    'Pi16', 'Timp2', 'Fn1', 'Opcml', 'Cd248', 'Mfap5', 'Anxa3', 'Pcolce2', 'Ackr3', 'Fndc1',
    'Igfbp6', 'Sema3c', 'Clec3b', 'Creb5', 'Smpd3', 'Tmem100', 'Adgrd1', 'Ly6c1', 'Pcsk6', 'Nid1',
    'Fbn1', 'Dpp4', 'Pla1a', 'Efhd1', 'Col14a1', 'Ebf2', 'Fstl1', 'Metrnl', 'Plpp3', 'Sdk1',
    'Limch1', 'Axl', 'Loxl1', 'Anxa1', 'Emilin2', 'Tmsb4x', 'Tnxb', 'Gfpt2', 'Lsp1', 'Ly6a',
    'Efna5', 'Adamts5', 'Igfbp5', 'Ugdh', 'Timp3', 'Ogn', 'Heg1', 'Islr', 'Ddr2', 'Ppp1r14b',
]
intFB_sign = [
    'Smoc2', 'Gpx3', 'Penk', 'Igf1', 'Gas1', 'Inmt', 'Pcolce', 'Lrp1', 'Itm2a', 'Gdf10',
    'Rbp1', 'Fbln1', 'Sfrp1', 'Slit3', 'Svep1', 'Auts2', 'Lpl', 'Celf2', 'Serpinf1', 'Il11ra1',
    'Adamts12', 'Abca8a', 'Cygb', 'Nfib', 'Dcn', 'Selenop', 'Ntrk2', 'Tmem119', 'Mgst1', 'Igfbp3',
    'Mmp2', 'Ar', 'Gsn', 'Srpx', 'Col15a1', 'Mt1', 'Lum', 'Olfml3', 'Mfap2', 'Col3a1',
    'Fxyd6', 'Fst', 'Cryab', 'Txnip', 'Htra3', 'Mfap4', 'Itm2b', 'Fbln2', 'Dhrs3', 'Col1a2',
]
medFB_sign = [
    'Csmd1', 'Serpine2', 'Enpp2', 'Ptn', 'Tmem176b', 'Tmem176a', 'B2m', 'Ccl19', 'Igfbp7', 'Apod',
    'Bgn', 'H2-D1', 'Des', 'Ltc4s', 'Lhfp', 'Ifitm1', 'Cd9', 'Apoe', 'Lsamp', 'Ly6e',
    'H2-K1', 'H2-Q7', 'Sparcl1', 'Il34', 'Ndufa4l2', 'Tcf4', 'Spon1', 'Colec12', 'Nrp1', 'Mylk',
    'Hsd11b1', 'Mfge8', 'Jun', 'Cp', 'Meox1', 'Ecscr', 'Trps1', 'Ank2', 'Gpm6b', 'Cstb',
    'Cd63', 'Pde4b', 'Postn', 'H3f3b', 'Mgp', 'Pten', 'Ptma', 'Ltbp1', 'Col15a1', 'Pde7b',
]

In [None]:
# Only the first `x` genes of each signature are used for scoring.
x = 9
for score_name, signature in (
    ('capsFB_sign', capsFB_sign),
    ('intFB_sign', intFB_sign),
    ('medFB_sign', medFB_sign),
):
    # Writes one score column per signature into FB_d147_nofat.obs.
    sc.tl.score_genes(
        FB_d147_nofat,
        gene_list=signature[:x],
        score_name=score_name,
        use_raw=False,
    )

In [None]:
sc.set_figure_params(
    dpi=80,
    dpi_save=300,
    color_map='viridis',
    vector_friendly=True,
    transparent=True,
)

# Signature scores on the fat-free FB UMAP; the colour scale starts at 0.
sc.pl.umap(
    FB_d147_nofat,
    color=['capsFB_sign', 'intFB_sign', 'medFB_sign'],
    use_raw=False,
    palette=user_defined_palette,
    color_map='Spectral_r',
    vmin=0,
    sort_order=False,
    size=15,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    ncols=5,
    wspace=0.1,
    save='_signFB_d147_S5.pdf',
)

In [None]:
sc.set_figure_params(
    dpi=80,
    dpi_save=300,
    color_map='viridis',
    vector_friendly=True,
    transparent=True,
)

# Fat-free FB UMAP with the chosen clustering and subset labels; saved to PDF.
sc.pl.umap(
    FB_d147_nofat,
    color=['day', 'stage', 'leiden_0.4', 'cell_type_subset'],
    cmap='Spectral_r',
    size=15,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    wspace=0.3,
    ncols=6,
    save='_subsetFB_d147_S5.pdf',
)

### Our TEC

In [None]:
TEC_d147 = adata_d147[(adata_d147.obs['cell_type']=='TEC')]

In [None]:
# Remove columns with all 0s
sc.pp.filter_genes(TEC_d147, min_cells=1)

#### Identify highly variable genes (all samples but day 1) and replot the data

In [None]:
TEC_d47 = TEC_d147[TEC_d147.obs['day']!='d1']

In [None]:
# Remove genes that are not expressed in any cells (remove columns with all 0s)
sc.pp.filter_genes(TEC_d47, min_cells=1)

In [None]:
sc.pp.highly_variable_genes(TEC_d47, n_top_genes=3500, flavor='seurat')

In [None]:
hvgs = TEC_d47.var[TEC_d47.var['highly_variable']==True].index

In [None]:
TEC_d147.var['highly_variable'] = ''

In [None]:
TEC_d147.var['highly_variable'] = [True if x in hvgs else False for x in TEC_d147.var['highly_variable'].index]

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(TEC_d147, n_comps=200, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
observe_variance(TEC_d147)

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(TEC_d147, n_comps=35, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
sce.pp.harmony_integrate(TEC_d147, 'sample')

In [None]:
sc.pp.neighbors(TEC_d147, n_neighbors=15, use_rep='X_pca_harmony')
sc.tl.umap(TEC_d147)

### TEC signatures at steady state

In [None]:
# Gene signatures (50 genes each) for the eleven TEC subsets scored below.
aaTEC1_sign = [
    'Zbtb20', 'Cd81', 'Trpm3', 'Slc16a11', 'Gstm1', 'Btg1', 'BC006965', 'Cldn3', 'S100a11', 'Gsta4',
    'Ly6e', 'Cdk19', 'Wfdc18', 'Slc9a3r1', 'Ifi27l2a', 'Aldoc', 'Phlda1', 'Anxa2', 'Taldo1', 'Socs2',
    'Fam107a', 'Krt18', 'Pbx1', 'Tm4sf1', 'Meis2', 'Gsn', 'Anxa1', 'Bmp6', 'Epcam', 'Dbi',
    'Shank2', 'Kif19a', 'Isl1', 'Plin2', 'Ly6a', 'Nupr1', 'Id2', 'Rcn1', 'Gpc6', 'Rdh10',
    'Cd24a', 'mt-Co1', 'Gab2', 'Pde4b', 'Slc5a8', 'Nedd4l', 'Fbxo2', 'Tshz2', 'Acss3', 'Cd9',
]
aaTEC2_sign = [
    'Csmd1', 'Zeb2', 'Mgp', 'Gsn', 'Zbtb20', 'Ndrg2', 'Serpine2', 'Cd81', 'Fxyd1', 'Laptm4a',
    'Gpx3', 'Socs3', 'Igfbp7', 'Airn', 'Lgals1', 'Fos', 'Prrx1', 'Prkg1', 'Junb', 'Sparc',
    'Cebpd', 'Apoe', 'Gstm1', 'Gadd45g', 'Id3', 'Egr1', 'Ptn', 'Cd63', 'Tm4sf1', 'Ccdc80',
    'Phlda1', 'Cavin3', 'Cald1', 'Serping1', 'Sparcl1', 'Ccl19', 'Cst3', 'Cacna1c', 'Jund', 'Fstl1',
    'Tagln2', 'Klf9', 'Jun', 'Gucy1a1', 'Col3a1', 'Notch3', 'Rora', 'Tshz2', 'Lhfp', 'Gnas',
]
cTEC_sign = [
    'Ctsl', 'Cstb', 'Cxcl12', 'Prss16', 'Krt18', 'Gas6', 'Pltp', 'Slc46a2', 'Ndufa11', 'Tbata',
    'Ccl25', 'Psmb11', 'Tmem131l', 'Nlgn1', 'Prxl2b', 'Pax1', 'Wnt4', 'Dpp6', 'Psmb9', 'Bnip3l',
    'Snhg11', 'Copz2', 'AI646519', 'Ank3', 'Lamp2', 'Ndrg3', 'Shisa2', 'Rgcc', 'Krt8', 'Rbfox1',
    'Tsc22d1', 'Kctd1', 'Ly75', 'Sh2d4b', 'Igfbp5', 'Fabp5', 'Plgrkt', 'Spock2', 'Ryr3', 'Syngr1',
    'Zfyve21', 'Tenm4', 'Trp63', 'Clic5', 'Gmpr', 'Castor1', 'Macf1', 'Ifi27', 'Limch1', 'Kcnk2',
]
mTEC1_sign = [
    'Apoe', 'Ifitm3', 'Rbms3', 'Ly6a', 'Ifi27l2a', 'Krt5', 'Krt14', 'Ctsl', 'Gas1', 'Ifitm2',
    'Ccl21a', 'Itm2b', 'Gas6', 'Dcn', 'Isg15', 'Igfbp4', 'Laptm4a', 'Eya4', 'Cpne8', 'Mgp',
    'Mir100hg', 'Sult5a1', 'Rtp4', 'Iigp1', 'Nxn', 'Rbp1', 'Gas5', 'Zfp36l1', 'Gsn', 'Ank3',
    'Cst3', 'Pbx1', 'Lifr', 'Meis2', 'Perp', 'Oasl2', 'Anxa2', 'S100a10', 'Boc', 'Atp1a1',
    'Ccl11', 'Phlda3', 'Ly6e', 'Nedd4', 'Myl6', 'Trp63', 'Anxa1', 'Tpt1', 'Hif1a', 'Gpx3',
]
mTECprol_sign = [
    'Tpm2', 'Krt17', 'Ascl1', 'Tmsb10', 'Ptma', 'Rgs5', 'Adm', 'H2afy2', 'Hes6', 'Krt7',
    'Cdk4', 'Pfn1', 'Sox4', 'Skint10', 'Tubb6', 'Cfl1', 'Mapk13', 'Tubb2b', 'Wfdc18', 'Ccnd2',
    'Stmn1', 'Mdk', 'Ccl19', 'Cd82', 'Gm49708', 'H3f3b', 'Lgals1', 'Prxl2b', 'Ybx1', 'Actg1',
    'Eef1g', 'Nme2', 'Gm15943', 'Il11ra1', 'Colq', 'Cald1', 'Krt5', 'Hnrnpa1', 'Eif4a1', 'Ppia',
    'Ccl21a', 'Tubb5', 'Hsp90ab1', 'Angptl2', 'Kcnq3', 'Lpo', 'Set', 'Hspa8', 'Hmgn1', 'Npm1',
]
mTEC2_sign = [
    'Srgn', 'Cyba', 'Ubd', 'Cd74', 'S100a14', 'Syt1', 'Cd52', 'Aire', 'H2-Aa', 'H2-Oa',
    'Lrrc42', 'Mrpl38', 'H2-Eb1', 'Hagh', 'Nfkbia', 'Dpp10', 'Cdx1', 'Psme2', 'Hdc', 'Calcb',
    'H2-DMb2', 'H2-Eb2', 'Fam89a', 'Gm47938', 'Hspb11', 'Fscn1', 'Csn2', 'Bspry', 'Ndufc2', 'Fezf2',
    'Fabp5', 'Ankrd33b', 'Ing1', 'Gm48239', 'Utf1', 'Il4i1', 'Txn1', 'Ctss', 'Nup85', 'Cib1',
    'Laptm5', 'Tm4sf5', 'Syngr2', 'Dock10', 'Dio1', 'Nsmce2', 'Snx29', 'S100g', 'Cd40', 'Ptprq',
]
mTEC3_sign = [
    'Ly6d', 'Dapl1', 'Dmkn', 'Spink5', 'Skint3', 'Fxyd3', 'Sfn', 'Tacstd2', 'Hspb1', 'Cdkn1a',
    'Oit1', 'Pdzk1ip1', 'Prdx5', 'Cdkn2a', 'Perp', 'Cdkn2b', 'Gsta4', 'Atox1', 'Lypd3', 'Cst6',
    'Metrnl', 'Sbsn', 'Dstn', 'Dsp', 'Calml3', 'Rab11a', 'Krt17', 'Krt23', 'Bcl2a1b', 'Serpinb2',
    'H2afj', 'Jup', 'Avpi1', '4833423E24Rik', 'Ide', 'Cebpb', 'Gsta2', 'Sdc1', 'Clic3', 'Tmem54',
    '2200002D01Rik', 'Dgat2', 'Tmem45a', 'Cldn4', 'Rptn', 'Gltp', 'Nupr1', 'Arf6', 'Tspan8', 'Ier5',
]
tuft_sign = [
    'Mctp1', 'Gng13', 'Avil', 'Rgs13', 'Espn', 'Lrmp', 'Anxa4', 'Ltc4s', 'Ptpn18', 'Bmx',
    'Cystm1', 'Ivns1abp', 'Fyb', 'Ly6g6f', 'Calm2', 'Ethe1', 'Reep5', 'Lima1', '1810046K07Rik', 'Abhd2',
    'Pik3r5', 'Trpm5', 'St18', 'Crip1', 'Dgki', 'Pou2f3', 'Vav1', 'Scand1', 'Chil1', 'Alox5ap',
    'Ptpn6', 'Plac8', 'Alox5', 'Ostf1', 'Oxr1', 'Plk2', 'Ociad2', 'Sh2d6', 'Ahnak2', 'Stk38',
    'Tmem245', 'Atp1a2', 'Aldh2', 'Cd47', 'Fxyd6', 'Inpp5d', 'Dclk1', 'Gpcpd1', 'Sh2d7', 'Strip2',
]
nTEC_sign = [
    'Ptprn2', 'Cacna2d1', 'Car8', 'Scg5', 'Stxbp5l', 'Snap25', 'Chga', 'Cd9', 'Dnajc12', 'Ccser1',
    'Camk2n1', 'Syt7', 'Pam', 'Cacna1a', 'Syt1', 'Cacnb2', 'Cystm1', 'Tshz2', 'Fam183b', 'Cplx2',
    'Ceacam10', 'Btg2', 'Rims2', 'Resp18', 'Fhl2', 'A230057D06Rik', 'Tmem163', 'Rap1gap2', 'Kcnb2', 'Nfasc',
    'Pip5k1b', 'Alcam', 'Chgb', 'Ica1', 'Pcbd1', 'Krt7', 'Insm1', 'Smim22', 'Jund', '5330417C22Rik',
    'Atf3', 'AC149090.1', 'Bex2', 'Prkn', 'Cpn1', 'Tox3', 'Nol4', 'A330076H08Rik', 'Aopep', 'Stard10',
]
goblet_sign = [
    'Wfdc2', 'Cyp2f2', 'Cxcl17', 'Gsto1', 'Nupr1', 'S100a11', 'Anxa3', 'Sorbs2', 'H2-K1', 'Serpinb11',
    'Krt19', 'Slc12a2', 'Fxyd3', 'Ly6d', 'Aqp5', 'Alcam', 'Bsg', 'Bace2', 'Ly6e', 'F3',
    'Ly6a', 'Mecom', 'AW112010', 'Pglyrp1', 'Atp1b1', 'Cp', 'Anxa1', 'Tspan8', 'Irf7', 'Lgals3bp',
    'Tceal9', 'Nfib', 'Gsta4', 'Cd14', 'Mllt3', 'Slc16a11', 'Upk1b', 'Lrrc26', 'Foxa1', 'Ece1',
    'Timp2', 'Ifi27l2a', 'Ces1d', 'Epas1', 'Cbr2', 'Lmo7', 'Spink5', 'Hspb1', 'Smim22', 'Kcnj16',
]
Mlike_sign = [
    'Ccl20', 'Ccl9', 'Serpinb6a', 'Serpinb1a', 'Tmsb4x', '2200002D01Rik', 'Nostrin', 'Ctsh', 'Ccl6', 'AW112010',
    'Spib', 'Bcl2a1d', 'Csn2', 'Cyp2a5', 'Fabp1', 'Hamp', 'Pold1', 'Bcl2a1b', 'Atox1', 'Iscu',
    'Pglyrp1', 'Bcl2a1a', 'Clu', 'Krt20', 'Atp6v1c1', 'H2-M2', 'Gjb2', 'Crip1', 'Fabp5', 'Ubd',
    'Vamp5', 'Marcksl1', 'Plb1', 'Arpc1b', '4930520O04Rik', 'Tnfrsf11b', 'Sephs2', 'Dsg1a', 'Spint2', 'Ftl1',
    'AA467197', 'Sept1', 'Aif1', 'Gadd45a', 'Tnfaip2', 'Vamp8', 'Fabp4', 'Hspe1', 'Rac2', 'Ahcyl2',
]

In [None]:
# Only the first `x` genes of each signature are used for scoring.
x = 9
for score_name, signature in (
    ('aaTEC1_sign', aaTEC1_sign),
    ('aaTEC2_sign', aaTEC2_sign),
    ('cTEC_sign', cTEC_sign),
    ('mTEC1_sign', mTEC1_sign),
    ('mTECprol_sign', mTECprol_sign),
    ('mTEC2_sign', mTEC2_sign),
    ('mTEC3_sign', mTEC3_sign),
    ('tuft_sign', tuft_sign),
    ('nTEC_sign', nTEC_sign),
    ('goblet_sign', goblet_sign),
    ('Mlike_sign', Mlike_sign),
):
    # Writes one score column per signature into TEC_d147.obs.
    sc.tl.score_genes(
        TEC_d147,
        gene_list=signature[:x],
        score_name=score_name,
        use_raw=False,
    )

In [None]:
sc.set_figure_params(
    dpi=80,
    dpi_save=300,
    color_map='viridis',
    vector_friendly=True,
    transparent=True,
)

# Signature scores on the TEC UMAP; the colour scale starts at 0.
sc.pl.umap(
    TEC_d147,
    color=[
        'aaTEC1_sign', 'aaTEC2_sign', 'cTEC_sign', 'mTEC1_sign',
        'mTECprol_sign', 'mTEC2_sign', 'mTEC3_sign', 'tuft_sign',
        'nTEC_sign', 'goblet_sign', 'Mlike_sign',
    ],
    use_raw=False,
    palette=user_defined_palette,
    color_map='Spectral_r',
    vmin=0,
    sort_order=False,
    size=15,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    ncols=5,
    wspace=0.1,
    save='_signTEC_d147_S5.pdf',
)

### TEC clustering and annotation

In [None]:
# Leiden clustering at resolutions 0.1 ... 2.0; each run is stored in
# TEC_d147.obs under 'leiden_<resolution>'.
for resolution_parameter in (
    0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
    1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
):
    sc.tl.leiden(
        TEC_d147,
        resolution=resolution_parameter,
        key_added=f'leiden_{resolution_parameter}',
        random_state=42,
    )

In [None]:
sc.set_figure_params(
    dpi=80,
    dpi_save=300,
    color_map='viridis',
    vector_friendly=False,
    transparent=True,
)
# One panel per clustering resolution shown here: 'leiden_0.1' ... 'leiden_1.5'.
sc.pl.umap(
    TEC_d147,
    color=[f'leiden_{tenth / 10}' for tenth in range(1, 16)],
    use_raw=False,
    palette=user_defined_palette,
    color_map='Spectral_r',
    sort_order=False,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    ncols=5,
    wspace=0.7,
)

In [None]:
TEC_d147.obs['cell_type_subset'] = ['11:aaTEC2' if (x=='16') else '10:aaTEC1' if (x=='7' or x=='18') else
                                    '12:cTEC' if (x=='2' or x=='5' or x=='9' or x=='20') else '13:mTEC1' if (x=='0' or x=='1' or x=='3'  or x=='13') else 
                                    '14:mTEC-prol' if x=='10' else '15:mTEC2' if (x=='6' or x=='8' or x=='14') else
                                    '16:mTEC3' if (x=='15') else '17:mimic(tuft)' if (x=='4' or x=='11' or x=='21') else '18:mimic(neuroendo)' if x=='17' else
                                    '19:mimic(goblet)' if x=='12' else '20:mimic(microfold)' if x=='19' else 'ERROR' for x in TEC_d147.obs['leiden_1.3']] 

In [None]:
TEC_d147.uns['cell_type_subset_colors'] = [ '#2ED9FF', '#c1c119', '#8b0000', '#FE00FA', "#F8A19F", '#1CBE4F','#B5EFB5',  '#AA0DFE','#FEAF16', '#325A9B', '#C075A6', 'black']

In [None]:
sc.set_figure_params(
    dpi=80,
    dpi_save=300,
    color_map='viridis',
    vector_friendly=True,
    transparent=True,
)

# TEC UMAP with the chosen clustering and the derived subset labels; saved to PDF.
sc.pl.umap(
    TEC_d147,
    color=['day', 'stage', 'leiden_1.3', 'cell_type_subset'],
    cmap='Spectral_r',
    size=15,
    add_outline=True,
    outline_width=[0.6, 0.05],
    frameon=False,
    wspace=0.3,
    ncols=6,
    save='_subsetTEC_d147_S5.pdf',
)

## Transfer annotation

In [None]:
annotated_subsets = pd.concat([EC_d147.obs['cell_type_subset'], FB_d147.obs['cell_type_subset'],  TEC_d147.obs['cell_type_subset'], adata_d147[(adata_d147.obs['cell_type']=='6:MEC') | (adata_d147.obs['cell_type']=='7:vSMC/PC') | (adata_d147.obs['cell_type']=='8:nmSC')].obs['cell_type']])

In [None]:
adata_d147.obs['cell_type_subset']=''

In [None]:
adata_d147.obs['cell_type_subset'][adata_d147.obs.index.isin(annotated_subsets.index) == True] = annotated_subsets

In [None]:
adata_d147.uns['cell_type_subset_colors'] = ['#F6222E', '#3283FE', '#16FF32', '#BDCDFF', '#3B00FB', '#1CFFCE', '#d62728', '#19c9b3','#FFA5D2',   'grey', '#2ED9FF', '#c1c119', '#8b0000', '#FE00FA', "#F8A19F", '#1CBE4F','#B5EFB5',  '#AA0DFE','#FEAF16', '#325A9B', '#C075A6', 'black']

In [None]:
sc.pl.umap(adata_d147, color=['stage', 'day', 'cell_type', 'cell_type_subset'], 
                     color_map='Spectral_r',
                     use_raw=False, 
                     ncols=4, 
                     wspace = 0.3,
                     outline_width=[0.6, 0.05], 
                     size=15,  
                     frameon=False, 
                     add_outline=True, 
                     sort_order = False,
                     save='_subsets_d0_S5.pdf'

                     )

In [None]:
path_to_h5ad = '../output/metadata/anndata_objects/figS5_annotated.h5ad'

In [None]:
adata_d147.write(path_to_h5ad)

In [None]:
adata_d147 = sc.read_h5ad(path_to_h5ad)
adata_d147.uns['log1p']["base"] = None

### Violin plots for EC, FB, TEC with scanpy v1.4.6

In [None]:
import scanpy as sc
import pandas as pd
# Let pandas display up to 200 rows before truncating.
pd.options.display.max_rows = 200

sc.set_figure_params(
    dpi=80,
    dpi_save=300,
    color_map='Spectral_r',
    vector_friendly=True,
    transparent=True,
)
# verbosity: errors (0), warnings (1), info (2), hints (3)
sc.settings.verbosity = 3
# Print the versions of the loaded packages (this section targets scanpy v1.4.6).
sc.logging.print_versions()

In [None]:
markers = ['Pdgfra',  'Epcam', 'H2-Aa',  'Pecam1', 'Cdh5',  'Nkain4', 'Upk3b', 'Acta2', 'Myl9', 'S100b']

In [None]:
axes = sc.pl.stacked_violin(adata_d147, markers, groupby='cell_type', palette=['#199919', '#aa40fc', '#ff7f0e',
                                                                               '#d62728', '#19c9b3', '#FFA5D2'], standard_scale='var', cut=3, use_raw=False, figsize=(3,5), order = ['FB', 'TEC', 'EC', '6:MEC', '7:vSMC/PC', '8:nmSC'], swap_axes=True, save='_d147.pdf')

In [None]:
EC_markers = ['Pecam1', 'Vwf', 'Vcam1', 'Glul', 'Cldn5', 'Aqp7', 'Car4', 'Kdr', 'Plvap',  'Selp', 'Bmp4']

In [None]:
EC_d147 = adata_d147[adata_d147.obs['cell_type']=='EC']

In [None]:
axes = sc.pl.stacked_violin(adata_d147[adata_d147.obs['cell_type']=='EC'], EC_markers, groupby='cell_type_subset', palette=adata_d147[adata_d147.obs['cell_type']=='EC'].uns['cell_type_subset_colors'], standard_scale='var', cut=3, use_raw=False, figsize=(1.75,6), swap_axes=True, save='_d147_EC.pdf')

In [None]:
FB_markers = ['Pdgfra', 'Dpp4', 'Fn1',  'Pi16', 'Gpx3', 'Ar', 'Penk', 'Inmt', 'Bmp4', 'Ptn' , 'Postn']

In [None]:
axes = sc.pl.stacked_violin(adata_d147[(adata_d147.obs['cell_type']=='FB') & (adata_d147.obs['cell_type_subset']!='9:Fat')], FB_markers, groupby='cell_type_subset', palette=adata_d147[(adata_d147.obs['cell_type']=='FB') & (adata_d147.obs['cell_type_subset']!='9:Fat')].uns['cell_type_subset_colors'], standard_scale='var', cut=3, use_raw=False, figsize=(1.75,6), swap_axes=True, save='_d147_FB.pdf')

In [None]:
TEC_markers =['Epcam', 'H2-Aa',  'Prss16', 'Ccl21a', 'Ccnd2', 'Aire', 'Ly6d',  'Wfdc2', 'Ccl20', 'Car8',  'Avil']

In [None]:
subset_order = ['12:cTEC','13:mTEC1', '14:mTEC-prol',   '15:mTEC2', '16:mTEC3', '19:mimic(goblet)', '20:mimic(microfold)', '18:mimic(neuroendo)', '17:mimic(tuft)',  '10:aaTEC1', '11:aaTEC2']

In [None]:
axes = sc.pl.stacked_violin(adata_d147[adata_d147.obs['cell_type']=='TEC'], TEC_markers, groupby='cell_type_subset', palette=[ '#8b0000', '#fe00fa', '#f8a19f', '#1cbe4f',
       '#b5efb5','#325a9b','#c075a6', '#feaf16',  '#aa0dfe',  '#2ed9ff', '#c1c119'], standard_scale='var', cut=3, order=subset_order, use_raw=False, figsize=(6.4,6), swap_axes=True, save='_d147_TEC.pdf')