## Notebook setup

In [None]:
import scanpy as sc
import scanpy.external as sce
import numpy as np
import pandas as pd
import warnings, scipy.sparse as sp, matplotlib, matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.pyplot import rc_context
from collections import Counter
import matplotlib.font_manager
import pyreadr
import rpy2
from rpy2.robjects.packages import importr
import rpy2.robjects as robjects
import magic
#import seaborn as sns
import palantir
import loompy
#from scipy.sparse import csgraph

# Global matplotlib font settings: font type 42 for PDF/PS output, Arial sans-serif at size 14.
matplotlib.rcParams.update({
    'pdf.fonttype': 42,
    'ps.fonttype': 42,
    'font.family': 'sans-serif',
    'font.sans-serif': 'Arial',
    'font.size': 14,
})
import matplotlib.lines as lines

# Let pandas display up to 200 rows before truncating.
pd.set_option('display.max_rows', 200)

# scanpy figure defaults for this notebook; individual plotting cells re-apply them with another colormap.
sc.set_figure_params(
    dpi=80,
    dpi_save=300,
    color_map='Spectral_r',
    vector_friendly=True,
    transparent=True,
)
sc.settings.verbosity = 0  # verbosity: errors (0), warnings (1), info (2), hints (3)
sc.logging.print_header()

In [None]:
# Default 16-colour categorical palette (hex codes), four entries per row.
user_defined_palette = [
    '#F6222E', '#FEAF16', '#3283FE', '#BDCDFF',
    '#3B00FB', '#F8A19F', '#1CFFCE', '#C4451C',
    '#2ED9FF', '#c1c119', '#8b0000', '#FE00FA',
    '#1CBE4F', '#B5EFB5', '#0e452b', '#AA0DFE',
]

In [None]:
# Colour ramps shared by the two colormaps below.
light_to_dark_blue = ["#E6E6FF", "#CCCCFF", "#B2B2FF", "#9999FF", "#6666FF", "#3333FF", "#0000FF"]
light_to_dark_green = ["#E6FFE6", "#CCFFCC", "#B2FFB2", "#99FF99", "#66FF66", "#33FF33", "#00FF00"]

# Sequential map: pale blue -> saturated blue.
user_defined_cmap_markers = LinearSegmentedColormap.from_list('mycmap', light_to_dark_blue)
# Diverging map: saturated blue -> pale blue -> pale green -> saturated green.
user_defined_cmap_degs = LinearSegmentedColormap.from_list(
    'mycmap',
    light_to_dark_blue[::-1] + light_to_dark_green,
)

In [None]:
%matplotlib inline 

## Load data for Fig. 3

### Steady state

In [None]:
path_to_h5ad = '../output/metadata/anndata_objects/Fig1_pt1_annotated.h5ad'

In [None]:
# Load the steady-state object. `path_to_h5ad` is reassigned before every load in this
# notebook, so the path cell directly above must have been run last.
adata_d0 = sc.read_h5ad(path_to_h5ad)
# Reset the stored log1p base after loading.
# NOTE(review): presumably works around scanpy raising KeyError 'base' on reloaded objects - confirm.
adata_d0.uns['log1p']["base"] = None

### Damage

In [None]:
path_to_h5ad = '../output/metadata/anndata_objects/Fig5_annotated.h5ad'

In [None]:
# Load the damage time-course object. `path_to_h5ad` is reassigned before every load in this
# notebook, so the path cell directly above must have been run last.
adata_d147 = sc.read_h5ad(path_to_h5ad)
# Reset the stored log1p base after loading.
# NOTE(review): presumably works around scanpy raising KeyError 'base' on reloaded objects - confirm.
adata_d147.uns['log1p']["base"] = None

### Combined

In [None]:
path_to_h5ad = '../output/metadata/anndata_objects/Fig3.h5ad'

In [None]:
# Load the combined object. `path_to_h5ad` is reassigned before every load in this
# notebook, so the path cell directly above must have been run last.
adata_d0147 = sc.read_h5ad(path_to_h5ad)
# Reset the stored log1p base after loading.
# NOTE(review): presumably works around scanpy raising KeyError 'base' on reloaded objects - confirm.
adata_d0147.uns['log1p']["base"] = None

### Transfer annotation

In [None]:
annotated_subsets = pd.concat([adata_d0.obs['cell_type_subset'], adata_d147.obs['cell_type_subset']])

In [None]:
adata_d0147.obs['cell_type_subset']=''

In [None]:
# Copy the per-cell subset labels onto the combined object for every cell whose name also
# occurs in the annotated objects. A single .loc assignment is used instead of chained
# indexing (obs[col][mask] = ...), which may write to a temporary and silently do nothing
# under pandas copy-on-write. The right-hand Series is aligned to the selected rows by index.
has_annotation = adata_d0147.obs.index.isin(annotated_subsets.index)
adata_d0147.obs.loc[has_annotation, 'cell_type_subset'] = annotated_subsets

In [None]:
adata_d0147.uns['cell_type_subset_colors'] = ['#F6222E', '#3283FE', '#16FF32', '#BDCDFF', '#3B00FB', '#1CFFCE', '#d62728', '#19c9b3','#FFA5D2',   'grey', '#2ED9FF', '#c1c119', '#8b0000', '#FE00FA', "#F8A19F", '#1CBE4F','#B5EFB5',  '#AA0DFE','#FEAF16', '#325A9B', '#C075A6', 'black']

In [None]:
# Overview UMAP of the combined object: one panel each for day, stage and subset annotation.
overview_style = dict(
    color_map='Spectral_r',
    use_raw=False,
    ncols=4,
    wspace=0.2,
    outline_width=[0.6, 0.05],
    size=15,
    frameon=False,
    add_outline=True,
    sort_order=False,
)
sc.pl.umap(adata_d0147, color=['day', 'stage', 'cell_type_subset'], **overview_style)

## Include looms

In [None]:
import scvelo as scv
scv.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True, format='pdf')

In [None]:
from pathlib import Path

# Number of cells copied from each input loom, in the order the files are visited.
# A later cell expands one sample suffix per file with np.repeat(..., ds_list), so these
# counts must equal the number of columns actually written to the merged file.
ds_list = []

tenexdir = '../data/10xGenomics/'
# NOTE(review): Path.glob does not guarantee an ordering; the hard-coded sample suffix list
# used afterwards has to match the order in which the files are visited here - confirm.
loom_path = Path(tenexdir).glob('**/**/**/**/cr-results.loom')

with loompy.new('loom_merged.loom') as dsout:  # Create a new, empty, loom file
    for f in loom_path:
        with loompy.connect(f) as ds:
            # Total counts per cell (column sums).
            totals = ds.map([np.sum], axis=1)[0]
            # Keep every cell with at least one count.
            cells = np.where(totals > 0)[0]
            # Record the number of cells kept, not ds.shape[1]: a cell dropped by the filter
            # would otherwise shift all following sample labels.
            ds_list.append(len(cells))
            for (ix, selection, view) in ds.scan(items=cells, axis=1):
                dsout.add_columns(view.layers, col_attrs=view.ca, row_attrs=view.ra)

In [None]:
ldata = scv.read('loom_merged.loom', cache=False)

In [None]:
# One name suffix per input loom file (29 entries), repeated by the per-file cell counts in
# ds_list so that every cell of the merged loom gets the suffix of the file it came from.
# NOTE(review): the order of this list must match the order in which the loom files were
# visited when ds_list was filled - verify against the glob order on the current machine.
x = np.repeat(np.array([ '-1@mo02_CD45neg1_d4',  '-1@mo02_CD45neg1_d0', '-1@mo02_CD45neg2_d7', '-1@mo02_FB_d1', 
                         '-1@mo02_CD45neg2_d1', '-1@mo02_EC_d7', '-1@mo02_EC_d4', '-1@mo02_FB_d7', 
                         '-1@mo02_EC_d1', '-1@mo02_EC_d0', '-1@mo02_FB_d4', '-1@mo02_CD45neg2_d4',  
                         '-1@mo02_CD45neg2_d0', '-1@mo02_CD45neg1_d7', '-1@mo02_CD45neg1_d1', '-1@mo02_FB_d0',
                         '-1@mo18_EC_d1', '-1@mo18_FB_d1', '-1@mo18_CD45neg_d4', '-1@mo18_CD45neg1_d0',
                         '-1@mo18_EC_d4', '-1@mo18_FB_d4', '-1@mo18_CD45neg2_d0', '-1@mo18_CD45neg_d7',
                         '-1@mo18_CD45neg_d1', '-1@mo18_EC_d7', '-1@mo18_EC_d0', '-1@mo18_FB_d7', 
                         '-1@mo18_FB_d0']), ds_list, axis=0)

In [None]:
ldata.obs['name_match'] = x

In [None]:
# Delimiters around the barcode inside each loom observation name:
# the barcode is the text after the first ':' and before the last 'x'.
start = ':'
end = 'x'

barcodes = [
    str(obs_name[obs_name.find(start) + len(start):obs_name.rfind(end)])
    for obs_name in ldata.obs_names
]
ldata.obs['barcodes'] = barcodes

In [None]:
ldata.obs_names = ldata.obs[['barcodes','name_match']].agg(''.join, axis=1)

In [None]:
from collections import Counter
[k for k, v in Counter(ldata.obs_names).items() if v > 1]

In [None]:
adata_d0147_loom = scv.utils.merge(adata_d0147, ldata)

In [None]:
adata_d0147_loom.shape

## Analyze TEC, FB and EC and annotate based on public marker genes and signatures

### Our ECs

In [None]:
# Endothelial-cell subsets of the combined object.
# An explicit copy is taken because the following cells modify this object in place
# (gene filtering, PCA, neighbours, UMAP); slicing alone only yields a view of adata_d0147.
ec_subsets = ['0:arEC', '1:capEC', '2:venEC']
EC_d0147 = adata_d0147[adata_d0147.obs['cell_type_subset'].isin(ec_subsets)].copy()

In [None]:
# Remove columns with all 0s
sc.pp.filter_genes(EC_d0147, min_cells=1)

#### Identify highly variable genes (all samples but day 1) and replot the data

In [None]:
# All endothelial cells except day 1; used only to pick highly variable genes.
# Copied explicitly because the next cells filter genes and write .var on it in place.
EC_d047 = EC_d0147[EC_d0147.obs['day'] != 'd1'].copy()

In [None]:
# Remove genes that are not expressed in any cells (remove columns with all 0s)
sc.pp.filter_genes(EC_d047, min_cells=1)

In [None]:
sc.pp.highly_variable_genes(EC_d047, n_top_genes=3500, flavor='seurat')

In [None]:
hvgs = EC_d047.var[EC_d047.var['highly_variable']==True].index

In [None]:
EC_d047.var['highly_variable'] = ''

In [None]:
# Flag the genes selected on the day-1-free subset as highly variable on the full EC object.
# Using var_names directly avoids requiring a pre-existing 'highly_variable' column and
# replaces the per-gene Python loop with one vectorised membership test.
EC_d0147.var['highly_variable'] = EC_d0147.var_names.isin(hvgs)

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(EC_d0147, n_comps=200, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
def observe_variance(anndata_object):
    """Plot the variance explained by each principal component.

    Reads ``anndata_object.uns['pca']['variance_ratio']`` and draws two scatter panels
    side by side: the fraction of variance per PC (left) and its cumulative sum (right).
    PCs are numbered from 0 on the x axis.

    Parameters
    ----------
    anndata_object
        Object exposing ``.uns['pca']['variance_ratio']`` (filled by ``sc.tl.pca``).

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``. The previous version returned the
        ``plt.show`` function without calling it, which only printed a function repr.
    """
    variance_ratio = anndata_object.uns['pca']['variance_ratio']
    pc_index = range(len(variance_ratio))

    fig = plt.figure(figsize=(10, 5))
    ax1 = fig.add_subplot(121)
    ax2 = fig.add_subplot(122)

    # variance per principal component
    ax1.scatter(pc_index, variance_ratio, s=4)
    ax1.set_xlabel('PC')
    ax1.set_ylabel('Fraction of variance explained\n')
    ax1.set_title('Fraction of variance explained per PC\n')

    # cumulative variance explained
    ax2.scatter(pc_index, np.cumsum(variance_ratio), s=4)
    ax2.set_xlabel('PC')
    ax2.set_ylabel('Cumulative fraction of variance\nexplained')
    ax2.set_title('Cumulative fraction of variance\nexplained by PCs')

    fig.tight_layout()
    plt.show()
observe_variance(EC_d0147)

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(EC_d0147, n_comps=30, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
sce.pp.harmony_integrate(EC_d0147, 'sample')

In [None]:
sc.pp.neighbors(EC_d0147, n_neighbors=15, use_rep='X_pca_harmony')
sc.tl.umap(EC_d0147)

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

# UMAP of the endothelial cells, one panel per annotation column.
ec_umap_style = dict(
    ncols=6,
    outline_width=[0.6, 0.05],
    frameon=False,
    cmap='Spectral_r',
    wspace=0.3,
    add_outline=True,
)
sc.pl.umap(EC_d0147, color=['cell_type_subset', 'stage', 'day', 'sample'], **ec_umap_style)

### Our FB

In [None]:
# Fibroblast subsets of the combined object.
# An explicit copy is taken because the following cells modify this object in place
# (gene filtering, PCA, neighbours, UMAP, gene scores); slicing alone only yields a view.
fb_subsets = ['3:capsFB', '4:intFB', '5:medFB']
FB_d0147 = adata_d0147[adata_d0147.obs['cell_type_subset'].isin(fb_subsets)].copy()

In [None]:
# Remove columns with all 0s
sc.pp.filter_genes(FB_d0147, min_cells=1)

#### Identify highly variable genes (all samples but day 1) and replot the data

In [None]:
# All fibroblasts except day 1; used only to pick highly variable genes.
# Copied explicitly because the next cells filter genes and write .var on it in place.
FB_d047 = FB_d0147[FB_d0147.obs['day'] != 'd1'].copy()

In [None]:
# Remove genes that are not expressed in any cells (remove columns with all 0s)
sc.pp.filter_genes(FB_d047, min_cells=1)

In [None]:
sc.pp.highly_variable_genes(FB_d047, n_top_genes=3500, flavor='seurat')

In [None]:
hvgs = FB_d047.var[FB_d047.var['highly_variable']==True].index

In [None]:
FB_d047.var['highly_variable'] = ''

In [None]:
# Flag the genes selected on the day-1-free subset as highly variable on the full FB object.
# Using var_names directly avoids requiring a pre-existing 'highly_variable' column and
# replaces the per-gene Python loop with one vectorised membership test.
FB_d0147.var['highly_variable'] = FB_d0147.var_names.isin(hvgs)

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(FB_d0147, n_comps=200, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
observe_variance(FB_d0147)

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(FB_d0147, n_comps=50, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
sce.pp.harmony_integrate(FB_d0147, 'sample')

In [None]:
sc.pp.neighbors(FB_d0147, n_neighbors=15, use_rep='X_pca_harmony')
sc.tl.umap(FB_d0147)

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

# UMAP of the fibroblasts, one panel per annotation column.
fb_umap_style = dict(
    ncols=6,
    outline_width=[0.6, 0.05],
    size=15,
    frameon=False,
    cmap='Spectral_r',
    wspace=0.3,
    add_outline=True,
)
sc.pl.umap(FB_d0147, color=['cell_type_subset', 'stage', 'day', 'sample'], **fb_umap_style)

In [None]:
# Marker-gene signatures used to score the fibroblast subsets in the next cell.
# Symbols that are absent from the object's var_names do not contribute to a score, so
# spelling and capitalisation matter. 'SlcO2b1' was corrected to the mouse symbol 'Slco2b1'.
# NOTE(review): 'Crsc' does not look like a current gene symbol, and 'Nov' / 'Wisp2' are
# older aliases - check all lists against the annotation used for this dataset.
capsular = [
    'Akr1c18', 'Mrgprg', 'Upk3b', 'Smpd3', 'Sema3c', 'Dpp4', 'Efhd1', 'Pcsk6', 'Pi16', 'Ackr3',
    'Sfrp2', 'Msln', 'Sfrp4', 'Adgrd1', 'Spon2', 'Mfap5', 'Gpc3', 'Saa3', 'Ogn', 'Mt2',
    'Cpxm2', 'Lrrn4cl', 'Fndc1', 'Anxa3', 'Qpct', 'Cxcl13', 'Smoc2', 'Igfbp6', 'Nov', 'Csrp2',
]
medullary = [
    'Gja4', 'Mmp9', 'Pde2a', 'Vtn', 'Des', 'Crsc', 'Ecscr', 'C1qtnf5', 'Cx3cl1', 'Acta2',
    'Meox1', 'Sdc3', 'Postn', 'Tagln', 'Serpine2', 'Enpp2', 'Slco2b1', 'Ndufa4l2', 'Serpina3g', 'Ltbp1',
    'Gfra2', 'Mfge8', 'Rasa3',
]
perilobular = [
    'C7', 'Dpt', 'Ptn', 'Ptgds', 'Rbp5', 'Rspo3', 'Mfap4', 'Sparcl1', 'Srpx', 'Hspb6',
    'Ccl2', 'Fxyd6', 'Nr2f1', 'Dbi', 'Spry1', 'Litaf', 'Dcn', 'Runx1t1', 'Rwdd1',
]
interlobular = [
    'Lum', 'Mgp', 'Ogn', 'Fn1', 'Igfbp6', 'Col1a2', 'Col14a1', 'Col3a1', 'Col1a1', 'Fbn1',
    'Dcn', 'Itm2a', 'Ccdc80', 'Ctsk', 'Wisp2', 'Id2', 'Spon2', 'Fstl1', 'Fbln2', 'Mfap5',
]

In [None]:
# Score every fibroblast for each signature; the score is stored in .obs under the given
# column name (the embedded newline is used later as a two-line panel title).
signature_scores = [
    (perilobular, 'perilobular\n(human)'),
    (interlobular, 'interlobular\n(human)'),
    (capsular, 'capsular\n(mouse)'),
    (medullary, 'medullary\n(mouse)'),
]
for signature_genes, score_column in signature_scores:
    sc.tl.score_genes(FB_d0147, gene_list=signature_genes, score_name=score_column, use_raw=False)

In [None]:
# UMAP of the fibroblasts coloured by the four signature scores.
fb_score_columns = ['capsular\n(mouse)', 'medullary\n(mouse)', 'perilobular\n(human)', 'interlobular\n(human)']
fb_score_style = dict(
    palette=user_defined_palette,
    color_map='Spectral_r',
    use_raw=False,
    ncols=5,
    wspace=0.3,
    outline_width=[0.6, 0.05],
    size=15,
    frameon=False,
    add_outline=True,
    sort_order=False,
)
sc.pl.umap(FB_d0147, color=fb_score_columns, **fb_score_style)

### Our TEC

In [None]:
# Thymic epithelial cell subsets (labels 10-20) of the combined object.
# An explicit copy is taken because the following cells modify this object in place
# (gene filtering, PCA, neighbours, UMAP, new .obs columns); slicing alone only yields a view.
tec_subsets = [
    '10:aaTEC1', '11:aaTEC2', '12:cTEC',
    '13:mTEC1', '14:mTEC-prol', '15:mTEC2',
    '16:mTEC3', '17:mimic(tuft)', '18:mimic(neuroendo)',
    '19:mimic(goblet)', '20:mimic(microfold)',
]
TEC_d0147 = adata_d0147[adata_d0147.obs['cell_type_subset'].isin(tec_subsets)].copy()

In [None]:
# Remove columns with all 0s
sc.pp.filter_genes(TEC_d0147, min_cells=1)

#### Identify highly variable genes (all samples but day 1) and replot the data

In [None]:
# All TECs except day 1; used only to pick highly variable genes.
# Copied explicitly because the next cells filter genes and write .var on it in place.
TEC_d047 = TEC_d0147[TEC_d0147.obs['day'] != 'd1'].copy()

In [None]:
# Remove genes that are not expressed in any cells (remove columns with all 0s)
sc.pp.filter_genes(TEC_d047, min_cells=1)

In [None]:
sc.pp.highly_variable_genes(TEC_d047, n_top_genes=3500, flavor='seurat')

In [None]:
hvgs = TEC_d047.var[TEC_d047.var['highly_variable']==True].index

In [None]:
TEC_d047.var['highly_variable'] = ''

In [None]:
# Flag the genes selected on the day-1-free subset as highly variable on the full TEC object.
# Using var_names directly avoids requiring a pre-existing 'highly_variable' column and
# replaces the per-gene Python loop with one vectorised membership test.
TEC_d0147.var['highly_variable'] = TEC_d0147.var_names.isin(hvgs)

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(TEC_d0147, n_comps=200, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
observe_variance(TEC_d0147)

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(TEC_d0147, n_comps=35, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
sce.pp.harmony_integrate(TEC_d0147, 'sample')

In [None]:
sc.pp.neighbors(TEC_d0147, n_neighbors=15, use_rep='X_pca_harmony')
sc.tl.umap(TEC_d0147)

In [None]:
# Embedding below was computed from 35 principal components.
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

# UMAP of the TECs, one panel per annotation column.
tec_umap_style = dict(
    ncols=6,
    outline_width=[0.6, 0.05],
    size=15,
    frameon=False,
    cmap='Spectral_r',
    wspace=0.3,
    add_outline=True,
)
sc.pl.umap(TEC_d0147, color=['cell_type_subset', 'stage', 'day', 'sample'], **tec_umap_style)

## RNA velocity

In [None]:
# TEC subsets 10-16 (mimetic subsets 17-20 excluded) from the loom-merged object.
# An explicit copy is taken because the following cells modify this object in place
# (gene filtering, HVG selection, PCA, neighbours, moments, velocity).
velocity_subsets = [
    '10:aaTEC1', '11:aaTEC2', '12:cTEC',
    '13:mTEC1', '14:mTEC-prol', '15:mTEC2',
    '16:mTEC3',
]
TEC_d0147_loom_nomimics = adata_d0147_loom[adata_d0147_loom.obs['cell_type_subset'].isin(velocity_subsets)].copy()

In [None]:
# Remove columns with all 0s
sc.pp.filter_genes(TEC_d0147_loom_nomimics, min_cells=1)

In [None]:
sc.pp.highly_variable_genes(TEC_d0147_loom_nomimics, n_top_genes=3500, n_bins=20, flavor='seurat', inplace=True)

In [None]:
rng = np.random.RandomState(42)
sc.tl.pca(TEC_d0147_loom_nomimics, n_comps=20, svd_solver='arpack', random_state=rng, use_highly_variable=True)

In [None]:
sce.pp.harmony_integrate(TEC_d0147_loom_nomimics, 'sample')

In [None]:
sc.pp.neighbors(TEC_d0147_loom_nomimics, n_neighbors=15, use_rep='X_pca_harmony')
sc.tl.umap(TEC_d0147_loom_nomimics)

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

# UMAP of the TECs used for RNA velocity, coloured by subset and by stage.
velocity_umap_style = dict(
    ncols=2,
    add_outline=True,
    frameon=False,
    wspace=0.7,
    sort_order=False,
)
sc.pl.umap(TEC_d0147_loom_nomimics, color=['cell_type_subset', 'stage'], **velocity_umap_style)

In [None]:
scv.pp.moments(TEC_d0147_loom_nomimics, use_rep='X_pca_harmony')

In [None]:
scv.tl.velocity(TEC_d0147_loom_nomimics, mode='stochastic')

In [None]:
scv.tl.velocity_graph(TEC_d0147_loom_nomimics)

In [None]:
scv.pl.velocity_embedding_stream(TEC_d0147_loom_nomimics, basis='umap')

In [None]:
scv.pl.velocity_embedding_grid(TEC_d0147_loom_nomimics, basis='umap', color=['cell_type_subset'], alpha=0.7, legend_loc='right')

In [None]:
scv.pl.velocity_embedding_stream(TEC_d0147_loom_nomimics, basis='umap', color=['cell_type_subset'],alpha=0.7, legend_loc='right')

In [None]:
scv.pl.velocity_embedding_stream(TEC_d0147_loom_nomimics, basis='umap', color=['cell_type_subset'],alpha=0.7, legend_loc='right')

In [None]:
TEC_d0147_loom_nomimics.obs['distinct'] = ''
TEC_d0147_loom_nomimics.obs['distinct'] = TEC_d0147_loom_nomimics.obs[['stage', 'day', 'cell_type_subset']].agg(' '.join, axis=1)

In [None]:
# Colours for the 'distinct' (stage + day + subset) categories: the same seven subset colours
# repeated eight times, giving 56 entries in total.
seven_subset_colors = ['#2ed9ff', '#c1c119', '#8b0000', '#fe00fa', '#f8a19f', '#1cbe4f', '#b5efb5']
TEC_d0147_loom_nomimics.uns['distinct_colors'] = seven_subset_colors * 8

In [None]:
# One velocity stream plot per stage/day combination, highlighting only the cells of that
# condition. Group labels follow the 'distinct' column format '<stage> <day> <subset>'.
# The labels are generated rather than typed out: the hand-written 18mo calls listed
# '02mo <day> 16:mTEC3' instead of '18mo <day> 16:mTEC3', so they highlighted mTEC3 cells
# from the wrong stage.
velocity_plot_subsets = ['10:aaTEC1', '11:aaTEC2', '12:cTEC', '13:mTEC1', '14:mTEC-prol', '15:mTEC2', '16:mTEC3']
for stage_label in ['02mo', '18mo']:
    for day_label in ['d0', 'd1', 'd4', 'd7']:
        condition_groups = [' '.join([stage_label, day_label, subset_label]) for subset_label in velocity_plot_subsets]
        scv.pl.velocity_embedding_stream(
            TEC_d0147_loom_nomimics,
            basis='umap',
            groups=condition_groups,
            color='distinct',
            size=100,
            alpha=0.7,
            legend_loc='right',
            save=stage_label + '_' + day_label + '_bystage.svg',
        )

### Proportions over time within stage

In [None]:
TEC_d0147.obs['distinct'] = ''
TEC_d0147.obs['distinct'] = TEC_d0147.obs[['stage', 'day']].agg(' '.join, axis=1)

In [None]:
TEC_d0147_2mo = TEC_d0147[TEC_d0147.obs['stage']=='02mo']

In [None]:
# Fraction of each TEC subset per day in 2-month-old samples (each row sums to 1).
crosstb = pd.crosstab(TEC_d0147_2mo.obs['day'], TEC_d0147_2mo.obs['cell_type_subset'], normalize='index')
# Palette for the current subset annotation. This rebinds `user_defined_palette`, replacing
# the 16-colour palette defined near the top of the notebook for all cells run afterwards.
user_defined_palette =  ['#2ED9FF', '#c1c119', '#8b0000', '#FE00FA', "#F8A19F", '#1CBE4F','#B5EFB5',  '#AA0DFE','#FEAF16', '#325A9B', '#C075A6', 'black']

In [None]:
# Stacked area chart of subset proportions over time (2-month-old samples), y axis inverted.
ax = crosstb.plot(kind="area", stacked=True, color=user_defined_palette)
ax.invert_yaxis()
# The legend is anchored outside the axes, to the right of the plot.
ax.legend(title='cell_type_subset', bbox_to_anchor=(1, 1.02), loc='upper left')
# bbox_inches='tight' enlarges the saved canvas so the outside legend is not cut off.
plt.savefig('proportions-over-time_02mo.pdf', bbox_inches='tight')

In [None]:
TEC_d0147_18mo = TEC_d0147[TEC_d0147.obs['stage']=='18mo']

In [None]:
# Fraction of each TEC subset per day in 18-month-old samples (each row sums to 1).
crosstb = pd.crosstab(TEC_d0147_18mo.obs['day'], TEC_d0147_18mo.obs['cell_type_subset'], normalize='index')
# Palette for the current subset annotation. This rebinds `user_defined_palette`, replacing
# the 16-colour palette defined near the top of the notebook for all cells run afterwards.
user_defined_palette =  ['#2ED9FF', '#c1c119', '#8b0000', '#FE00FA', "#F8A19F", '#1CBE4F','#B5EFB5',  '#AA0DFE','#FEAF16', '#325A9B', '#C075A6', 'black']

In [None]:
# Stacked area chart of subset proportions over time (18-month-old samples), y axis inverted.
ax = crosstb.plot(kind="area", stacked=True, color=user_defined_palette)
ax.invert_yaxis()
# The legend is anchored outside the axes, to the right of the plot.
ax.legend(title='cell_type_subset', bbox_to_anchor=(1, 1.02), loc='upper left')
# bbox_inches='tight' enlarges the saved canvas so the outside legend is not cut off.
plt.savefig('proportions-over-time_18mo.pdf', bbox_inches='tight')

In [None]:
import re

### d4 vs d1 (02mo)

In [None]:
adata_d0147.obs['day_and_subset'] = ''
adata_d0147.obs['day_and_subset'] = adata_d0147.obs[['day', 'cell_type_subset']].agg('_'.join, axis=1)

In [None]:
adata_d0147_02mo = adata_d0147[adata_d0147.obs['stage']=='02mo']

In [None]:
# Wilcoxon rank-sum test of day 4 vs day 1 within every subset (2-month-old samples).
# One worksheet per subset with columns <group>_n (names), _s (scores), _l (logfoldchanges)
# and _p (pvals_adj).
wilcox_subsets = ["0:arEC", "1:capEC", "2:venEC", "3:capsFB", "4:intFB", "5:medFB", "6:MEC", "7:vSMC/PC", "8:nmSC", "9:Fat", "10:aaTEC1", "11:aaTEC2",
                  "12:cTEC", "13:mTEC1", "14:mTEC-prol", "15:mTEC2", "16:mTEC3", "17:mimic(tuft)", "18:mimic(neuroendo)", "19:mimic(goblet)", "20:mimic(microfold)"]

# The context manager writes and closes the workbook on exit, even if a comparison fails;
# ExcelWriter.save() no longer exists in pandas >= 2.0.
with pd.ExcelWriter('adata_d0147_02mo_d4_wilcox.xlsx', engine='xlsxwriter') as writer:
    for subset in wilcox_subsets:
        sc.tl.rank_genes_groups(adata_d0147_02mo, 'day_and_subset', groups=['d4_' + subset], reference='d1_' + subset, method='wilcoxon', use_raw=False)
        result = adata_d0147_02mo.uns['rank_genes_groups']
        groups = result['names'].dtype.names
        # Sheet name = label after the last ':'; '/' is not allowed in Excel sheet names.
        sheet_name = subset.rsplit(':', 1)[1].replace('/', '-')
        pd.DataFrame(
            {group + '_' + key[:1]: result[key][group]
             for group in groups for key in ['names', 'scores', 'logfoldchanges', 'pvals_adj']}
        ).to_excel(writer, sheet_name=sheet_name)

### d7 vs d1 (02mo)

In [None]:
# Wilcoxon rank-sum test of day 7 vs day 1 within every subset (2-month-old samples).
# One worksheet per subset with columns <group>_n (names), _s (scores), _l (logfoldchanges)
# and _p (pvals_adj).
wilcox_subsets = ["0:arEC", "1:capEC", "2:venEC", "3:capsFB", "4:intFB", "5:medFB", "6:MEC", "7:vSMC/PC", "8:nmSC", "9:Fat", "10:aaTEC1", "11:aaTEC2",
                  "12:cTEC", "13:mTEC1", "14:mTEC-prol", "15:mTEC2", "16:mTEC3", "17:mimic(tuft)", "18:mimic(neuroendo)", "19:mimic(goblet)", "20:mimic(microfold)"]

# The context manager writes and closes the workbook on exit, even if a comparison fails;
# ExcelWriter.save() no longer exists in pandas >= 2.0.
with pd.ExcelWriter('adata_d0147_02mo_d7_wilcox.xlsx', engine='xlsxwriter') as writer:
    for subset in wilcox_subsets:
        sc.tl.rank_genes_groups(adata_d0147_02mo, 'day_and_subset', groups=['d7_' + subset], reference='d1_' + subset, method='wilcoxon', use_raw=False)
        result = adata_d0147_02mo.uns['rank_genes_groups']
        groups = result['names'].dtype.names
        # Sheet name = label after the last ':'; '/' is not allowed in Excel sheet names.
        sheet_name = subset.rsplit(':', 1)[1].replace('/', '-')
        pd.DataFrame(
            {group + '_' + key[:1]: result[key][group]
             for group in groups for key in ['names', 'scores', 'logfoldchanges', 'pvals_adj']}
        ).to_excel(writer, sheet_name=sheet_name)

### d4 vs d1 (18mo)

In [None]:
adata_d0147_18mo = adata_d0147[adata_d0147.obs['stage']=='18mo']

In [None]:
# Wilcoxon rank-sum test of day 4 vs day 1 within every subset (18-month-old samples).
# One worksheet per subset with columns <group>_n (names), _s (scores), _l (logfoldchanges)
# and _p (pvals_adj).
wilcox_subsets = ["0:arEC", "1:capEC", "2:venEC", "3:capsFB", "4:intFB", "5:medFB", "6:MEC", "7:vSMC/PC", "8:nmSC", "9:Fat", "10:aaTEC1", "11:aaTEC2",
                  "12:cTEC", "13:mTEC1", "14:mTEC-prol", "15:mTEC2", "16:mTEC3", "17:mimic(tuft)", "18:mimic(neuroendo)", "19:mimic(goblet)", "20:mimic(microfold)"]

# The context manager writes and closes the workbook on exit, even if a comparison fails;
# ExcelWriter.save() no longer exists in pandas >= 2.0.
with pd.ExcelWriter('adata_d0147_18mo_d4_wilcox.xlsx', engine='xlsxwriter') as writer:
    for subset in wilcox_subsets:
        sc.tl.rank_genes_groups(adata_d0147_18mo, 'day_and_subset', groups=['d4_' + subset], reference='d1_' + subset, method='wilcoxon', use_raw=False)
        result = adata_d0147_18mo.uns['rank_genes_groups']
        groups = result['names'].dtype.names
        # Sheet name = label after the last ':'; '/' is not allowed in Excel sheet names.
        sheet_name = subset.rsplit(':', 1)[1].replace('/', '-')
        pd.DataFrame(
            {group + '_' + key[:1]: result[key][group]
             for group in groups for key in ['names', 'scores', 'logfoldchanges', 'pvals_adj']}
        ).to_excel(writer, sheet_name=sheet_name)

### d7 vs d1 (18mo)

In [None]:
# Wilcoxon rank-sum test of day 7 vs day 1 within every subset (18-month-old samples).
# One worksheet per subset with columns <group>_n (names), _s (scores), _l (logfoldchanges)
# and _p (pvals_adj).
wilcox_subsets = ["0:arEC", "1:capEC", "2:venEC", "3:capsFB", "4:intFB", "5:medFB", "6:MEC", "7:vSMC/PC", "8:nmSC", "9:Fat", "10:aaTEC1", "11:aaTEC2",
                  "12:cTEC", "13:mTEC1", "14:mTEC-prol", "15:mTEC2", "16:mTEC3", "17:mimic(tuft)", "18:mimic(neuroendo)", "19:mimic(goblet)", "20:mimic(microfold)"]

# The context manager writes and closes the workbook on exit, even if a comparison fails;
# ExcelWriter.save() no longer exists in pandas >= 2.0.
with pd.ExcelWriter('adata_d0147_18mo_d7_wilcox.xlsx', engine='xlsxwriter') as writer:
    for subset in wilcox_subsets:
        sc.tl.rank_genes_groups(adata_d0147_18mo, 'day_and_subset', groups=['d7_' + subset], reference='d1_' + subset, method='wilcoxon', use_raw=False)
        result = adata_d0147_18mo.uns['rank_genes_groups']
        groups = result['names'].dtype.names
        # Sheet name = label after the last ':'; '/' is not allowed in Excel sheet names.
        sheet_name = subset.rsplit(':', 1)[1].replace('/', '-')
        pd.DataFrame(
            {group + '_' + key[:1]: result[key][group]
             for group in groups for key in ['names', 'scores', 'logfoldchanges', 'pvals_adj']}
        ).to_excel(writer, sheet_name=sheet_name)

### 18mo vs 02mo (day 4)

In [None]:
adata_d0147.obs['stage_and_subset'] = ''
adata_d0147.obs['stage_and_subset'] = adata_d0147.obs[['stage', 'cell_type_subset']].agg('_'.join, axis=1)

In [None]:
adata_d0147_d4 = adata_d0147[adata_d0147.obs['day']=='d4']

In [None]:
# Wilcoxon rank-sum test of 18mo vs 02mo within every subset (day 4 samples).
# One worksheet per subset with columns <group>_n (names), _s (scores), _l (logfoldchanges)
# and _p (pvals_adj).
wilcox_subsets = ["0:arEC", "1:capEC", "2:venEC", "3:capsFB", "4:intFB", "5:medFB", "6:MEC", "7:vSMC/PC", "8:nmSC", "9:Fat", "10:aaTEC1", "11:aaTEC2",
                  "12:cTEC", "13:mTEC1", "14:mTEC-prol", "15:mTEC2", "16:mTEC3", "17:mimic(tuft)", "18:mimic(neuroendo)", "19:mimic(goblet)", "20:mimic(microfold)"]

# The context manager writes and closes the workbook on exit, even if a comparison fails;
# ExcelWriter.save() no longer exists in pandas >= 2.0.
with pd.ExcelWriter('adata_18vs02mo_d4_wilcox.xlsx', engine='xlsxwriter') as writer:
    for subset in wilcox_subsets:
        sc.tl.rank_genes_groups(adata_d0147_d4, 'stage_and_subset', groups=['18mo_' + subset], reference='02mo_' + subset, method='wilcoxon', use_raw=False)
        result = adata_d0147_d4.uns['rank_genes_groups']
        groups = result['names'].dtype.names
        # Sheet name = label after the last ':'; '/' is not allowed in Excel sheet names.
        sheet_name = subset.rsplit(':', 1)[1].replace('/', '-')
        pd.DataFrame(
            {group + '_' + key[:1]: result[key][group]
             for group in groups for key in ['names', 'scores', 'logfoldchanges', 'pvals_adj']}
        ).to_excel(writer, sheet_name=sheet_name)

### 18mo vs 02mo (day 7)

In [None]:
adata_d0147_d7 = adata_d0147[adata_d0147.obs['day']=='d7']

In [None]:
# Wilcoxon rank-sum test of 18mo vs 02mo within every subset (day 7 samples).
# One worksheet per subset with columns <group>_n (names), _s (scores), _l (logfoldchanges)
# and _p (pvals_adj).
wilcox_subsets = ["0:arEC", "1:capEC", "2:venEC", "3:capsFB", "4:intFB", "5:medFB", "6:MEC", "7:vSMC/PC", "8:nmSC", "9:Fat", "10:aaTEC1", "11:aaTEC2",
                  "12:cTEC", "13:mTEC1", "14:mTEC-prol", "15:mTEC2", "16:mTEC3", "17:mimic(tuft)", "18:mimic(neuroendo)", "19:mimic(goblet)", "20:mimic(microfold)"]

# The context manager writes and closes the workbook on exit, even if a comparison fails;
# ExcelWriter.save() no longer exists in pandas >= 2.0.
with pd.ExcelWriter('adata_18vs02mo_d7_wilcox.xlsx', engine='xlsxwriter') as writer:
    for subset in wilcox_subsets:
        sc.tl.rank_genes_groups(adata_d0147_d7, 'stage_and_subset', groups=['18mo_' + subset], reference='02mo_' + subset, method='wilcoxon', use_raw=False)
        result = adata_d0147_d7.uns['rank_genes_groups']
        groups = result['names'].dtype.names
        # Sheet name = label after the last ':'; '/' is not allowed in Excel sheet names.
        sheet_name = subset.rsplit(':', 1)[1].replace('/', '-')
        pd.DataFrame(
            {group + '_' + key[:1]: result[key][group]
             for group in groups for key in ['names', 'scores', 'logfoldchanges', 'pvals_adj']}
        ).to_excel(writer, sheet_name=sheet_name)

### Dotchart using score and fdr from wilcox

In [None]:
%load_ext rpy2.ipython

In [None]:
%R if (!require("pacman")) install.packages("pacman")
%R pacman::p_load(MAST, data.table, openxlsx, ggplot2, ggpubr, RColorBrewer, dichromat, readxl, ggpubr, dplyr, arrow, feather, DelayedArray, HDF5Array, scales, parallel)

In [None]:
%%R 

# Write one ranked-gene (.rnk) file per worksheet of a Wilcoxon workbook.
# For each sheet, columns 2 and 3 (gene name, score) are kept, rows with NA dropped and the
# table sorted by score in decreasing order. Both header names start with '#' so the header
# line reads as a comment. Returns the list of exported tables, keyed by sheet name.
write_rnk_files <- function(xlsx_path, comparison_tag) {
      exported = NULL
      for (sheet in excel_sheets(xlsx_path)) {
            sheet_table <- read_excel(xlsx_path, sheet = sheet)
            ranked = na.omit(sheet_table[,c(2,3)])
            ranked = ranked[order(ranked[,2], decreasing = TRUE),]
            colnames(ranked)[1] = '#primerid' # comment out header
            colnames(ranked)[2] = '#rank_score' # comment out header
            exported[[sheet]] = ranked
            write.table(ranked, file = paste0('../output/metadata/gsea_items/input_ranks/d7d4vsd1/wilcox_result_', sheet, '_', comparison_tag, '.rnk'), sep='\t', row.names = FALSE, quote = FALSE)
      }
      exported
}

# Workbooks to export, in processing order, with the tag appended to each output file name.
rnk_exports = list(
      c('adata_d0147_02mo_d7_wilcox.xlsx', 'd7vsd1_02mo'),
      c('adata_d0147_02mo_d4_wilcox.xlsx', 'd4vsd1_02mo'),
      c('adata_d0147_18mo_d7_wilcox.xlsx', 'd7vsd1_18mo'),
      c('adata_d0147_18mo_d4_wilcox.xlsx', 'd4vsd1_18mo')
)

# rnk_items_list ends up holding the tables of the last workbook processed.
rnk_items_list = NULL
for (rnk_export in rnk_exports) {
      rnk_items_list = write_rnk_files(rnk_export[1], rnk_export[2])
}

In [None]:
%%R 

# Combine every worksheet of the four dX-vs-d1 Wilcoxon workbooks into one long table,
# keeping only rows with p_adj <= 0.05.
# NOTE(review): the workbooks are read from ../output/metadata/wilcox_items/, while the Python
# cells of this notebook write them to the working directory under the names below - confirm
# the files are copied there before this cell runs.
#'adata_d0147_02mo_d4_wilcox.xlsx', 'adata_d0147_02mo_d7_wilcox.xlsx', 'adata_d0147_18mo_d4_wilcox.xlsx', 'adata_d0147_18mo_d7_wilcox.xlsx',
wilcox_results_combined = NULL

for (selected_comparison in c('../output/metadata/wilcox_items/adata_d0147_02mo_d4_wilcox.xlsx', '../output/metadata/wilcox_items/adata_d0147_02mo_d7_wilcox.xlsx', '../output/metadata/wilcox_items/adata_d0147_18mo_d4_wilcox.xlsx', '../output/metadata/wilcox_items/adata_d0147_18mo_d7_wilcox.xlsx')) {
    wilcox_results = excel_sheets(selected_comparison)
    for (item in wilcox_results) {
        wilcox_result <- read_excel(selected_comparison, sheet = item)
        # Assumes exactly five columns per sheet: row index, names, scores, logfoldchanges, pvals_adj.
        colnames(wilcox_result) <- c('index', 'name', 'score', 'log2_fc', 'p_adj')
        # Replace p_adj == 0 by the smallest positive p_adj of the sheet so -log10 stays finite.
        wilcox_result$p_adj[wilcox_result$p_adj == 0] <- min(wilcox_result$p_adj[wilcox_result$p_adj>0])
        wilcox_result$`-log10(p_adj)` = (-log(wilcox_result$p_adj, 10))
        wilcox_result$subset = item
        # The full workbook path is stored as the comparison label.
        wilcox_result$comparison = selected_comparison
        wilcox_result_sorted = wilcox_result[order(wilcox_result$score, decreasing = TRUE),]
        wilcox_result_sorted = wilcox_result_sorted[wilcox_result_sorted$p_adj<=0.05,]
        wilcox_results_combined = bind_rows(wilcox_results_combined, wilcox_result_sorted) # append all significant rows of this sheet (no top-N cut is applied)
    }
}

In [None]:
%%R

# Genes to show in the dot chart.
# NOTE(review): the variable name suggests these are CellChat ligands - confirm the source.
cellchat_emt_L = c(
    'Cxcl12', 'Ccl19', 'Ccl21a', 'Ccl25', 'Fgf1', 'Fgf2', 'Fgf7', 'Fgf10', 'Fgf18', 'Fgf21', 'Bmp4', 'Bmp7', 'Flt3l', 'Kitl',
    'Nrg1', 'Lama1', 'Igf1', 'Col4a5', 'Lama2', 'Cadm1', 'Mdk', 'Ptn', 'Vtn', 'Thbs1', 'Tnxb', 'Angptl4', 'Il6', 'Lgals1', 'Ptx3', 'Serpine1', 'Tgfbi', 'Mgp', 'Tgfbr3'
)

# Keep only the significant results for the selected genes.
tt = wilcox_results_combined[wilcox_results_combined$name %in% cellchat_emt_L,]

# Display order of the genes; the factor levels are this order reversed.
gene_display_order = c(
    'Cxcl12', 'Ccl19', 'Ccl21a', 'Ccl25', 'Fgf7', 'Fgf1', 'Fgf2', 'Fgf10', 'Fgf18', 'Fgf21', 'Bmp4', 'Bmp7', 'Flt3l', 'Kitl',
    'Lgals1', 'Ptx3', 'Angptl4', 'Il6', 'Tnxb', 'Thbs1', 'Vtn', 'Ptn', 'Mdk', 'Cadm1', 'Col4a5', 'Serpine1', 'Lama2', 'Tgfbr3', 'Tgfbi', 'Mgp', 'Igf1', 'Lama1', 'Nrg1'
)
tt$name <- factor(tt$name, levels = rev(gene_display_order))

# Display order of the subsets; subsets not listed here become NA in the factor.
subset_display_order = c('capsFB', 'intFB', 'medFB', 'arEC', 'capEC', 'venEC', 'cTEC', 'mTEC1', 'mTEC-prol', 'mTEC2', 'mimic(tuft)', 'aaTEC1', 'MEC', 'vSMC-PC', 'nmSC' )
tt$subset <- factor(tt$subset, levels = subset_display_order)

# Clamp the Wilcoxon score to [-3, 3] for the colour scale.
tt$`squished Z-score` = squish(tt$score, range=c(-3, 3), only.finite=TRUE)


In [None]:
%%R -w 24 -h 33 -u cm

# Open a PDF device; everything printed until dev.off() is written to this file.
pdf("dotplot_Zscore_d74vsd1_emtL-up.pdf", width=9, height=12.25)

# Dot chart of table `tt`: genes against subsets, one facet per comparison.
# Dot colour = clamped score, dot size = -log10(p_adj); sorting='none' keeps the factor order of `tt`.
print(ggdotchart(tt, x='name', y='subset', group = 'subset',rotate=TRUE, color='squished Z-score',  size = '-log10(p_adj)', facet.by='comparison', sorting='none', xlab = "",  ylab = "") +
scale_color_gradientn(colours = dichromat::colorschemes$BluetoGreen.14) +  
theme_pubr() + theme(legend.position='right', axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)))
# Close the device so the PDF is flushed to disk.
dev.off() 

## Save h5ad files for CellChat

In [None]:
# Colours for the 22 'cell_type_subset' categories, in category order.
subset_palette = [
    '#F6222E', '#3283FE', '#16FF32', '#BDCDFF', '#3B00FB', '#1CFFCE',
    '#d62728', '#19c9b3', '#FFA5D2', 'grey', '#2ED9FF', '#c1c119',
    '#8b0000', '#FE00FA', "#F8A19F", '#1CBE4F', '#B5EFB5', '#AA0DFE',
    '#FEAF16', '#325A9B', '#C075A6', 'black',
]

# UMAP of the combined object coloured by subset with the palette above.
subset_umap_style = dict(
    color_map='Spectral_r',
    palette=subset_palette,
    use_raw=False,
    ncols=4,
    wspace=0.3,
    outline_width=[0.6, 0.05],
    size=15,
    frameon=False,
    add_outline=True,
    sort_order=False,
)
sc.pl.umap(adata_d0147, color=['cell_type_subset'], **subset_umap_style)

In [None]:
# Drop the four subsets listed below; all other cells are kept.
excluded_subsets = ['6:MEC', '7:vSMC/PC', '8:nmSC', '9:Fat']
adata_d0147_light = adata_d0147[~adata_d0147.obs['cell_type_subset'].isin(excluded_subsets)]

In [None]:
sc.set_figure_params(dpi=80, dpi_save=300, color_map='viridis', vector_friendly=True, transparent=True)

# UMAP of the reduced object, one panel each for day, stage and subset.
light_umap_style = dict(
    ncols=3,
    outline_width=[0.6, 0.05],
    size=15,
    frameon=False,
    wspace=0.3,
    add_outline=True,
)
sc.pl.umap(adata_d0147_light, color=['day', 'stage', 'cell_type_subset'], **light_umap_style)

In [None]:
# Write one h5ad file per stage/day combination of the reduced object.
stage = ['02mo', '18mo']
day = ['d0', 'd1', 'd4', 'd7']
for stage_label in stage:
    stage_mask = adata_d0147_light.obs['stage'] == stage_label
    for day_label in day:
        day_mask = adata_d0147_light.obs['day'] == day_label
        dataset = adata_d0147_light[stage_mask & day_mask]
        # File name pattern: cd45neg_0147_SLTBI_vdb_<stage>_<day>_new.h5ad, four directories up.
        path_to_h5ad = '../../../../cd45neg_0147_SLTBI_vdb_' + stage_label + '_' + day_label + '_new.h5ad'
        dataset.write(path_to_h5ad)