<font size="+3.8">Scanpy single-cell pre-processing</font>  
<font size="+1.5"></font>  

Aim: Preprocess mouse brain single-cell data from Zhao 2020 Nat Comm

In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import anndata
import matplotlib.pyplot as plt
import os
from datetime import date
from matplotlib.pyplot import rc_context

In [None]:
import utils

In [None]:
sc.settings.verbosity = 3

In [None]:
main_dir='/run/user/1000/gvfs/smb-share:server=138.245.4.35,share=bd-dichgans/SF' # Linux

In [None]:
# Windows UNC path of the project share. Written as a raw string: the original
# literal relied on invalid escape sequences (\i, \B, \S), which emit a
# SyntaxWarning on Python >= 3.12. The resulting string value is unchanged.
main_dir = r'\\isdsynnas.srv.med.uni-muenchen.de\BD-Dichgans\SF' # Win

In [None]:
dataset_name='Zhao2020'
organism='Mouse'

# Load data

Downloaded from: https://singlecell.broadinstitute.org/single_cell/study/SCP829/aging-mouse-brain-kolab?cluster=tSNE%20without%20batch%20correction&spatialGroups=--&annotation=orig.ident--group--study&subsample=all#study-download 06/03/2023

In [None]:
from scipy.io import mmread
# Read the expression matrix (Matrix Market format). The path is assembled with
# os.path.join instead of hard-coded backslash separators, so it resolves for both
# the Linux and the Windows value of main_dir and no longer contains an invalid escape.
matrix = mmread(os.path.join(main_dir, "P6_vasc_scRNAseq", "Zhao2020", "gene_sorted-matrix.mtx.gz"))

In [None]:
matrix.T

In [None]:
# Barcode table (-> obs) and gene table (-> var); both are read without a header row.
# Paths are built with os.path.join so they work on Linux and Windows alike.
data_dir = os.path.join(main_dir, "P6_vasc_scRNAseq", "Zhao2020")
obs = pd.read_table(os.path.join(data_dir, "barcodes.tsv"), header=None)
var = pd.read_table(os.path.join(data_dir, "genes.tsv"), header=None)

In [None]:
# Assemble the AnnData object; the matrix is transposed so that rows line up with
# the barcode table (obs) and columns with the gene table (var).
ad = sc.AnnData(matrix.T, obs=obs, var=var)
# COOrdinate format is not subsettable -> densify. toarray() yields a plain
# numpy ndarray, whereas todense() returned the discouraged np.matrix class.
ad.X = ad.X.toarray()

In [None]:
# The raw inputs now live inside `ad`; drop the standalone references to free memory.
del matrix, obs, var

In [None]:
# Column 0 of the barcode table (read without a header, hence the integer label)
# becomes the cell index of the AnnData object.
ad.obs.index = ad.obs[0]
ad.obs.head()

In [None]:
all(ad.var[0] == ad.var[1])

In [None]:
ad.var_names = ad.var[0]
ad.var_names

In [None]:
# add metadata
# skiprows=[1] drops the second line of the file (presumably a type/annotation row
# below the header - TODO confirm). Path built with os.path.join for portability.
meta = pd.read_csv(os.path.join(main_dir, "P6_vasc_scRNAseq", "Zhao2020", "meta.csv"), sep=",", skiprows=[1])
# Use the NAME column as index and remove it from the regular columns
meta.index = meta["NAME"]
meta.index.name = None
meta=meta.drop("NAME",axis=1)
meta.head()

In [None]:
# Sanity check: the metadata rows must be in exactly the same order as the cells,
# because the next line replaces ad.obs with the metadata table.
assert all(ad.obs.index == meta.index)
ad.obs = meta
del meta
ad

In [None]:
# add tsne and annotations
# skiprows=[1] drops the second line of the file (presumably a type/annotation row
# below the header - TODO confirm). Path built with os.path.join for portability.
anno = pd.read_csv(os.path.join(main_dir, "P6_vasc_scRNAseq", "Zhao2020", "tSNE_without_correction.csv"), sep=",", skiprows=[1])
# Use the NAME column as index and remove it from the regular columns
anno.index = anno["NAME"]
anno.index.name = None
anno=anno.drop("NAME",axis=1)
anno.head(3)

In [None]:
anno.shape

In [None]:
# Index-based join of the tSNE/annotation table onto obs (DataFrame.join defaults
# to a left join, so every cell in obs is kept)
ad.obs = ad.obs.join(anno)
del anno
ad.obs.head(3)

In [None]:
# Store the X/Y columns as the tSNE embedding so that sc.pl.tsne can find them
ad.obsm["X_tsne"] = ad.obs[["X","Y"]].to_numpy()

In [None]:
# Number of cells per 'orig.ident' value (Series.value_counts replaces the deprecated top-level pd.value_counts)
ad.obs["orig.ident"].value_counts()

In [None]:
# Number of cells per 'Batch' value (Series.value_counts replaces the deprecated top-level pd.value_counts)
ad.obs["Batch"].value_counts()

In [None]:
# Number of cells per 'Age' value (Series.value_counts replaces the deprecated top-level pd.value_counts)
ad.obs["Age"].value_counts()

In [None]:
# Number of cells per 'Celltype' value (Series.value_counts replaces the deprecated top-level pd.value_counts)
ad.obs["Celltype"].value_counts()

In [None]:
# Number of cells per 'Subtype' value (Series.value_counts replaces the deprecated top-level pd.value_counts)
ad.obs["Subtype"].value_counts()

In [None]:
sc.pl.tsne(ad,color=["Celltype","Age"], wspace=0.4)

In [None]:
sc.pl.tsne(ad,color="Subtype")

In [None]:
# drop experimental condition: keep only cells labelled 'old' or 'young'
# Subsetting returns a view of the parent AnnData; .copy() makes it an independent
# object, so the obs columns added in later cells are written to a real object
# instead of implicitly converting a view.
ad = ad[ad.obs['Age'].isin(['old', 'young'])].copy()
ad

In [None]:
# re-name age groups

In [None]:
ad.obs['age2'] = ad.obs['Age']

In [None]:
# New age label -> set of original 'Age' labels that should receive it
new_names = {
    '2-3m': {'young'},
    '18-20m': {'old'}
}
# Invert to {original label: new label}, then order the entries by original label
result = {old: new for new, olds in new_names.items() for old in olds}
new_cluster_names = {old: result[old] for old in sorted(result)}
print(new_cluster_names)

In [None]:
# Translate the copied age labels to the new group names; labels without an entry
# in the mapping become NaN. The result is stored as a categorical column.
age_labels = ad.obs['age2'].map(new_cluster_names)
ad.obs['age'] = age_labels.astype('category')

In [None]:
ad.obs['age']

In [None]:
# Fix the category order explicitly so that '2-3m' precedes '18-20m'
ad.obs['age'] = ad.obs['age'].cat.reorder_categories(['2-3m', '18-20m'])

In [None]:
# Remove the original age label and the raw tSNE coordinate columns from obs
ad.obs = ad.obs.drop(columns=["Age", "X", "Y"])

In [None]:
# Number of cells per renamed age group (Series.value_counts replaces the deprecated top-level pd.value_counts)
ad.obs["age"].value_counts()

In [None]:
# Verify that the age groups are assigned correctly:
# plot known genes upregulated during aging in the mouse brain (Allen 2023 Cell)
sc.pl.dotplot(ad, ["C4b","Ifit3","Il33","Bmp6"], groupby='age', swap_axes=True, figsize=(4,2), standard_scale="var", title="All cell types")

In [None]:
# Number of cells per 'Celltype' value after subsetting (Series.value_counts replaces the deprecated pd.value_counts)
ad.obs["Celltype"].value_counts()

In [None]:
# also see paper

In [None]:
# Harmonised cell-type name -> set of original 'Celltype' labels merged into it
new_names = {
    'Endothelial cells': {'EC'},
    'Oligos': {'OLG'},
    'Astrocytes': {'AC'},
    'Pericytes': {'PC'},
    'SMCs': {'SMC'},
    'Neurons': {'imNeur','mNeur','NRP'},
    'OPCs': {'OPC'},
    'Microglia/Macrophages': {'MG','MAC','TNC'},
    'Ependymal cells': {'EPC','CPC'},
    'Immune_Other': {'MNC'},
    'Hemoglobin-expressing vascular cells':{'Hb_EC'},
}
# Invert to {original label: harmonised name}, then order the entries by original label
result = {old: new for new, olds in new_names.items() for old in olds}
new_cluster_names = {old: result[old] for old in sorted(result)}
print(new_cluster_names)

In [None]:
# Map the original cell-type labels to the harmonised names; labels without an
# entry in the mapping become NaN. The result is stored as a categorical column.
cluster_labels = ad.obs['Celltype'].map(new_cluster_names)
ad.obs['clusters'] = cluster_labels.astype('category')

In [None]:
# Number of cells per harmonised cell type (Series.value_counts replaces the deprecated top-level pd.value_counts)
ad.obs["clusters"].value_counts()

In [None]:
# Number of cells per 'Subtype' value after subsetting (Series.value_counts replaces the deprecated pd.value_counts)
ad.obs["Subtype"].value_counts()

In [None]:
# Harmonised subcluster name -> set of original 'Subtype' labels merged into it
new_names = {
    'vECs': {'V'},
    'capECs': {'Cap','VCap'},
    'aECs': {'A1','A2'},
    'SMCs': {'aSMC','aaSMC','vSMC','SMC_unclassified'},
    'ECs_unclassified': {'AV','EC_unclassified'},
}
# Invert to {original label: harmonised name}, then order the entries by original label
result = {old: new for new, olds in new_names.items() for old in olds}
new_cluster_names = {old: result[old] for old in sorted(result)}
print(new_cluster_names)

In [None]:
# Map the original subtype labels to the harmonised subcluster names; labels without
# an entry in the mapping become NaN. The result is stored as a categorical column.
subcluster_labels = ad.obs['Subtype'].map(new_cluster_names)
ad.obs['subclusters'] = subcluster_labels.astype('category')

In [None]:
# Number of cells per harmonised subcluster (Series.value_counts replaces the deprecated top-level pd.value_counts)
ad.obs["subclusters"].value_counts()

In [None]:
# Combined annotation: take the subcluster label where one exists, otherwise fall back to the cell-type label
has_subcluster = ad.obs["subclusters"].notna()
ad.obs["clusters2"] = np.where(has_subcluster, ad.obs["subclusters"], ad.obs["clusters"])

In [None]:
# Number of cells per combined cluster label (Series.value_counts replaces the deprecated top-level pd.value_counts)
ad.obs["clusters2"].value_counts()

In [None]:
ad.var_names

In [None]:
sc.pl.dotplot(ad, var_names=['Cldn5',"Aqp4","Mbp",'Foxj1','Pdgfrb','Vtn','Kcnj8','Rgs5','Atp13a5','Tagln','Acta2','Myh11'], groupby="clusters", swap_axes=False);

The last three columns (Tagln, Acta2, Myh11) are SMC markers.

Check if data is normalized:

In [None]:
ad.layers

In [None]:
# Show the expression distribution of 100 random genes (across all cells)
import random
import seaborn as sns
# Seeded generator so that the same 100 genes are drawn on every run (reproducible figure)
random_genes = random.Random(0).sample(range(0, ad.X.shape[1]), 100)
adata_sub = ad[:, random_genes]
# np.asarray instead of the discouraged np.matrix class; the DataFrame content is the same
exp = pd.DataFrame(np.asarray(adata_sub.X))
# One KDE curve per gene (hue='variable'), all drawn in the same colour
pl1 = sns.displot(data=pd.melt(exp), x='value', height=4, hue='variable', kind="kde", warn_singular=False, legend=False, palette=list(np.repeat('#086da6',100)), lw=0.3) # genes with 0 expression are excluded
pl1.set(xlim=(-0.5, 7), ylim=(0,0.007));
plt.show()

In [None]:
sns.set(rc={'figure.figsize':(4,4)})
# Pooled histogram of the sampled genes' expression values. There is no hue variable
# here, so a single `color` is passed; `palette` and `legend` only take effect
# together with `hue`.
pl = sns.histplot(data=pd.melt(exp), x='value', binwidth=0.5, color='#086da6')
pl.set(xlim=(0, 10));
sns.set_style("ticks")
plt.show()

In [None]:
np.matrix(adata_sub.X)

The data appear to be normalized already.

In [None]:
ad.layers["normalized"] = ad.X.copy() # save normalized + log-transformed (but unscaled) counts - retrieve via adata.X = adata.layers["normalized"]

In [None]:
ad

In [None]:
ad.layers

In [None]:
sc.pl.tsne(ad, color=['Foxf2'])

# Focus on: Foxf2

In [None]:
adata=ad
del ad

In [None]:
# NOTE: plain assignment - adata_backup is a second name for the same object, not an
# independent copy. It protects against re-binding `adata` to a subset, but not
# against in-place modifications of the object.
adata_backup = adata

In [None]:
# drop aged samples
# adata = adata[adata.obs['age'].isin(['2-3m'])]

In [None]:
gene="Foxf2"

## Plot

### Excl zonation

In [None]:
sc.pl.matrixplot(adata, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", layer="normalized")

In [None]:
sc.pl.dotplot(adata, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", layer="normalized")

In [None]:
utils.summarize_gene_expression(adata = adata, gene = gene, groupby = "clusters", 
                                study_name = dataset_name, organism = organism,
                                export = True, output_dir = os.path.join(main_dir, "P06_Foxf2_per_celltype", "Foxf2_summarized")
                               )

### Incl zonation

In [None]:
utils.summarize_gene_expression(adata = adata, gene = gene, groupby = "clusters2", 
                                study_name = dataset_name, organism = organism,
                                export = True, output_dir = os.path.join(main_dir, "P06_Foxf2_per_celltype", "Foxf2_summarized", "incl_zonation")
                               )

# Focus on: Other genes

In [None]:
target_genes = ["Foxo1", "Tek", "Nos3", "Htra1", "Egfl8", "Flt1", "Kdr", "Nrp1", "Nrp2", "Efnb2", "Itgb1", "Itga6", "Angpt2", "Cdh5", "Cldn5", "Ocln", "Ctnnb1"]

### Excl zonation

In [None]:
groupby = "clusters"

In [None]:
other_genes_results = {
    gene: utils.summarize_gene_expression(adata, gene, study_name = dataset_name, organism = organism, groupby = groupby, 
                                          output_dir=os.path.join(main_dir, "P06_Foxf2_per_celltype", "Other_genes_summarized"), export=True
                                         ) for gene in target_genes
}

In [None]:
# some plots

In [None]:
sc.pl.matrixplot(adata, [target_genes[0]], groupby=groupby, swap_axes=False, figsize=(2,5), standard_scale="var", layer="normalized")

In [None]:
sc.pl.dotplot(adata, [target_genes[0]], groupby=groupby, swap_axes=False, figsize=(2,5), standard_scale="var", layer="normalized")

In [None]:
sc.pl.matrixplot(adata, [target_genes[1]], groupby=groupby, swap_axes=False, figsize=(2,5), standard_scale="var", layer="normalized")

In [None]:
sc.pl.dotplot(adata, [target_genes[1]], groupby=groupby, swap_axes=False, figsize=(2,5), standard_scale="var", layer="normalized")

### Incl zonation

In [None]:
groupby = "clusters2"

In [None]:
other_genes_results = {
    gene: utils.summarize_gene_expression(adata, gene, study_name = dataset_name, organism = organism, groupby = groupby, 
                                          output_dir=os.path.join(main_dir, "P06_Foxf2_per_celltype", "Other_genes_summarized", "incl_zonation"), export=True
                                         ) for gene in target_genes
}

In [None]:
# some plots

In [None]:
sc.pl.matrixplot(adata, [target_genes[0]], groupby=groupby, swap_axes=False, figsize=(2,5), standard_scale="var", layer="normalized")

In [None]:
sc.pl.dotplot(adata, [target_genes[0]], groupby=groupby, swap_axes=False, figsize=(2,5), standard_scale="var", layer="normalized")

In [None]:
sc.pl.matrixplot(adata, [target_genes[1]], groupby=groupby, swap_axes=False, figsize=(2,5), standard_scale="var", layer="normalized")

In [None]:
sc.pl.dotplot(adata, [target_genes[1]], groupby=groupby, swap_axes=False, figsize=(2,5), standard_scale="var", layer="normalized")

# Focus on: Aging

In [None]:
adata = adata_backup

In [None]:
# Number of cells per age group (Series.value_counts replaces the deprecated top-level pd.value_counts)
adata.obs["age"].value_counts()

## Foxf2 by age group

In [None]:
gene=["Foxf2","Foxo1","Tek","Nos3"]

### All cell types

In [None]:
sc.pl.dotplot(adata, gene, groupby='age', swap_axes=True, figsize=(4,2), standard_scale="var", layer="normalized", title="All cell types")

In [None]:
adata

### ECs only

In [None]:
# ECs only
adata_ecs = adata[adata.obs['clusters'].isin(['Endothelial cells'])]
adata_ecs

In [None]:
pd.value_counts(adata_ecs.obs["age"])

In [None]:
sc.pl.dotplot(adata_ecs, gene, groupby='age', swap_axes=True, figsize=(4,2), standard_scale="var", layer="normalized", title="ECs")

### PCs only

In [None]:
# Subset to pericytes and count the cells per age group
# (Series.value_counts replaces the deprecated top-level pd.value_counts)
adata_pcs = adata[adata.obs['clusters'].isin(['Pericytes'])]
adata_pcs.obs["age"].value_counts()

In [None]:
sc.pl.dotplot(adata_pcs, gene, groupby='age', swap_axes=True, figsize=(4,2), standard_scale="var", layer="normalized", title="PCs")

### SMCs only

In [None]:
# Subset to SMCs and count the cells per age group
# (Series.value_counts replaces the deprecated top-level pd.value_counts).
# NOTE(review): the variable name adata_pcs is reused here for the SMC subset; it is
# kept because the SMC dotplot cell that follows reads this name.
adata_pcs = adata[adata.obs['clusters'].isin(['SMCs'])]
adata_pcs.obs["age"].value_counts()

In [None]:
sc.pl.dotplot(adata_pcs, gene, groupby='age', swap_axes=True, figsize=(4,2), standard_scale="var", layer="normalized", title="SMCs")

### Neurons only

In [None]:
adata_neur = adata[adata.obs['clusters'].isin(['Neurons'])]
pd.value_counts(adata_neur.obs["age"])

In [None]:
sc.pl.dotplot(adata_neur, gene, groupby='age', swap_axes=True, figsize=(4,2), standard_scale="var", layer="normalized", title="Neurons")

### Astrocytes only

In [None]:
adata_astro = adata[adata.obs['clusters'].isin(['Astrocytes'])]
pd.value_counts(adata_astro.obs["age"])

In [None]:
sc.pl.dotplot(adata_astro, gene, groupby='age', swap_axes=True, figsize=(4,2), standard_scale="var", layer="normalized", title="Astrocytes")

### Oligos only

In [None]:
adata_olig = adata[adata.obs['clusters'].isin(['Oligos'])]
pd.value_counts(adata_olig.obs["age"])

In [None]:
sc.pl.dotplot(adata_olig, gene, groupby='age', swap_axes=True, figsize=(4,2), standard_scale="var", layer="normalized", title="Oligos")

### Microglia only

In [None]:
adata_mg = adata[adata.obs['clusters'].isin(['Microglia/Macrophages'])]
pd.value_counts(adata_mg.obs["age"])

In [None]:
sc.pl.dotplot(adata_mg, gene, groupby='age', swap_axes=True, figsize=(4,2), standard_scale="var", layer="normalized", title="Microglia/Macrophages")

## Correlate gene expression (Foxf2 and Foxo1)

### ECs

In [None]:
gg = ["Foxf2","Foxo1"]

In [None]:
adata

In [None]:
adata_ecs

In [None]:
# Per-cell expression of the two genes in ECs, with each cell's age group attached
adat = adata_ecs
age = adat.obs[["age"]]
subs = pd.DataFrame(adat[:, gg].X, columns=gg, index=adat.obs.index).join(age)
subs.head()

In [None]:
g = sns.lmplot(x="Foxf2", y="Foxo1", data=subs, hue="age", fit_reg=False, scatter_kws={'alpha':0.6})
sns.regplot(x="Foxf2", y="Foxo1", data=subs, scatter=False, ax=g.axes[0, 0], color="grey")
g.figure.set_size_inches(7,6)
plt.show()

In [None]:
import scipy as sp
import scipy.stats  # load the submodule explicitly; a bare `import scipy` does not guarantee that sp.stats is available
# Pearson correlation between the two genes across the cells in `subs`
r, p = sp.stats.pearsonr(subs['Foxf2'], subs['Foxo1'])
'Pearson R = {:.2f}, p = {:.2g}'.format(r, p)

In [None]:
graph = sns.jointplot(data=subs, x="Foxf2", y="Foxo1", hue="age")
plt.show()

In [None]:
sns.set(style="whitegrid")
ax = sns.boxplot(x="age", y="Foxf2", data=subs, showfliers = False, palette="Blues")
ax = sns.stripplot(x="age", y="Foxf2", data=subs, color=".25",size=1)
plt.show()

In [None]:
sns.set(style="whitegrid")
ax = sns.boxplot(x="age", y="Foxo1", data=subs, showfliers = False, palette="Blues")
ax = sns.stripplot(x="age", y="Foxo1", data=subs, color=".25",size=1)
plt.show()

Exclude dropouts (cells with zero expression of both genes), i.e. keep cells in which at least one of the two genes is detected

In [None]:
# Boolean mask over ECs: True where at least one of the two genes has expression > 0
foxf2_detected = adata_ecs[:, "Foxf2"].X > 0
foxo1_detected = adata_ecs[:, "Foxo1"].X > 0
coex = foxf2_detected | foxo1_detected

In [None]:
adata_ecs_above0 = adata_ecs[coex]
adata_ecs_above0

In [None]:
pd.value_counts(adata_ecs_above0.obs["age"])

In [None]:
sc.pl.dotplot(adata_ecs_above0, gene, groupby='age', swap_axes=True, figsize=(4,2), standard_scale="var", layer="normalized", title="ECs")

In [None]:
adat = adata_ecs_above0
subs = adat[:,gg].X
subs = pd.DataFrame(subs, columns = gg, index=adat.obs.index)
age = pd.DataFrame(adat.obs["age"])
subs = subs.join(age)

In [None]:
g = sns.lmplot(x="Foxf2", y="Foxo1", data=subs, hue="age", fit_reg=False, scatter_kws={'alpha':0.6})
sns.regplot(x="Foxf2", y="Foxo1", data=subs, scatter=False, ax=g.axes[0, 0], color="grey")
g.figure.set_size_inches(7,6)
plt.show()

In [None]:
r, p = sp.stats.pearsonr(subs['Foxf2'], subs['Foxo1'])
'Pearson R = {:.2f}, p = {:.2g}'.format(r, p)

In [None]:
graph = sns.jointplot(data=subs, x="Foxf2", y="Foxo1", hue="age")
plt.show()

In [None]:
sns.set(style="whitegrid")
ax = sns.boxplot(x="age", y="Foxf2", data=subs, showfliers = False, palette="Blues")
ax = sns.stripplot(x="age", y="Foxf2", data=subs, color=".25",size=1)
plt.show()

In [None]:
sns.set(style="whitegrid")
ax = sns.boxplot(x="age", y="Foxo1", data=subs, showfliers = False, palette="Blues")
ax = sns.stripplot(x="age", y="Foxo1", data=subs, color=".25",size=1)
plt.show()

Exclude dropouts (cells with zero expression of either of the two genes), i.e. keep only cells in which both genes are detected

In [None]:
# Boolean mask over ECs: True only where both genes have expression > 0
foxf2_detected = adata_ecs[:, "Foxf2"].X > 0
foxo1_detected = adata_ecs[:, "Foxo1"].X > 0
coex = foxf2_detected & foxo1_detected

In [None]:
adata_ecs_above0 = adata_ecs[coex]
adata_ecs_above0

In [None]:
pd.value_counts(adata_ecs_above0.obs["age"])

In [None]:
sc.pl.dotplot(adata_ecs_above0, gene, groupby='age', swap_axes=True, figsize=(4,2), standard_scale="var", layer="normalized", title="ECs")

In [None]:
adat = adata_ecs_above0
subs = adat[:,gg].X
subs = pd.DataFrame(subs, columns = gg, index=adat.obs.index)
age = pd.DataFrame(adat.obs["age"])
subs = subs.join(age)

In [None]:
g = sns.lmplot(x="Foxf2", y="Foxo1", data=subs, hue="age", fit_reg=False, scatter_kws={'alpha':0.6})
sns.regplot(x="Foxf2", y="Foxo1", data=subs, scatter=False, ax=g.axes[0, 0], color="grey")
g.figure.set_size_inches(7,6)
plt.show()

In [None]:
r, p = sp.stats.pearsonr(subs['Foxf2'], subs['Foxo1'])
'Pearson R = {:.2f}, p = {:.2g}'.format(r, p)

In [None]:
graph = sns.jointplot(data=subs, x="Foxf2", y="Foxo1", hue="age")
plt.show()

In [None]:
sns.set(style="whitegrid")
ax = sns.boxplot(x="age", y="Foxf2", data=subs, showfliers = False, palette="Blues")
ax = sns.stripplot(x="age", y="Foxf2", data=subs, color=".25",size=1)
plt.show()

In [None]:
sns.set(style="whitegrid")
ax = sns.boxplot(x="age", y="Foxo1", data=subs, showfliers = False, palette="Blues")
ax = sns.stripplot(x="age", y="Foxo1", data=subs, color=".25",size=1)
plt.show()

### All cell types

In [None]:
gg = ["Foxf2","Foxo1"]

In [None]:
adata

In [None]:
adat = adata
subs = adat[:,gg].X
subs = pd.DataFrame(subs, columns = gg, index=adat.obs.index)
age = pd.DataFrame(adat.obs["age"])
subs = subs.join(age)
subs.head()

In [None]:
g = sns.lmplot(x="Foxf2", y="Foxo1", data=subs, hue="age", fit_reg=False, scatter_kws={'alpha':0.6})
sns.regplot(x="Foxf2", y="Foxo1", data=subs, scatter=False, ax=g.axes[0, 0], color="grey")
g.figure.set_size_inches(7,6)
plt.show()

In [None]:
graph = sns.jointplot(data=subs, x="Foxf2", y="Foxo1", hue="age")
plt.show()

Exclude dropouts (cells with zero expression of both genes), i.e. keep cells in which at least one of the two genes is detected

In [None]:
coex = (adat[:,'{}'.format("Foxf2")].X > 0) | (adat[:,'{}'.format("Foxo1")].X > 0)

In [None]:
adata_above0 = adat[coex]
adata_above0

In [None]:
pd.value_counts(adata_above0.obs["age"])

In [None]:
sc.pl.dotplot(adata_above0, gene, groupby='age', swap_axes=True, figsize=(4,2), standard_scale="var", layer="normalized")

In [None]:
adat = adata_above0
subs = adat[:,gg].X
subs = pd.DataFrame(subs, columns = gg, index=adat.obs.index)
age = pd.DataFrame(adat.obs["age"])
subs = subs.join(age)

In [None]:
g = sns.lmplot(x="Foxf2", y="Foxo1", data=subs, hue="age", fit_reg=False, scatter_kws={'alpha':0.6})
sns.regplot(x="Foxf2", y="Foxo1", data=subs, scatter=False, ax=g.axes[0, 0], color="grey")
g.figure.set_size_inches(7,6)
plt.show()

In [None]:
graph = sns.jointplot(data=subs, x="Foxf2", y="Foxo1", hue="age")
plt.show()

## Impute expression values of dropouts  
Using MAGIC

In [None]:
import magic

In [None]:
fdata = pd.DataFrame(adata.X, columns = adata.var_names)
fdata.head()

In [None]:
# MAGIC operator with default settings; the fixed random_state makes the imputation
# reproducible across kernel restarts
magic_op = magic.MAGIC(random_state=0)

In [None]:
f_magic = magic_op.fit_transform(fdata)

In [None]:
import scprep

In [None]:
gene1="Foxf2"
gene2="Foxo1"
# Side-by-side scatter of the two genes before and after imputation.
# The original passed legend_title=gene3, but gene3 is never defined, so the cell
# raised a NameError. No colour variable is plotted, so no legend title is needed.
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(10, 5))
scprep.plot.scatter(x=fdata[gene1], y=fdata[gene2], ax=ax1, xlabel=gene1, ylabel=gene2, title='Before MAGIC')
scprep.plot.scatter(x=f_magic[gene1], y=f_magic[gene2], ax=ax2, xlabel=gene1, ylabel=gene2, title='After MAGIC')
plt.tight_layout()
plt.show()

In [None]:
adata.X.shape

In [None]:
f_magic.shape

In [None]:
# Store the imputed values as a layer. f_magic is derived from a DataFrame input, so it
# is converted to a plain array first: layers are meant to hold array-like matrices,
# and the later gene subsetting of this layer is positional.
adata.layers["imputed"] = np.asarray(f_magic)

In [None]:
# ECs only
adata_ecs = adata[adata.obs['clusters'].isin(['Endothelial cells'])]
adata_ecs

### ECs

In [None]:
gg = ["Foxf2","Foxo1"]

In [None]:
adata

In [None]:
adata_ecs

In [None]:
adat = adata_ecs
subs = adat[:,gg].layers["imputed"]
subs = pd.DataFrame(subs, columns = gg, index=adat.obs.index)
age = pd.DataFrame(adat.obs["age"])
subs = subs.join(age)
subs.head()

In [None]:
g = sns.lmplot(x="Foxf2", y="Foxo1", data=subs, hue="age", fit_reg=False, scatter_kws={'alpha':0.6,"s":1})
sns.regplot(x="Foxf2", y="Foxo1", data=subs, scatter=False, ax=g.axes[0, 0], color="grey")
g.figure.set_size_inches(7,6)
plt.show()

In [None]:
import scipy as sp
r, p = sp.stats.pearsonr(subs['Foxf2'], subs['Foxo1'])
'Pearson R = {:.2f}, p = {:.2g}'.format(r, p)

In [None]:
graph = sns.jointplot(data=subs, x="Foxf2", y="Foxo1", hue="age")
plt.show()

In [None]:
sns.set(style="whitegrid")
ax = sns.boxplot(x="age", y="Foxf2", data=subs, showfliers = False, palette="Blues")
ax = sns.stripplot(x="age", y="Foxf2", data=subs, color=".25",size=0.5)
plt.show()

In [None]:
sns.set(style="whitegrid")
ax = sns.boxplot(x="age", y="Foxo1", data=subs, showfliers = False, palette="Blues")
ax = sns.stripplot(x="age", y="Foxo1", data=subs, color=".25",size=0.5)
plt.show()

### All cell types

In [None]:
gg = ["Foxf2","Foxo1"]

In [None]:
adata

In [None]:
adat = adata
subs = adat[:,gg].layers["imputed"]
subs = pd.DataFrame(subs, columns = gg, index=adat.obs.index)
age = pd.DataFrame(adat.obs["age"])
subs = subs.join(age)
subs.head()

In [None]:
g = sns.lmplot(x="Foxf2", y="Foxo1", data=subs, hue="age", fit_reg=False, scatter_kws={'alpha':0.6,"s":1})
sns.regplot(x="Foxf2", y="Foxo1", data=subs, scatter=False, ax=g.axes[0, 0], color="grey")
g.figure.set_size_inches(7,6)
plt.show()

In [None]:
r, p = sp.stats.pearsonr(subs['Foxf2'], subs['Foxo1'])
'Pearson R = {:.2f}, p = {:.2g}'.format(r, p)

In [None]:
graph = sns.jointplot(data=subs, x="Foxf2", y="Foxo1", hue="age")
plt.show()

## Pseudobulk analysis

Motivation: correlations at the single-cell level are biased by the large number of dropouts.

In [None]:
import decoupler as dc

In [None]:
adata

In [None]:
adata.obs["sample"] = adata.obs["orig.ident"]

In [None]:
# Number of cells per sample (Series.value_counts replaces the deprecated top-level pd.value_counts)
adata.obs["sample"].value_counts()

In [None]:
pd.crosstab(adata.obs["sample"], adata.obs["age"]).T

In [None]:
# Aggregate all cells of each sample into one pseudobulk profile (groups_col=None: no split by cell type).
# NOTE(review): the 'normalized' layer was saved as normalized + log-transformed values;
# confirm that aggregating these with get_pseudobulk's default settings is intended
# (the function may expect raw counts - TODO check against the installed decoupler version).
padata = dc.get_pseudobulk(adata, sample_col='sample', groups_col=None, layer='normalized', min_prop=0.01, min_smpls=0)

In [None]:
padata

### Correlate gene expression (Foxf2 and Foxo1)

#### All cell types

In [None]:
gg = ["Foxf2","Foxo1"]

In [None]:
padata

In [None]:
adat = padata
subs = adat[:,gg].X
subs = pd.DataFrame(subs, columns = gg, index=adat.obs.index)
age = pd.DataFrame(adat.obs["age"])
subs = subs.join(age)
subs.head()

In [None]:
g = sns.lmplot(x="Foxf2", y="Foxo1", data=subs, hue="age", fit_reg=False, scatter_kws={'alpha':0.6})
sns.regplot(x="Foxf2", y="Foxo1", data=subs, scatter=False, ax=g.axes[0, 0], color="grey")
g.figure.set_size_inches(7,6)
plt.show()

In [None]:
import scipy as sp
r, p = sp.stats.pearsonr(subs['Foxf2'], subs['Foxo1'])
'Pearson R = {:.2f}, p = {:.2g}'.format(r, p)

In [None]:
graph = sns.jointplot(data=subs, x="Foxf2", y="Foxo1", hue="age")
plt.show()

#### ECs

In [None]:
padata = dc.get_pseudobulk(adata, sample_col='sample', groups_col='clusters', layer='normalized', min_prop=0.1, min_smpls=3)

In [None]:
padata

In [None]:
# ECs only
padata_ecs = padata[padata.obs['clusters'].isin(['Endothelial cells'])]
padata_ecs

In [None]:
gg = ["Foxf2","Foxo1"]

In [None]:
adat = padata_ecs
subs = adat[:,gg].X
subs = pd.DataFrame(subs, columns = gg, index=adat.obs.index)
age = pd.DataFrame(adat.obs["age"])
subs = subs.join(age)
subs.head()

In [None]:
g = sns.lmplot(x="Foxf2", y="Foxo1", data=subs, hue="age", fit_reg=False, scatter_kws={'alpha':0.6})
sns.regplot(x="Foxf2", y="Foxo1", data=subs, scatter=False, ax=g.axes[0, 0], color="grey")
g.figure.set_size_inches(7,6)
plt.show()

In [None]:
import scipy as sp
r, p = sp.stats.pearsonr(subs['Foxf2'], subs['Foxo1'])
'Pearson R = {:.2f}, p = {:.2g}'.format(r, p)

In [None]:
graph = sns.jointplot(data=subs, x="Foxf2", y="Foxo1", hue="age")
plt.show()

# Session Info

In [None]:
sc.logging.print_versions()

---
# Export HTML

In [None]:
# use nbconvert conda env

In [None]:
# Copy the notebook under a date-stamped name, convert that copy to HTML, then delete the copy.
# NOTE(review): the file name says "Zhao2022" while dataset_name is 'Zhao2020' - confirm the notebook's actual file name.
! cp sc_pp_Zhao2022.ipynb HTMLs/$(date '+%Y%m%d')_sc_pp_Zhao2022.ipynb # rename in temp copy
! jupyter nbconvert HTMLs/$(date '+%Y%m%d')_sc_pp_Zhao2022.ipynb --to html_toc # export
! rm HTMLs/$(date '+%Y%m%d')_sc_pp_Zhao2022.ipynb # delete temp copy