<font size="+3.8">Scanpy single-cell pre-processing</font>  
<font size="+1.5"></font>  

Aim: Preprocess annotated human brain single-cell data from Siletti 2022 bioRxiv

In [None]:
import datetime
datetime.date.today().strftime('%d/%m/%Y')

In [None]:
import os
os.getlogin()

In [None]:
import sys
import fnmatch 

In [None]:
import anndata
import scanpy as sc
import scipy as sci
#sc.logging.print_versions()

In [None]:
sc.settings.verbosity = 3

In [None]:
#import loompy

In [None]:
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
from matplotlib import colors
import seaborn as sb
now = datetime.datetime.now()
today = now.strftime("%Y%m%d")

In [None]:
from matplotlib.pyplot import rc_context

In [None]:
os.getcwd()

In [None]:
# Root data directory. Both assignments run, so the Windows UNC path is the value
# left in main_dir after this cell; comment out the line that does not apply.
main_dir='/run/user/1000/gvfs/smb-share:server=138.245.4.35,share=bd-dichgans/SF' # Linux
# Raw string: the backslashes of the UNC path are taken literally instead of being
# parsed as (invalid) escape sequences such as "\i", "\B" and "\S".
main_dir=r'\\isdsynnas.srv.med.uni-muenchen.de\BD-Dichgans\SF' # Win

In [None]:
main_dir='/cluster2/sfrerich/jupyter/p6-foxf2-per-celltype/data' # HPC

# Load + format data

Annotated by authors

### All cells

All superclusters were downloaded from cellxgene (https://cellxgene.cziscience.com/collections/283d65eb-dd53-496d-adb7-7570c7caa443)
on 23/02/2023, except for Miscellaneous.

In [None]:
# run once

In [None]:
f=os.listdir(os.path.join(main_dir+'/P6_vasc_scRNAseq/Siletti2022/all_cellxgene'))
f

In [None]:
# Load each downloaded supercluster file, reduce it to a smaller object,
# log-normalize it and export it as "<file name up to first '.'>_reduced.h5ad".
# NOTE(review): this cell reads/writes below 'P6_vasc_scRNAseq', whereas the
# reload cells further down use 'P06_vasc_scRNAseq' -- confirm which one is correct.
in_dir = main_dir+'/P6_vasc_scRNAseq/Siletti2022/all_cellxgene'
out_dir = main_dir+'/P6_vasc_scRNAseq/Siletti2022/all_reduced_normalized'
for p in f:
    adata = anndata.read_h5ad(os.path.join(in_dir,p))
    # Keep only cells not flagged as bad and not from spinal cord. The original
    # assigned the BadCells filter to `adatas`, so it was never applied to `adata`.
    keep = (adata.obs["BadCells"] == 0) & (adata.obs["tissue"] != "Spinal cord")
    # Explicit copy so the attribute assignments below act on a real object, not a view.
    adata = adata[keep].copy()
    # Reduce the annotation to the columns needed downstream.
    adata.obs = adata.obs[["supercluster_term", "cell_type", "assay", "organism", "disease", "tissue", "BadCells"]]
    adata.var = adata.var[["Gene","feature_name"]]
    del adata.uns
    # Use the "Gene" column as variable names, then de-duplicate names.
    adata.var_names = adata.var["Gene"].tolist()
    adata.obs_names_make_unique()
    adata.var_names_make_unique()
    # log-normalize (in place; raw counts are not kept in a separate layer)
    sc.pp.normalize_total(adata,inplace=True)
    sc.pp.log1p(adata)
    adata.write_h5ad(os.path.join(out_dir,p.split(".")[0]+'_reduced.h5ad'))

In [None]:
del adata

In [None]:
# then load from here

In [None]:
f=os.listdir(os.path.join(main_dir+'/P06_vasc_scRNAseq/Siletti2022/all_reduced_normalized'))
f

In [None]:
# Read every reduced file into a dict keyed by its file name up to the first ".".
reduced_dir = main_dir+'/P06_vasc_scRNAseq/Siletti2022/all_reduced_normalized'
adatas = {}
for fname in f:
    adatas[fname.split(".")[0]] = anndata.read_h5ad(os.path.join(reduced_dir, fname))

In [None]:
adatas.keys()

In [None]:
adatas.values()

In [None]:
# merge - datasets too large
# adata2 = anndata.concat(adatas, axis=0, join="inner", merge="first")
# adata2

In [None]:
# Import from the documented public location rather than the internal
# `multi_files` submodule.
from anndata.experimental import AnnCollection

In [None]:
adata2 = AnnCollection(adatas, join_vars='inner', label='dataset')

In [None]:
adata2

In [None]:
[i for i in adata2.var_names if "NOS3" in i]

In [None]:
# Number of cells per author-assigned cell type (Series method; the top-level
# pd.value_counts function is deprecated in recent pandas).
adata2.obs["cell_type"].value_counts()

In [None]:
# Mapping from the `cell_type` labels in the data to the cluster names used in
# this notebook (both astrocyte-like labels collapse onto 'Astrocytes').
new_cluster_names = dict([
    ('Bergmann glial cell', 'Astrocytes'),
    ('astrocyte', 'Astrocytes'),
    ('central nervous system macrophage', 'Macrophages'),
    ('choroid plexus epithelial cell', 'Choroid plexus epithelial cells'),
    ('endothelial cell', 'Endothelial cells'),
    ('ependymal cell', 'Ependymal cells'),
    ('fibroblast', 'Fibroblasts'),
    ('neuron', 'Neurons'),
    ('oligodendrocyte', 'Oligos'),
    ('oligodendrocyte precursor cell', 'OPCs'),
    ('pericyte', 'Pericytes'),
    ('vascular associated smooth muscle cell', 'SMCs'),
])
#new_cluster_names=list(new_cluster_names.values())

In [None]:
adata2.obs['clusters'] = (
    adata2.obs['cell_type']
    .map(new_cluster_names)
    .astype('category')
)

In [None]:
# Number of cells per renamed cluster (Series method; the top-level
# pd.value_counts function is deprecated in recent pandas).
adata2.obs["clusters"].value_counts()

In [None]:
#adata3 = adata2.to_adata() # does not contain .X - must subset first into AnnCollectionView object

In [None]:
# subset genes
# Genes inspected in the "Focus on" sections below. The list previously contained
# "ANGPT2ery"; `isin` matches exact names only, so ANGPT2 was left out of the subset.
genes_of_interest = ["FOXF2","FOXO1","TEK","NOS3","HTRA1","EGFL8","FLT1","KDR","NRP1","NRP2","EFNB2","ITGB1","ANGPT2","CDH5"]
adata3 = adata2[:,adata2.var_names.isin(genes_of_interest)]

In [None]:
adata3

In [None]:
adata3 = adata3.to_adata()

In [None]:
adata3

In [None]:
adata3.obsm["X_umap"] = adata3.obsm["X_UMAP"]

In [None]:
adata3.var_names

In [None]:
#del adata2

Check if data is normalized:

In [None]:
adata3.X.todense()

In [None]:
# Show the expression distribution of 4 randomly chosen genes of the subset
# (across all cells), one KDE curve per gene.
import random
import seaborn as sns
# Apply the theme before drawing; previously it was set only after the figure was built.
sns.set_theme(style='white')
# Fixed seed so the same genes are sampled on every run.
random.seed(0)
random_genes=random.sample(range(0, adata3.X.shape[1]), 4)
adata_sub = adata3[:,random_genes]
exp=pd.DataFrame(adata_sub.X.todense())
# plot
pl1=sns.displot(data=pd.melt(exp),x='value',height=4,hue='variable',kind="kde",warn_singular=False,legend=False,palette=list(np.repeat('#086da6',4)), lw=0.3) # genes with 0 expression are excluded
pl1.set(xlim=(-0.5, 7),ylim=(0,0.007))
plt.show()

In [None]:
# Histogram of the same melted expression values (`exp` comes from the previous cell).
sns.set(rc={'figure.figsize':(4,4)})
sns.set_theme(style='white')
# NOTE(review): `palette` is passed without a `hue` variable, so it presumably has
# no effect here -- confirm whether hue='variable' was intended.
pl=sns.histplot(data=pd.melt(exp),x='value',binwidth=0.5,legend=True,palette=list(np.repeat('#086da6',4)))
pl.set(xlim=(0, 10));
plt.show()

# DE Analysis / Vascular marker genes

The full dataset is too large to process locally; the cells below require an HPC node.

In [None]:
adata_full=adata2.to_adata()

In [None]:
# DEA requires HPC
sc.tl.rank_genes_groups(adata_full, 'clusters', method='wilcoxon')

In [None]:
# Plot the top 20 ranked genes per cluster. The DE results were computed on
# `adata_full` in the previous cell; `adata` was deleted earlier in the notebook.
sc.pl.rank_genes_groups(adata_full, n_genes=20, sharey=False)

---

# Focus on: Foxf2

In [None]:
gene="FOXF2"

In [None]:
sc.pl.umap(adata3, color='clusters')

In [None]:
sc.pl.umap(adata3, color='cell_type')

In [None]:
sc.pl.umap(adata3, color=gene)

In [None]:
sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# dotplot dot size = fraction of cells in group expressing Foxf2
fc = sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
foxf2 = pd.concat([ex, fc], axis=1)
foxf2['gene'] = gene
foxf2['source'] = "Siletti2022"
foxf2['organism'] = "Human"
foxf2['cell_number'] = pd.DataFrame(adata3.obs["clusters"].value_counts()).reindex(foxf2.index)
foxf2.index.name = "clusters"
foxf2

In [None]:
# export
name='Siletti2022'

In [None]:
# Win
# Write the summary table as a ';'-separated CSV with today's date (YYYYMMDD) in the name.
# Only the `datetime` module is imported in this notebook, so the date class is
# reached as datetime.date (a bare `date` raises NameError).
foxf2.to_csv(main_dir+'/Foxf2_summarized/'+ datetime.date.today().strftime("%Y%m%d")+'_'+name+'_normalized.csv',sep=';')

# Focus on: Foxo1

In [None]:
gene="FOXO1"

In [None]:
sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# dotplot dot size = fraction of cells in group expressing the gene set in `gene`
fc = sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
foxf2 = pd.concat([ex, fc], axis=1)
foxf2['gene'] = gene
foxf2['source'] = "Siletti2022"
foxf2['organism'] = "Human"
foxf2['cell_number'] = pd.DataFrame(adata3.obs["clusters"].value_counts()).reindex(foxf2.index)
foxf2.index.name = "clusters"
foxf2

In [None]:
# export
name='Siletti2022'

In [None]:
# Win
# Write the summary table as a ';'-separated CSV with today's date (YYYYMMDD) in the name.
# Only the `datetime` module is imported in this notebook, so the date class is
# reached as datetime.date (a bare `date` raises NameError).
foxf2.to_csv(main_dir+'/Others_summarized/'+ datetime.date.today().strftime("%Y%m%d")+'_FOXO1_'+name+'_normalized.csv',sep=';')

# Focus on: Tek=Tie2

In [None]:
gene="TEK"

In [None]:
sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# dotplot dot size = fraction of cells in group expressing the gene set in `gene`
fc = sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
foxf2 = pd.concat([ex, fc], axis=1)
foxf2['gene'] = gene
foxf2['source'] = "Siletti2022"
foxf2['organism'] = "Human"
foxf2['cell_number'] = pd.DataFrame(adata3.obs["clusters"].value_counts()).reindex(foxf2.index)
foxf2.index.name = "clusters"
foxf2

In [None]:
# export
name='Siletti2022'

In [None]:
# Win
# Write the summary table as a ';'-separated CSV with today's date (YYYYMMDD) in the name.
# Only the `datetime` module is imported in this notebook, so the date class is
# reached as datetime.date (a bare `date` raises NameError).
foxf2.to_csv(main_dir+'/Others_summarized/'+ datetime.date.today().strftime("%Y%m%d")+'_TEK_'+name+'_normalized.csv',sep=';')

# Focus on: eNOS=NOS3

In [None]:
gene="NOS3"

In [None]:
sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# dotplot dot size = fraction of cells in group expressing the gene set in `gene`
fc = sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
foxf2 = pd.concat([ex, fc], axis=1)
foxf2['gene'] = gene
foxf2['source'] = "Siletti2022"
foxf2['organism'] = "Human"
foxf2['cell_number'] = pd.DataFrame(adata3.obs["clusters"].value_counts()).reindex(foxf2.index)
foxf2.index.name = "clusters"
foxf2

In [None]:
# export
name='Siletti2022'

In [None]:
# Win
# Write the summary table as a ';'-separated CSV with today's date (YYYYMMDD) in the name.
# Only the `datetime` module is imported in this notebook, so the date class is
# reached as datetime.date (a bare `date` raises NameError).
foxf2.to_csv(main_dir+'/Others_summarized/'+ datetime.date.today().strftime("%Y%m%d")+'_NOS3_'+name+'_normalized.csv',sep=';')

# Focus on: HTRA1

In [None]:
gene="HTRA1"

In [None]:
sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# dotplot dot size = fraction of cells in group expressing the gene set in `gene`
fc = sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
foxf2 = pd.concat([ex, fc], axis=1)
foxf2['gene'] = gene
foxf2['source'] = "Siletti2022"
foxf2['organism'] = "Human"
foxf2['cell_number'] = pd.DataFrame(adata3.obs["clusters"].value_counts()).reindex(foxf2.index)
foxf2.index.name = "clusters"
foxf2

In [None]:
# export
name='Siletti2022'

In [None]:
# Win
# Write the summary table as a ';'-separated CSV with today's date (YYYYMMDD) in the name.
# Only the `datetime` module is imported in this notebook, so the date class is
# reached as datetime.date (a bare `date` raises NameError).
foxf2.to_csv(main_dir+'/Others_summarized/'+ datetime.date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'_normalized.csv',sep=';')

# Focus on: EGFL8

In [None]:
gene="EGFL8"

In [None]:
sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# dotplot dot size = fraction of cells in group expressing the gene set in `gene`
fc = sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
foxf2 = pd.concat([ex, fc], axis=1)
foxf2['gene'] = gene
foxf2['source'] = "Siletti2022"
foxf2['organism'] = "Human"
foxf2['cell_number'] = pd.DataFrame(adata3.obs["clusters"].value_counts()).reindex(foxf2.index)
foxf2.index.name = "clusters"
foxf2

In [None]:
# export
name='Siletti2022'

In [None]:
# Win
# Write the summary table as a ';'-separated CSV with today's date (YYYYMMDD) in the name.
# Only the `datetime` module is imported in this notebook, so the date class is
# reached as datetime.date (a bare `date` raises NameError).
foxf2.to_csv(main_dir+'/Others_summarized/'+ datetime.date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'_normalized.csv',sep=';')

# Focus on: FLT1

In [None]:
gene="FLT1"

In [None]:
sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# dotplot dot size = fraction of cells in group expressing the gene set in `gene`
fc = sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
foxf2 = pd.concat([ex, fc], axis=1)
foxf2['gene'] = gene
foxf2['source'] = "Siletti2022"
foxf2['organism'] = "Human"
foxf2['cell_number'] = pd.DataFrame(adata3.obs["clusters"].value_counts()).reindex(foxf2.index)
foxf2.index.name = "clusters"
foxf2

In [None]:
# export
name='Siletti2022'

In [None]:
main_dir

In [None]:
# Win
foxf2.to_csv(main_dir+'/Others_summarized/'+ datetime.date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'_normalized.csv',sep=';')

# Focus on: KDR

In [None]:
gene="KDR"

In [None]:
sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# dotplot dot size = fraction of cells in group expressing the gene set in `gene`
fc = sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
foxf2 = pd.concat([ex, fc], axis=1)
foxf2['gene'] = gene
foxf2['source'] = "Siletti2022"
foxf2['organism'] = "Human"
foxf2['cell_number'] = pd.DataFrame(adata3.obs["clusters"].value_counts()).reindex(foxf2.index)
foxf2.index.name = "clusters"
foxf2

In [None]:
# export
name='Siletti2022'

In [None]:
# Win
foxf2.to_csv(main_dir+'/Others_summarized/'+ datetime.date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'_normalized.csv',sep=';')

# Focus on: NRP1

In [None]:
gene="NRP1"

In [None]:
sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# dotplot dot size = fraction of cells in group expressing the gene set in `gene`
fc = sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
foxf2 = pd.concat([ex, fc], axis=1)
foxf2['gene'] = gene
foxf2['source'] = "Siletti2022"
foxf2['organism'] = "Human"
foxf2['cell_number'] = pd.DataFrame(adata3.obs["clusters"].value_counts()).reindex(foxf2.index)
foxf2.index.name = "clusters"
foxf2

In [None]:
# export
name='Siletti2022'

In [None]:
# Win
foxf2.to_csv(main_dir+'/Others_summarized/'+ datetime.date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'_normalized.csv',sep=';')

# Focus on: NRP2

In [None]:
gene="NRP2"

In [None]:
sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# dotplot dot size = fraction of cells in group expressing the gene set in `gene`
fc = sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
foxf2 = pd.concat([ex, fc], axis=1)
foxf2['gene'] = gene
foxf2['source'] = "Siletti2022"
foxf2['organism'] = "Human"
foxf2['cell_number'] = pd.DataFrame(adata3.obs["clusters"].value_counts()).reindex(foxf2.index)
foxf2.index.name = "clusters"
foxf2

In [None]:
# export
name='Siletti2022'

In [None]:
# Win
foxf2.to_csv(main_dir+'/Others_summarized/'+ datetime.date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'_normalized.csv',sep=';')

# Focus on: EFNB2

In [None]:
gene="EFNB2"

In [None]:
sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# dotplot dot size = fraction of cells in group expressing the gene set in `gene`
fc = sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
foxf2 = pd.concat([ex, fc], axis=1)
foxf2['gene'] = gene
foxf2['source'] = "Siletti2022"
foxf2['organism'] = "Human"
foxf2['cell_number'] = pd.DataFrame(adata3.obs["clusters"].value_counts()).reindex(foxf2.index)
foxf2.index.name = "clusters"
foxf2

In [None]:
# export
name='Siletti2022'

In [None]:
# Win
foxf2.to_csv(main_dir+'/Others_summarized/'+ datetime.date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'_normalized.csv',sep=';')

# Focus on: ITGB1

In [None]:
gene="ITGB1"

In [None]:
sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# dotplot dot size = fraction of cells in group expressing the gene set in `gene`
fc = sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
foxf2 = pd.concat([ex, fc], axis=1)
foxf2['gene'] = gene
foxf2['source'] = "Siletti2022"
foxf2['organism'] = "Human"
foxf2['cell_number'] = pd.DataFrame(adata3.obs["clusters"].value_counts()).reindex(foxf2.index)
foxf2.index.name = "clusters"
foxf2

In [None]:
# export
name='Siletti2022'

In [None]:
# Win
foxf2.to_csv(main_dir+'/Others_summarized/'+ datetime.date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'_normalized.csv',sep=';')

# Focus on: ANGPT2

In [None]:
gene="ANGPT2"

In [None]:
sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# dotplot dot size = fraction of cells in group expressing the gene set in `gene`
fc = sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
foxf2 = pd.concat([ex, fc], axis=1)
foxf2['gene'] = gene
foxf2['source'] = "Siletti2022"
foxf2['organism'] = "Human"
foxf2['cell_number'] = pd.DataFrame(adata3.obs["clusters"].value_counts()).reindex(foxf2.index)
foxf2.index.name = "clusters"
foxf2

In [None]:
# export
name='Siletti2022'

In [None]:
# Win
foxf2.to_csv(main_dir+'/Others_summarized/'+ datetime.date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'_normalized.csv',sep=';')

# Focus on: CDH5

In [None]:
gene="CDH5"

In [None]:
sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
# dotplot dot size = fraction of cells in group expressing the gene set in `gene`
fc = sc.pl.dotplot(adata3, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
foxf2 = pd.concat([ex, fc], axis=1)
foxf2['gene'] = gene
foxf2['source'] = "Siletti2022"
foxf2['organism'] = "Human"
foxf2['cell_number'] = pd.DataFrame(adata3.obs["clusters"].value_counts()).reindex(foxf2.index)
foxf2.index.name = "clusters"
foxf2

In [None]:
# export
name='Siletti2022'

In [None]:
# Win
foxf2.to_csv(main_dir+'/Others_summarized/'+ datetime.date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'_normalized.csv',sep=';')

# Session Info

In [None]:
sc.logging.print_versions()