<font size="+3.8">Scanpy single-cell pre-processing</font>  
<font size="+1.5"></font>  

Aim: Preprocess mouse brain single-cell data from Vanlandewijck 2018 Nature 

Dataset: https://betsholtzlab.org/VascularSingleCells/database.html

In [None]:
from datetime import date
date.today().strftime('%d/%m/%Y')

In [None]:
import os
import getpass

# Record who ran the notebook (provenance).
# os.getlogin() needs a controlling terminal and raises OSError without one
# (e.g. a Jupyter server started as a service or a headless run).
# getpass.getuser() checks the LOGNAME/USER/LNAME/USERNAME environment
# variables first and therefore also works in those setups.
getpass.getuser()

In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import anndata
import matplotlib.pyplot as plt
import platform
from datetime import date
from matplotlib.pyplot import rc_context

In [None]:
# Value of CONDA_DEFAULT_ENV (conda env), shown for provenance.
# .get() avoids a KeyError when the variable is not set, e.g. when the
# notebook is run outside of conda.
conda_env = os.environ.get('CONDA_DEFAULT_ENV', 'no conda env active')
conda_env

In [None]:
platform.platform()

In [None]:
sc.settings.verbosity = 3

In [None]:
main_dir='/run/user/1000/gvfs/smb-share:server=138.245.4.35,share=bd-dichgans/SF' # Linux

In [None]:
# Windows UNC path. Raw string: the previous literal relied on the invalid
# escape sequences "\i", "\B" and "\S" being passed through unchanged, which
# emits a SyntaxWarning on recent Python versions. The resulting value is identical.
# NOTE: running this cell after the Linux cell overrides main_dir.
main_dir = r'\\isdsynnas.srv.med.uni-muenchen.de\BD-Dichgans\SF' # Win

# Load data

## Annotated 
Downloaded on 02/02/2023 from: https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE98816 (normalized count matrix and Series Matrix File = metadata2) and https://figshare.com/collections/_/4077260

### Vascular Cells

In [None]:
# Vascular cells: normalized count matrix (GSE98816).
# os.path.join builds the path with the separator of the running platform, so
# both the Linux and the Windows main_dir work; it also removes the invalid
# "\P" / "\V" escape sequences of the previous string literal.
vasc_counts_path = os.path.join(
    main_dir,
    "P06_vasc_scRNAseq",
    "Vanlandewijck2018",
    "GSE98816_Brain_samples_normalized_counts_matrix.txt.gz",
)
# .transpose() swaps the axes of the matrix as stored in the file; the cells
# further down parse the resulting obs index as cell names.
ad = sc.read_text(vasc_counts_path).transpose() # vascular cells

In [None]:
#ad = sc.read_text(main_dir+"\\P06_vasc_scRNAseq\Vanlandewijck2018\\GSE98816_Brain_samples_raw_read_counts_matrix.txt.gz").transpose()

In [None]:
ad

In [None]:
# Add var

In [None]:
ad.var.head()

In [None]:
# not available

In [None]:
# Add metadata

In [None]:
# Per-cell sample description table (semicolon-separated).
# os.path.join keeps the path valid for both the Linux and the Windows main_dir.
meta_path = os.path.join(
    main_dir,
    "P06_vasc_scRNAseq",
    "Vanlandewijck2018",
    "Descriptionofeachsinglecellsamples.csv",
)
meta = pd.read_csv(meta_path, sep=";")

In [None]:
meta.head()

In [None]:
meta["tissue origin"].value_counts()

In [None]:
meta = meta[meta["tissue origin"] == "Brain"]

In [None]:
pd.crosstab(meta["Mouse ID"], meta["genotype"])

In [None]:
meta["genotype"].value_counts()

In [None]:
meta["annoated cell types"].value_counts()

In [None]:
# GEO series matrix (tab-separated, no header row). It is read as-is and
# transposed (.T), so that each row of the file becomes a column.
# os.path.join keeps the path valid for both the Linux and the Windows main_dir.
meta2_path = os.path.join(
    main_dir,
    "P06_vasc_scRNAseq",
    "Vanlandewijck2018",
    "GSE98816_series_matrix.txt",
)
meta2 = pd.read_csv(meta2_path, sep="\t", header=None).T

In [None]:
# After transposing, the first row holds the field names of the series matrix.
# Promote it to the column header and drop the leading "!" marker.
new_header = [s.lstrip('!') for s in meta2.iloc[0]]
meta2 = meta2[1:].copy()  # copy: the columns assigned below should not target a slice
meta2.columns = new_header
# Remove the literal prefix only. str.lstrip() treats its argument as a *set of
# characters*, so it would also eat leading characters of the description
# itself whenever they happen to occur in "processed data column: ".
meta2["Sample_description"] = meta2["Sample_description"].str.replace(
    r'^processed data column: ', '', regex=True
)

In [None]:
meta2=meta2.reset_index()
meta2=meta2.iloc[:,1:]

In [None]:
meta2.head(3)

In [None]:
meta2.columns

In [None]:
meta2.Sample_description

In [None]:
all(meta2.Sample_geo_accession == meta2.ID_REF)

In [None]:
meta2["tissue origin"] = meta2['Sample_description'].str.split('.').str[0]

In [None]:
meta2["tissue origin"].value_counts()

Note: Matches count of vascular cells from metadata 1 and anndata object

In [None]:
len(meta2.Sample_geo_accession.unique())

All Sample_geo_accession are unique

In [None]:
all(meta2.Sample_geo_accession.isin(meta["GSM ID"]))

All Sample_geo_accession are in GSM ID from metadata table 1. Now we can match meta2.Sample_description (anndata descriptors) to meta["annoated cell types"]

In [None]:
# use only selected cols from meta2
meta2 = meta2[['Sample_title', 'Sample_geo_accession', 'tissue origin', 'Sample_description']]
meta2.head()

In [None]:
# use only selected cols from meta2
meta = meta[['GSM ID','GSM name','organism','Mouse ID','strain','genotype','age','tissue origin','annoated cell types','BioSample ID','SRA ID','Sequenced Reads']]
meta.head()

In [None]:
meta.shape # contains vascular cells + 250 astrocytes

In [None]:
meta2.shape # contains vascular cells only

In [None]:
meta_merged = meta.merge(meta2, how='inner', left_on="GSM ID", right_on="Sample_geo_accession")
meta_merged.head()

In [None]:
# format ad.obs

In [None]:
ad.obs.head()

This is the format of metadata in the anndata object. Corresponds to meta_merged.Sample_description

In [None]:
ad.shape

In [None]:
temp = ad.obs.reset_index(level=0)["index"].str.split(pat=".",expand=True)
temp

In [None]:
temp["name"] = temp[0] + "." + temp[1]

In [None]:
temp[0].value_counts()

In [None]:
temp["tissue"] = temp[0]
temp["cell_id"] = temp[1]

In [None]:
temp

In [None]:
temp["cell_type_fine"] = temp.cell_id.str.split(pat="_",expand=True)[0]

In [None]:
temp.cell_type_fine.value_counts()

In [None]:
#temp[temp["cell_type_fine"] == "EC3"].cell_id.unique()

In [None]:
pd.crosstab(temp["cell_type_fine"], temp["cell_id"])

In [None]:
# Collapse numbered subtypes (e.g. "EC1", "EC2") into one broad type by
# removing all digits. regex=True must be passed explicitly: pandas >= 2.0
# defaults to a literal replacement, which would leave the labels unchanged.
# The raw string avoids the invalid "\d" escape sequence.
temp['cell_type_broad'] = temp['cell_type_fine'].str.replace(r'\d+', '', regex=True)
temp.cell_type_broad.value_counts()

Note: Broad cell types match between metadata and anndata objects

In [None]:
assert all(ad.obs.reset_index()["index"] == temp.name)
assert len(ad.obs) == len(temp)

In [None]:
ad.obs = temp[["tissue","cell_id","cell_type_fine","cell_type_broad","name"]]

In [None]:
# add metadata from meta_merged to anndata

In [None]:
ad.obs

In [None]:
# Attach the per-cell metadata (meta_merged) to the parsed cell names.
# validate= raises a MergeError if a Sample_description occurs more than once
# in meta_merged, which would duplicate cells.
obs_merged = ad.obs.merge(
    meta_merged,
    how='inner',
    left_on="name",
    right_on="Sample_description",
    validate="many_to_one",
)
# An inner join drops cells without metadata. Make sure every cell survived
# and the rows are still in the original order before overwriting ad.obs.
if len(obs_merged) != ad.n_obs or not (obs_merged["name"].to_numpy() == ad.obs["name"].to_numpy()).all():
    raise ValueError("metadata merge dropped or reordered cells; ad.obs would no longer match ad.X")
# merge() returns a fresh RangeIndex; use the cell names as obs index instead.
obs_merged.index = obs_merged["name"].to_numpy()
ad.obs = obs_merged

In [None]:
ad.obs.head(3)

In [None]:
ad.obs.columns

In [None]:
pd.crosstab(ad.obs["cell_type_fine"], ad.obs["annoated cell types"])

In [None]:
ad.obs["annoated cell types"].value_counts()

Check if data is normalized:

In [None]:
ad.layers

In [None]:
# show expression of 100 random genes (across all cells)
import random
import seaborn as sns
random_genes = random.sample(range(ad.n_vars), 100)
adata_sub = ad[:, random_genes]
# np.asarray instead of np.matrix: NumPy no longer recommends the matrix class
exp = pd.DataFrame(np.asarray(adata_sub.X))
# plot: one KDE curve per gene, all in the same colour
# (warn_singular=False silences the warning for genes whose density cannot be estimated)
pl1=sns.displot(data=pd.melt(exp),x='value',height=4,hue='variable',kind="kde",warn_singular=False,legend=False,palette=list(np.repeat('#086da6',100)), lw=0.3) # genes with 0 expression are excluded
pl1.set(xlim=(-0.5, 7),ylim=(0,0.007));
plt.show()

In [None]:
sns.set(rc={'figure.figsize':(4,4)})
pl=sns.histplot(data=pd.melt(exp),x='value',binwidth=0.5,legend=True,palette=list(np.repeat('#086da6',100)))
pl.set(xlim=(0, 10));
plt.show()

In [None]:
sns.histplot(ad.X.sum(1), bins=100, kde=False)

In [None]:
exp.head()

Note: Data is normalized by definition (see file name)

Corresponds to https://betsholtzlab.org/VascularSingleCells/database.html, except for astrocytes

### Astrocytes

In [None]:
# Astrocytes: normalized count matrix (GSE99058, 250 samples).
# os.path.join builds the path with the separator of the running platform, so
# both the Linux and the Windows main_dir work; it also removes the invalid
# "\P" / "\V" escape sequences of the previous string literal.
astro_counts_path = os.path.join(
    main_dir,
    "P06_vasc_scRNAseq",
    "Vanlandewijck2018",
    "GSE99058_Brain_AC_250samples_normalized_counts_matrix.txt.gz",
)
# .transpose() swaps the axes of the matrix as stored in the file; the cells
# further down parse the resulting obs index as cell names.
ad_astro = sc.read_text(astro_counts_path).transpose() # astrocytes

In [None]:
ad_astro

In [None]:
# format ad_astro.obs

In [None]:
ad_astro.obs.head()

This is the format of metadata in the anndata object. Corresponds to meta_merged.Sample_description

In [None]:
ad_astro.shape

In [None]:
temp = ad_astro.obs.reset_index(level=0)["index"].str.split(pat=".",expand=True)
temp

In [None]:
temp["name"] = temp[0] + "." + temp[1]

In [None]:
temp[0].value_counts()

In [None]:
temp["tissue"] = temp[0]
temp["cell_id"] = temp[1]

In [None]:
temp

In [None]:
temp["cell_type_fine"] = temp.cell_id.str.split(pat="_",expand=True)[0]

In [None]:
temp.cell_type_fine.value_counts()

wrong labels?

In [None]:
#temp[temp["cell_type_fine"] == "EC3"].cell_id.unique()

In [None]:
pd.crosstab(temp["cell_type_fine"], temp["cell_id"])

In [None]:
# Derive the broad cell type by removing all digits from the fine label.
# regex=True must be passed explicitly: pandas >= 2.0 defaults to a literal
# replacement, which would leave the labels unchanged. The raw string avoids
# the invalid "\d" escape sequence.
temp['cell_type_broad'] = temp['cell_type_fine'].str.replace(r'\d+', '', regex=True)
temp.cell_type_broad.value_counts()

Note: Broad cell types match between metadata and anndata objects

In [None]:
assert all(ad_astro.obs.reset_index()["index"] == temp.name)
assert len(ad_astro.obs) == len(temp)

In [None]:
# temp was built via reset_index() and therefore carries a RangeIndex.
# Assigning it directly would replace the cell names in ad_astro.obs by
# 0..n-1. Re-attach the original obs names (row order is unchanged, see the
# asserts on temp.name) so the cells stay identifiable after merging datasets.
astro_obs = temp[["tissue","cell_id","cell_type_fine","cell_type_broad","name"]].copy()
astro_obs.index = ad_astro.obs_names
ad_astro.obs = astro_obs

In [None]:
ad_astro.obs["annoated cell types"] = "AC"

Check if data is normalized:

In [None]:
ad_astro.layers

In [None]:
# show expression of 100 random genes (across all cells)
import random
import seaborn as sns
# Sample gene positions from ad_astro itself. The previous code used the gene
# count of the vascular object `ad`, which selects out-of-range or unintended
# columns whenever the two objects do not have the same number of genes.
random_genes = random.sample(range(ad_astro.n_vars), 100)
adata_sub = ad_astro[:, random_genes]
# np.asarray instead of np.matrix: NumPy no longer recommends the matrix class
exp = pd.DataFrame(np.asarray(adata_sub.X))
# plot: one KDE curve per gene, all in the same colour
# (warn_singular=False silences the warning for genes whose density cannot be estimated)
pl1=sns.displot(data=pd.melt(exp),x='value',height=4,hue='variable',kind="kde",warn_singular=False,legend=False,palette=list(np.repeat('#086da6',100)), lw=0.3) # genes with 0 expression are excluded
pl1.set(xlim=(-0.5, 7),ylim=(0,0.007));
plt.show()

In [None]:
sns.set(rc={'figure.figsize':(4,4)})
pl=sns.histplot(data=pd.melt(exp),x='value',binwidth=0.5,legend=True,palette=list(np.repeat('#086da6',100)))
pl.set(xlim=(0, 10));
plt.show()

In [None]:
sns.histplot(ad_astro.X.sum(1), bins=100, kde=False)

In [None]:
exp.head()

Note: Data is normalized by definition (see file name)

### Merge

Merge anndata objects from vasculature and astrocytes

But first, drop 18 ACs from vasculature dataset. In https://betsholtzlab.org/VascularSingleCells/database.html exactly 250 ACs are shown, presumably from the 250 AC dataset.

In [None]:
ad = ad[~ad.obs['annoated cell types'].isin(['AC'])]

In [None]:
ad

In [None]:
ad_astro

In [None]:
ad_merged = anndata.concat([ad, ad_astro], join="inner")

In [None]:
ad_merged

In [None]:
ad_merged.obs.head(3)

In [None]:
# Membership test instead of ad_merged.obsm['X_umap']: indexing a missing key
# raises a KeyError, which stops "Run All" at this cell.
'X_umap' in ad_merged.obsm # no umap available as expected

In [None]:
ad_merged.obs["clusters_fine"] = ad_merged.obs["annoated cell types"]

In [None]:
ad_merged.obs["clusters"] = ad_merged.obs["annoated cell types"]

In [None]:
# merge subclusters
# new_names: broad cluster name -> set of fine labels that belong to it
new_names = {
    'ECs': {'EC1','EC2','EC3','aEC','vEC','capilEC'},
    'Pericytes': {'PC'},
    'SMCs': {'aSMC','aaSMC','vSMC'},
    'Oligos': {'OL'},
    'Fibroblasts': {'FB1', 'FB2'},
    'Microglia': {'MG'},
    'Astrocytes': {'AC'}
}
# invert into a lookup table: fine label -> broad cluster name
result = {
    fine_label: broad_label
    for broad_label, fine_labels in new_names.items()
    for fine_label in fine_labels
}
# order the lookup table alphabetically by fine label
new_cluster_names = {fine_label: result[fine_label] for fine_label in sorted(result)}
print(new_cluster_names)

In [None]:
ad_merged.obs['clusters'] = (
    ad_merged.obs['clusters']
    .map(new_cluster_names)
    .astype('category')
)

In [None]:
# alternative: keep AV zonation

In [None]:
# merge subclusters
# new_names: cluster name -> set of fine labels that belong to it
# (aEC, capilEC and vEC keep their own groups here)
new_names = {
    'ECs_non_AV': {'EC1','EC2','EC3'},
    'ECs_Arterial': {'aEC'},
    'ECs_Capillary': {'capilEC'},
    'ECs_Venous': {'vEC'},
    'Pericytes': {'PC'},
    'SMCs': {'aSMC','aaSMC','vSMC'},
    'Oligos': {'OL'},
    'Fibroblasts': {'FB1', 'FB2'},
    'Microglia': {'MG'},
    'Astrocytes': {'AC'}
}
# invert into a lookup table: fine label -> cluster name
result = {
    fine_label: group_label
    for group_label, fine_labels in new_names.items()
    for fine_label in fine_labels
}
# order the lookup table alphabetically by fine label
new_cluster_names = {fine_label: result[fine_label] for fine_label in sorted(result)}
print(new_cluster_names)

In [None]:
# fine annotation
ad_merged.obs['clusters2'] = (
    ad_merged.obs['clusters_fine']
    .map(new_cluster_names)
    .astype('category')
)

In [None]:
ad_merged.obs.clusters.value_counts()

In [None]:
ad_merged.obs.clusters2.value_counts()

In [None]:
# exclude clusters with <50 cells
cluster_counts = ad_merged.obs['clusters'].value_counts()
cluster_counts

In [None]:
# Keep only cells whose cluster contains at least 50 cells.
# .copy() turns the subset (a view) into a standalone AnnData object, so that
# the layers written to ad_merged later do not trigger an implicit copy.
ad_merged = ad_merged[ad_merged.obs['clusters'].isin(cluster_counts[cluster_counts>49].index)].copy()

Check if data is normalized:

In [None]:
ad_merged.layers

In [None]:
# show expression of 100 random genes (across all cells)
import random
import seaborn as sns
# Sample gene positions from ad_merged itself. The previous code used the gene
# count of the vascular object `ad`; ad_merged was built with join="inner" and
# may contain fewer genes, so those positions could be out of range.
random_genes = random.sample(range(ad_merged.n_vars), 100)
adata_sub = ad_merged[:, random_genes]
# np.asarray instead of np.matrix: NumPy no longer recommends the matrix class
exp = pd.DataFrame(np.asarray(adata_sub.X))
# plot: one KDE curve per gene, all in the same colour
# (warn_singular=False silences the warning for genes whose density cannot be estimated)
pl1=sns.displot(data=pd.melt(exp),x='value',height=4,hue='variable',kind="kde",warn_singular=False,legend=False,palette=list(np.repeat('#086da6',100)), lw=0.3) # genes with 0 expression are excluded
pl1.set(xlim=(-0.5, 7),ylim=(0,0.007));
plt.show()

In [None]:
sns.set(rc={'figure.figsize':(4,4)})
pl=sns.histplot(data=pd.melt(exp),x='value',binwidth=0.5,legend=True,palette=list(np.repeat('#086da6',100)))
pl.set(xlim=(0, 10));
plt.show()

In [None]:
sns.histplot(ad_merged.X.sum(1), bins=100, kde=False)

In [None]:
exp.head()

# Log-normalize merged dataset

In [None]:
# Snapshot of X before normalize_total/log1p - retrieve via adata.X = adata.layers["counts"]
# NOTE(review): the input files are named "normalized_counts_matrix", so these
# are presumably not raw counts despite the layer name - confirm.
ad_merged.layers["counts"] = ad_merged.X.copy()

In [None]:
# Normalize each cell by total counts over all genes, so that every cell has the same total count after normalization.
# NOTE(review): the input files are already labelled "normalized counts" - confirm that re-normalizing is intended.
sc.pp.normalize_total(ad_merged, inplace=True)

In [None]:
sns.histplot(ad_merged.X.sum(1), kde=False)

In [None]:
sc.pp.log1p(ad_merged) # X = log(X + 1)

In [None]:
sns.histplot(ad_merged.X.sum(1), kde=False)

In [None]:
ad_merged.layers["normalized"] = ad_merged.X.copy() # save normalized + log-transformed (but unscaled) counts - retrieve via adata.X = adata.layers["normalized"]

In [None]:
# Identify highly-variable genes
sc.pp.highly_variable_genes(ad_merged)
sc.pl.highly_variable_genes(ad_merged)

In [None]:
ad_merged.layers

In [None]:
ad_merged.layers["normalized"]

# Focus on: Foxf2

In [None]:
gene="Foxf2"

## Plot

In [None]:
sns.set_theme(style='white')
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

## Excl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
# dotplot dot size = fraction of cells in group expressing Foxf2
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
foxf2 = pd.concat([ex, fc], axis=1)
foxf2['gene'] = gene
foxf2['source'] = "Vanlandewijck2018"
foxf2['organism'] = "Mouse"
foxf2['cell_number'] = pd.DataFrame(ad_merged.obs["clusters"].value_counts()).reindex(foxf2.index)
foxf2

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
foxf2.to_csv(main_dir+'\\P6_Foxf2_per_celltype\\Foxf2_summarized\\'+ date.today().strftime("%Y%m%d")+'_'+name+'.csv',sep=';')

## Incl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']

In [None]:
# dotplot dot size = fraction of cells in group expressing Foxf2
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']

In [None]:
foxf2 = pd.concat([ex, fc], axis=1)
foxf2['gene'] = gene
foxf2['source'] = "Vanlandewijck2018"
foxf2['organism'] = "Mouse"
foxf2['cell_number'] = pd.DataFrame(ad_merged.obs["clusters2"].value_counts()).reindex(foxf2.index)
foxf2

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
foxf2.to_csv(main_dir+'\\P6_Foxf2_per_celltype\\Foxf2_summarized\\incl_zonation\\'+ date.today().strftime("%Y%m%d")+'_'+name+'_incl_zonation.csv',sep=';')

# Focus on: Other genes

## Foxo1

In [None]:
gene="Foxo1"

### Plot

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

### Excl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
# dotplot dot size = fraction of cells in group expressing the selected gene (see `gene`)
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
foxf2 = pd.concat([ex, fc], axis=1)
foxf2['gene'] = gene
foxf2['source'] = "Vanlandewijck2018"
foxf2['organism'] = "Mouse"
foxf2['cell_number'] = pd.DataFrame(ad_merged.obs["clusters"].value_counts()).reindex(foxf2.index)
foxf2

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
foxf2.to_csv(main_dir+'\\P6_Foxf2_per_celltype\\Other_genes_summarized\\'+ date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'.csv',sep=';')

### Incl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']

In [None]:
# dotplot dot size = fraction of cells in group expressing the selected gene (see `gene`)
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']

In [None]:
foxf2 = pd.concat([ex, fc], axis=1)
foxf2['gene'] = gene
foxf2['source'] = "Vanlandewijck2018"
foxf2['organism'] = "Mouse"
foxf2['cell_number'] = pd.DataFrame(ad_merged.obs["clusters2"].value_counts()).reindex(foxf2.index)
foxf2

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
foxf2.to_csv(main_dir+'\\P6_Foxf2_per_celltype\\Other_genes_summarized\\incl_zonation\\'+ date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'_incl_zonation.csv',sep=';')

## Tek = Tie2

In [None]:
gene="Tek"

### Plot

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

### Excl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
# dotplot dot size = fraction of cells in group expressing the selected gene (see `gene`)
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
foxf2 = pd.concat([ex, fc], axis=1)
foxf2['gene'] = gene
foxf2['source'] = "Vanlandewijck2018"
foxf2['organism'] = "Mouse"
foxf2['cell_number'] = pd.DataFrame(ad_merged.obs["clusters"].value_counts()).reindex(foxf2.index)
foxf2

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
foxf2.to_csv(main_dir+'\\P6_Foxf2_per_celltype\\Other_genes_summarized\\'+ date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'.csv',sep=';')

### Incl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']

In [None]:
# dotplot dot size = fraction of cells in group expressing the selected gene (see `gene`)
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']

In [None]:
foxf2 = pd.concat([ex, fc], axis=1)
foxf2['gene'] = gene
foxf2['source'] = "Vanlandewijck2018"
foxf2['organism'] = "Mouse"
foxf2['cell_number'] = pd.DataFrame(ad_merged.obs["clusters2"].value_counts()).reindex(foxf2.index)
foxf2

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
foxf2.to_csv(main_dir+'\\P6_Foxf2_per_celltype\\Other_genes_summarized\\incl_zonation\\'+ date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'_incl_zonation.csv',sep=';')

## Nos3 = eNOS

In [None]:
gene="Nos3"

### Plot

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

### Excl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
# dotplot dot size = fraction of cells in group expressing the selected gene (see `gene`)
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
foxf2 = pd.concat([ex, fc], axis=1)
foxf2['gene'] = gene
foxf2['source'] = "Vanlandewijck2018"
foxf2['organism'] = "Mouse"
foxf2['cell_number'] = pd.DataFrame(ad_merged.obs["clusters"].value_counts()).reindex(foxf2.index)
foxf2

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
foxf2.to_csv(main_dir+'\\P6_Foxf2_per_celltype\\Other_genes_summarized\\'+ date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'.csv',sep=';')

### Incl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']

In [None]:
# dotplot dot size = fraction of cells in group expressing the selected gene (see `gene`)
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']

In [None]:
foxf2 = pd.concat([ex, fc], axis=1)
foxf2['gene'] = gene
foxf2['source'] = "Vanlandewijck2018"
foxf2['organism'] = "Mouse"
foxf2['cell_number'] = pd.DataFrame(ad_merged.obs["clusters2"].value_counts()).reindex(foxf2.index)
foxf2

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
foxf2.to_csv(main_dir+'\\P6_Foxf2_per_celltype\\Other_genes_summarized\\incl_zonation\\'+ date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'_incl_zonation.csv',sep=';')

## Htra1

In [None]:
gene="Htra1"

### Plot

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

### Excl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
# dotplot dot size = fraction of cells in group expressing the selected gene (see `gene`)
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
foxf2 = pd.concat([ex, fc], axis=1)
foxf2['gene'] = gene
foxf2['source'] = "Vanlandewijck2018"
foxf2['organism'] = "Mouse"
foxf2['cell_number'] = pd.DataFrame(ad_merged.obs["clusters"].value_counts()).reindex(foxf2.index)
foxf2

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
foxf2.to_csv(main_dir+'\\P6_Foxf2_per_celltype\\Other_genes_summarized\\'+ date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'.csv',sep=';')

### Incl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']

In [None]:
# dotplot dot size = fraction of cells in group expressing the selected gene (see `gene`)
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']

In [None]:
foxf2 = pd.concat([ex, fc], axis=1)
foxf2['gene'] = gene
foxf2['source'] = "Vanlandewijck2018"
foxf2['organism'] = "Mouse"
foxf2['cell_number'] = pd.DataFrame(ad_merged.obs["clusters2"].value_counts()).reindex(foxf2.index)
foxf2

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
foxf2.to_csv(main_dir+'\\P6_Foxf2_per_celltype\\Other_genes_summarized\\incl_zonation\\'+ date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'_incl_zonation.csv',sep=';')

## Egfl8

In [None]:
gene="Egfl8"

### Plot

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

### Excl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
# dotplot dot size = fraction of cells in group expressing the selected gene (see `gene`)
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
foxf2 = pd.concat([ex, fc], axis=1)
foxf2['gene'] = gene
foxf2['source'] = "Vanlandewijck2018"
foxf2['organism'] = "Mouse"
foxf2['cell_number'] = pd.DataFrame(ad_merged.obs["clusters"].value_counts()).reindex(foxf2.index)
foxf2

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
foxf2.to_csv(main_dir+'\\P6_Foxf2_per_celltype\\Other_genes_summarized\\'+ date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'.csv',sep=';')

### Incl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']

In [None]:
# dotplot dot size = fraction of cells in group expressing the selected gene (see `gene`)
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']

In [None]:
foxf2 = pd.concat([ex, fc], axis=1)
foxf2['gene'] = gene
foxf2['source'] = "Vanlandewijck2018"
foxf2['organism'] = "Mouse"
foxf2['cell_number'] = pd.DataFrame(ad_merged.obs["clusters2"].value_counts()).reindex(foxf2.index)
foxf2

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
foxf2.to_csv(main_dir+'\\P6_Foxf2_per_celltype\\Other_genes_summarized\\incl_zonation\\'+ date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'_incl_zonation.csv',sep=';')

## Flt1

In [None]:
gene="Flt1"

### Plot

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

### Excl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
# dotplot dot size = fraction of cells in group expressing the selected gene (see `gene`)
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
othergene = pd.concat([ex, fc], axis=1)
othergene['gene'] = gene
othergene['source'] = "Vanlandewijck2018"
othergene['organism'] = "Mouse"
othergene['cell_number'] = pd.DataFrame(ad_merged.obs["clusters"].value_counts()).reindex(othergene.index)
othergene

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
othergene.to_csv(main_dir+'\\P06_Foxf2_per_celltype\\Other_genes_summarized\\'+ date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'.csv',sep=';')

### Incl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']

In [None]:
# dotplot dot size = fraction of cells in group expressing the selected gene (see `gene`)
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']

In [None]:
othergene = pd.concat([ex, fc], axis=1)
othergene['gene'] = gene
othergene['source'] = "Vanlandewijck2018"
othergene['organism'] = "Mouse"
othergene['cell_number'] = pd.DataFrame(ad_merged.obs["clusters2"].value_counts()).reindex(othergene.index)
othergene

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
othergene.to_csv(main_dir+'\\P06_Foxf2_per_celltype\\Other_genes_summarized\\incl_zonation\\'+ date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'_incl_zonation.csv',sep=';')

## Kdr

In [None]:
gene="Kdr"

### Plot

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

### Excl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
# dotplot dot size = fraction of cells in group expressing the selected gene (see `gene`)
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
othergene = pd.concat([ex, fc], axis=1)
othergene['gene'] = gene
othergene['source'] = "Vanlandewijck2018"
othergene['organism'] = "Mouse"
othergene['cell_number'] = pd.DataFrame(ad_merged.obs["clusters"].value_counts()).reindex(othergene.index)
othergene

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
othergene.to_csv(main_dir+'\\P06_Foxf2_per_celltype\\Other_genes_summarized\\'+ date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'.csv',sep=';')

### Incl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']

In [None]:
# dotplot dot size = fraction of cells in group expressing the selected gene (see `gene`)
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']

In [None]:
othergene = pd.concat([ex, fc], axis=1)
othergene['gene'] = gene
othergene['source'] = "Vanlandewijck2018"
othergene['organism'] = "Mouse"
othergene['cell_number'] = pd.DataFrame(ad_merged.obs["clusters2"].value_counts()).reindex(othergene.index)
othergene

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
othergene.to_csv(main_dir+'\\P06_Foxf2_per_celltype\\Other_genes_summarized\\incl_zonation\\'+ date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'_incl_zonation.csv',sep=';')

## Nrp1

In [None]:
gene="Nrp1"

### Plot

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

### Excl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
# dotplot dot size = fraction of cells in group expressing Foxf2
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
othergene = pd.concat([ex, fc], axis=1)
othergene['gene'] = gene
othergene['source'] = "Vanlandewijck2018"
othergene['organism'] = "Mouse"
othergene['cell_number'] = pd.DataFrame(ad_merged.obs["clusters"].value_counts()).reindex(othergene.index)
othergene

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
othergene.to_csv(main_dir+'\\P06_Foxf2_per_celltype\\Other_genes_summarized\\'+ date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'.csv',sep=';')

### Incl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']

In [None]:
# dotplot dot size = fraction of cells in group expressing Foxf2
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']

In [None]:
othergene = pd.concat([ex, fc], axis=1)
othergene['gene'] = gene
othergene['source'] = "Vanlandewijck2018"
othergene['organism'] = "Mouse"
othergene['cell_number'] = pd.DataFrame(ad_merged.obs["clusters2"].value_counts()).reindex(othergene.index)
othergene

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
othergene.to_csv(main_dir+'\\P06_Foxf2_per_celltype\\Other_genes_summarized\\incl_zonation\\'+ date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'_incl_zonation.csv',sep=';')

## Nrp2

In [None]:
gene="Nrp2"

### Plot

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

### Excl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
# dotplot dot size = fraction of cells in group expressing Foxf2
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
othergene = pd.concat([ex, fc], axis=1)
othergene['gene'] = gene
othergene['source'] = "Vanlandewijck2018"
othergene['organism'] = "Mouse"
othergene['cell_number'] = pd.DataFrame(ad_merged.obs["clusters"].value_counts()).reindex(othergene.index)
othergene

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
othergene.to_csv(main_dir+'\\P06_Foxf2_per_celltype\\Other_genes_summarized\\'+ date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'.csv',sep=';')

### Incl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']

In [None]:
# dotplot dot size = fraction of cells in group expressing Foxf2
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']

In [None]:
othergene = pd.concat([ex, fc], axis=1)
othergene['gene'] = gene
othergene['source'] = "Vanlandewijck2018"
othergene['organism'] = "Mouse"
othergene['cell_number'] = pd.DataFrame(ad_merged.obs["clusters2"].value_counts()).reindex(othergene.index)
othergene

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
othergene.to_csv(main_dir+'\\P06_Foxf2_per_celltype\\Other_genes_summarized\\incl_zonation\\'+ date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'_incl_zonation.csv',sep=';')

## Efnb2

In [None]:
gene="Efnb2"

### Plot

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

### Excl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
# dotplot dot size = fraction of cells in group expressing Foxf2
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
othergene = pd.concat([ex, fc], axis=1)
othergene['gene'] = gene
othergene['source'] = "Vanlandewijck2018"
othergene['organism'] = "Mouse"
othergene['cell_number'] = pd.DataFrame(ad_merged.obs["clusters"].value_counts()).reindex(othergene.index)
othergene

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
othergene.to_csv(main_dir+'\\P06_Foxf2_per_celltype\\Other_genes_summarized\\'+ date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'.csv',sep=';')

### Incl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']

In [None]:
# dotplot dot size = fraction of cells in group expressing Foxf2
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']

In [None]:
othergene = pd.concat([ex, fc], axis=1)
othergene['gene'] = gene
othergene['source'] = "Vanlandewijck2018"
othergene['organism'] = "Mouse"
othergene['cell_number'] = pd.DataFrame(ad_merged.obs["clusters2"].value_counts()).reindex(othergene.index)
othergene

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
othergene.to_csv(main_dir+'\\P06_Foxf2_per_celltype\\Other_genes_summarized\\incl_zonation\\'+ date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'_incl_zonation.csv',sep=';')

## Itgb1

In [None]:
gene="Itgb1"

### Plot

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

### Excl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
# dotplot dot size = fraction of cells in group expressing Foxf2
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
othergene = pd.concat([ex, fc], axis=1)
othergene['gene'] = gene
othergene['source'] = "Vanlandewijck2018"
othergene['organism'] = "Mouse"
othergene['cell_number'] = pd.DataFrame(ad_merged.obs["clusters"].value_counts()).reindex(othergene.index)
othergene

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
othergene.to_csv(main_dir+'\\P06_Foxf2_per_celltype\\Other_genes_summarized\\'+ date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'.csv',sep=';')

### Incl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']

In [None]:
# dotplot dot size = fraction of cells in group expressing Foxf2
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']

In [None]:
othergene = pd.concat([ex, fc], axis=1)
othergene['gene'] = gene
othergene['source'] = "Vanlandewijck2018"
othergene['organism'] = "Mouse"
othergene['cell_number'] = pd.DataFrame(ad_merged.obs["clusters2"].value_counts()).reindex(othergene.index)
othergene

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
othergene.to_csv(main_dir+'\\P06_Foxf2_per_celltype\\Other_genes_summarized\\incl_zonation\\'+ date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'_incl_zonation.csv',sep=';')

## Angpt2

In [None]:
gene="Angpt2"

### Plot

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

### Excl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
# dotplot dot size = fraction of cells in group expressing Foxf2
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
othergene = pd.concat([ex, fc], axis=1)
othergene['gene'] = gene
othergene['source'] = "Vanlandewijck2018"
othergene['organism'] = "Mouse"
othergene['cell_number'] = pd.DataFrame(ad_merged.obs["clusters"].value_counts()).reindex(othergene.index)
othergene

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
othergene.to_csv(main_dir+'\\P06_Foxf2_per_celltype\\Other_genes_summarized\\'+ date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'.csv',sep=';')

### Incl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']

In [None]:
# dotplot dot size = fraction of cells in group expressing Foxf2
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']

In [None]:
othergene = pd.concat([ex, fc], axis=1)
othergene['gene'] = gene
othergene['source'] = "Vanlandewijck2018"
othergene['organism'] = "Mouse"
othergene['cell_number'] = pd.DataFrame(ad_merged.obs["clusters2"].value_counts()).reindex(othergene.index)
othergene

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
othergene.to_csv(main_dir+'\\P06_Foxf2_per_celltype\\Other_genes_summarized\\incl_zonation\\'+ date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'_incl_zonation.csv',sep=';')

## Cdh5

In [None]:
gene="Cdh5"

### Plot

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var")

In [None]:
sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var")

### Excl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']
ex

In [None]:
# dotplot dot size = fraction of cells in group expressing Foxf2
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']
fc

In [None]:
othergene = pd.concat([ex, fc], axis=1)
othergene['gene'] = gene
othergene['source'] = "Vanlandewijck2018"
othergene['organism'] = "Mouse"
othergene['cell_number'] = pd.DataFrame(ad_merged.obs["clusters"].value_counts()).reindex(othergene.index)
othergene

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
othergene.to_csv(main_dir+'\\P06_Foxf2_per_celltype\\Other_genes_summarized\\'+ date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'.csv',sep=';')

### Incl zonation

In [None]:
# mean expression per group (normalized and scaled)
ex = sc.pl.matrixplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,5), standard_scale="var", return_fig=True).values_df
ex.columns = ['mean_expression']

In [None]:
# dotplot dot size = fraction of cells in group expressing Foxf2
fc = sc.pl.dotplot(ad_merged, [gene], groupby='clusters2', swap_axes=False, figsize=(2,7), standard_scale="var",return_fig=True).dot_size_df
fc.columns = ['fraction_of_cells']

In [None]:
othergene = pd.concat([ex, fc], axis=1)
othergene['gene'] = gene
othergene['source'] = "Vanlandewijck2018"
othergene['organism'] = "Mouse"
othergene['cell_number'] = pd.DataFrame(ad_merged.obs["clusters2"].value_counts()).reindex(othergene.index)
othergene

In [None]:
# export
name='Vanlandewijck2018'

In [None]:
# Win
othergene.to_csv(main_dir+'\\P06_Foxf2_per_celltype\\Other_genes_summarized\\incl_zonation\\'+ date.today().strftime("%Y%m%d")+'_'+gene+'_'+name+'_incl_zonation.csv',sep=';')

# Correlate gene expression (Foxf2 and Foxo1)

Using MAGIC denoising

In [None]:
import magic
import scprep

In [None]:
#sc.pp.scale(ad_merged)

In [None]:
# Use seaborn's plain white theme for the plots that follow.
# NOTE(review): `sns` is not imported in the notebook's main import cell — confirm `import seaborn as sns` runs before this cell.
sns.set_theme(style='white')

## ECs

In [None]:
gg = ["Foxf2","Foxo1","Nos3"]

In [None]:
adata_EC = ad_merged[ad_merged.obs.clusters == "ECs"]

In [None]:
adata_EC

In [None]:
adata_EC.layers

In [None]:
#matrix = pd.DataFrame(adata_EC.X) # not compatible with sparse 
matrix = adata_EC.X
matrix.columns = adata_EC.var.index.tolist()

In [None]:
cutoff_var = None

In [None]:
scprep.plot.plot_library_size(matrix, cutoff=cutoff_var)

In [None]:
# filter lowly expressed genes and cells with a small library size
#matrix = scprep.filter.filter_library_size(matrix, cutoff=cutoff_var)
#matrix.head()

Note: Skipped normalization as data is already log-normalized

In [None]:
# Display the layers of the subset again before running MAGIC (the filtering cell above is disabled)
adata_EC.layers

### Creating the MAGIC operator
If you don't specify parameters, MAGIC creates an operator with the following default values: knn=5, knn_max = 3 * knn, decay=1, t=3.

In [None]:
# MAGIC operator constructed without arguments, i.e. with the library defaults
magic_op = magic.MAGIC()

### Running MAGIC with gene selection
The magic_op.fit_transform function takes the normalized data and an array of selected genes as its arguments. If no genes are provided, MAGIC will return a matrix of all genes. The same can be achieved by substituting the array of gene names with genes='all_genes'.

In [None]:
%%time
# Fit MAGIC on the subset and keep the result for the three listed genes only
emt_magic = magic_op.fit_transform(adata_EC, genes=['Foxf2', 'Foxo1', 'Nos3'])

### Visualizing gene-gene relationships

We can see gene-gene relationships much more clearly after applying MAGIC. Note that the change in absolute values of gene expression is not meaningful - the relative difference is all that matters.

In [None]:
np.corrcoef(emt_magic[:,['Foxf2','Foxo1']].X, rowvar = False)[0][1]

In [None]:
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(16, 6))
scprep.plot.scatter(x=adata_EC[:,'Foxf2'].X, y=adata_EC[:,'Foxo1'].X, c=adata_EC[:,'Nos3'].X, ax = ax1,
                    xlabel='Foxf2', ylabel='Foxo1', legend_title="Nos3", title='Before MAGIC')
scprep.plot.scatter(x=emt_magic[:,'Foxf2'].X, y=emt_magic[:,'Foxo1'].X, c=emt_magic[:,'Nos3'].X, ax=ax2,
                    xlabel='Foxf2', ylabel='Foxo1', legend_title="Nos3", title='After MAGIC')
plt.axline((1,1), slope=1, color="black", alpha=0.3, linestyle="--")
plt.tight_layout()
plt.show()

## PCs

In [None]:
gg = ["Foxf2","Foxo1","Nos3"]

In [None]:
adata_EC = ad_merged[ad_merged.obs.clusters == "Pericytes"]

In [None]:
adata_EC

In [None]:
adata_EC.layers

In [None]:
#matrix = pd.DataFrame(adata_EC.X) # not compatible with sparse 
matrix = adata_EC.X
matrix.columns = adata_EC.var.index.tolist()

In [None]:
cutoff_var = None

In [None]:
scprep.plot.plot_library_size(matrix, cutoff=cutoff_var)

In [None]:
# filter lowly expressed genes and cells with a small library size
#matrix = scprep.filter.filter_library_size(matrix, cutoff=cutoff_var)
#matrix.head()

Note: Skipped normalization as data is already log-normalized

In [None]:
# Display the layers of the subset again before running MAGIC (the filtering cell above is disabled)
adata_EC.layers

### Creating the MAGIC operator
If you don't specify parameters, MAGIC creates an operator with the following default values: knn=5, knn_max = 3 * knn, decay=1, t=3.

In [None]:
# MAGIC operator constructed without arguments, i.e. with the library defaults
magic_op = magic.MAGIC()

### Running MAGIC with gene selection
The magic_op.fit_transform function takes the normalized data and an array of selected genes as its arguments. If no genes are provided, MAGIC will return a matrix of all genes. The same can be achieved by substituting the array of gene names with genes='all_genes'.

In [None]:
%%time
# Fit MAGIC on the subset and keep the result for the three listed genes only
emt_magic = magic_op.fit_transform(adata_EC, genes=['Foxf2', 'Foxo1', 'Nos3'])

### Visualizing gene-gene relationships

We can see gene-gene relationships much more clearly after applying MAGIC. Note that the change in absolute values of gene expression is not meaningful - the relative difference is all that matters.

In [None]:
np.corrcoef(emt_magic[:,['Foxf2','Foxo1']].X, rowvar = False)[0][1]

In [None]:
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(16, 6))
scprep.plot.scatter(x=adata_EC[:,'Foxf2'].X, y=adata_EC[:,'Foxo1'].X, c=adata_EC[:,'Nos3'].X, ax = ax1,
                    xlabel='Foxf2', ylabel='Foxo1', legend_title="Nos3", title='Before MAGIC')
scprep.plot.scatter(x=emt_magic[:,'Foxf2'].X, y=emt_magic[:,'Foxo1'].X, c=emt_magic[:,'Nos3'].X, ax=ax2,
                    xlabel='Foxf2', ylabel='Foxo1', legend_title="Nos3", title='After MAGIC')
plt.axline((0,0), slope=1, color="black", alpha=0.3, linestyle="--")
plt.tight_layout()
plt.show()

## All cell types

In [None]:
# Genes to correlate (not referenced by the calls below, which spell the gene names out)
gg = ["Foxf2","Foxo1"]

In [None]:
ad_merged

In [None]:
#matrix = pd.DataFrame(adata.X) # not compatible with sparse 
# Expression matrix of all cells taken directly from .X; the column labelling below is left disabled
matrix = ad_merged.X
#matrix.columns = ad_merged.var.index.tolist()

In [None]:
# Passed as `cutoff` to the library-size plot; the actual filtering further down is commented out
cutoff_var = 700

In [None]:
scprep.plot.plot_library_size(matrix, cutoff=cutoff_var)

In [None]:
# filter lowly expressed genes and cells with a small library size
#matrix = scprep.filter.filter_library_size(matrix, cutoff=cutoff_var)
#matrix.head()

Note: Skipped normalization as data is already log-normalized

In [None]:
# Display the layers of the full dataset before running MAGIC (the filtering cell above is disabled)
ad_merged.layers

### Creating the MAGIC operator
If you don't specify parameters, MAGIC creates an operator with the following default values: knn=5, knn_max = 3 * knn, decay=1, t=3.

In [None]:
# MAGIC operator constructed without arguments, i.e. with the library defaults
magic_op = magic.MAGIC()

### Running MAGIC with gene selection
The magic_op.fit_transform function takes the normalized data and an array of selected genes as its arguments. If no genes are provided, MAGIC will return a matrix of all genes. The same can be achieved by substituting the array of gene names with genes='all_genes'.

In [None]:
%%time
# Fit MAGIC on all cells and keep the result for the three listed genes only
emt_magic = magic_op.fit_transform(ad_merged, genes=['Foxf2', 'Foxo1', 'Nos3'])

### Visualizing gene-gene relationships

We can see gene-gene relationships much more clearly after applying MAGIC. Note that the change in absolute values of gene expression is not meaningful - the relative difference is all that matters.

In [None]:
np.corrcoef(emt_magic[:,['Foxf2','Foxo1']].X, rowvar = False)[0][1]

In [None]:
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(13, 6))

scprep.plot.scatter(x=ad_merged[:,'Foxf2'].X, y=ad_merged[:,'Foxo1'].X, c=ad_merged[:,'Nos3'].X, ax = ax1,
                    xlabel='Foxf2', ylabel='Foxo1', legend_title="Nos3", title='Before MAGIC')

scprep.plot.scatter(x=emt_magic[:,'Foxf2'].X, y=emt_magic[:,'Foxo1'].X, c=emt_magic[:,'Nos3'].X, ax=ax2,
                    xlabel='Foxf2', ylabel='Foxo1', title='After MAGIC')
plt.axline((1,1), slope=1, color="black", alpha=0.3, linestyle="--")
plt.tight_layout()
plt.show()

## capECs

In [None]:
gg = ["Foxf2","Foxo1","Nos3"]

In [None]:
adata_EC = ad_merged[ad_merged.obs.clusters2 == "ECs_Capillary"]

In [None]:
adata_EC

In [None]:
adata_EC.layers

In [None]:
#matrix = pd.DataFrame(adata_EC.X) # not compatible with sparse 
matrix = adata_EC.X
matrix.columns = adata_EC.var.index.tolist()

In [None]:
cutoff_var = None

In [None]:
scprep.plot.plot_library_size(matrix, cutoff=cutoff_var)

In [None]:
# filter lowly expressed genes and cells with a small library size
#matrix = scprep.filter.filter_library_size(matrix, cutoff=cutoff_var)
#matrix.head()

Note: Skipped normalization as data is already log-normalized

In [None]:
# Display the layers of the subset again before running MAGIC (the filtering cell above is disabled)
adata_EC.layers

### Creating the MAGIC operator
If you don't specify parameters, MAGIC creates an operator with the following default values: knn=5, knn_max = 3 * knn, decay=1, t=3.

In [None]:
# MAGIC operator constructed without arguments, i.e. with the library defaults
magic_op = magic.MAGIC()

### Running MAGIC with gene selection
The magic_op.fit_transform function takes the normalized data and an array of selected genes as its arguments. If no genes are provided, MAGIC will return a matrix of all genes. The same can be achieved by substituting the array of gene names with genes='all_genes'.

In [None]:
%%time
# Fit MAGIC on the subset and keep the result for the three listed genes only
emt_magic = magic_op.fit_transform(adata_EC, genes=['Foxf2', 'Foxo1', 'Nos3'])

### Visualizing gene-gene relationships

We can see gene-gene relationships much more clearly after applying MAGIC. Note that the change in absolute values of gene expression is not meaningful - the relative difference is all that matters.

In [None]:
np.corrcoef(emt_magic[:,['Foxf2','Foxo1']].X, rowvar = False)[0][1]

In [None]:
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(16, 6))
scprep.plot.scatter(x=adata_EC[:,'Foxf2'].X, y=adata_EC[:,'Foxo1'].X, c=adata_EC[:,'Nos3'].X, ax = ax1,
                    xlabel='Foxf2', ylabel='Foxo1', legend_title="Nos3", title='Before MAGIC')
scprep.plot.scatter(x=emt_magic[:,'Foxf2'].X, y=emt_magic[:,'Foxo1'].X, c=emt_magic[:,'Nos3'].X, ax=ax2,
                    xlabel='Foxf2', ylabel='Foxo1', legend_title="Nos3", title='After MAGIC')
plt.axline((1,1), slope=1, color="black", alpha=0.3, linestyle="--")
plt.tight_layout()
plt.show()

## aECs

In [None]:
gg = ["Foxf2","Foxo1","Nos3"]

In [None]:
adata_EC = ad_merged[ad_merged.obs.clusters2 == "ECs_Arterial"]

In [None]:
adata_EC

In [None]:
adata_EC.layers

In [None]:
#matrix = pd.DataFrame(adata_EC.X) # not compatible with sparse 
matrix = adata_EC.X
matrix.columns = adata_EC.var.index.tolist()

In [None]:
cutoff_var = None

In [None]:
scprep.plot.plot_library_size(matrix, cutoff=cutoff_var)

In [None]:
# filter lowly expressed genes and cells with a small library size
#matrix = scprep.filter.filter_library_size(matrix, cutoff=cutoff_var)
#matrix.head()

Note: Skipped normalization as data is already log-normalized

In [None]:
# Display the layers of the subset again before running MAGIC (the filtering cell above is disabled)
adata_EC.layers

### Creating the MAGIC operator
If you don't specify parameters, MAGIC creates an operator with the following default values: knn=5, knn_max = 3 * knn, decay=1, t=3.

In [None]:
# MAGIC operator constructed without arguments, i.e. with the library defaults
magic_op = magic.MAGIC()

### Running MAGIC with gene selection
The magic_op.fit_transform function takes the normalized data and an array of selected genes as its arguments. If no genes are provided, MAGIC will return a matrix of all genes. The same can be achieved by substituting the array of gene names with genes='all_genes'.

In [None]:
%%time
# Fit MAGIC on the subset and keep the result for the three listed genes only
emt_magic = magic_op.fit_transform(adata_EC, genes=['Foxf2', 'Foxo1', 'Nos3'])

### Visualizing gene-gene relationships

We can see gene-gene relationships much more clearly after applying MAGIC. Note that the change in absolute values of gene expression is not meaningful - the relative difference is all that matters.

In [None]:
np.corrcoef(emt_magic[:,['Foxf2','Foxo1']].X, rowvar = False)[0][1]

In [None]:
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(16, 6))
scprep.plot.scatter(x=adata_EC[:,'Foxf2'].X, y=adata_EC[:,'Foxo1'].X, c=adata_EC[:,'Nos3'].X, ax = ax1,
                    xlabel='Foxf2', ylabel='Foxo1', legend_title="Nos3", title='Before MAGIC')
scprep.plot.scatter(x=emt_magic[:,'Foxf2'].X, y=emt_magic[:,'Foxo1'].X, c=emt_magic[:,'Nos3'].X, ax=ax2,
                    xlabel='Foxf2', ylabel='Foxo1', legend_title="Nos3", title='After MAGIC')
plt.axline((1,1), slope=1, color="black", alpha=0.3, linestyle="--")
plt.tight_layout()
plt.show()

## vECs

In [None]:
gg = ["Foxf2","Foxo1","Nos3"]

In [None]:
adata_EC = ad_merged[ad_merged.obs.clusters2 == "ECs_Venous"]

In [None]:
adata_EC

In [None]:
adata_EC.layers

In [None]:
#matrix = pd.DataFrame(adata_EC.X) # not compatible with sparse 
matrix = adata_EC.X
matrix.columns = adata_EC.var.index.tolist()

In [None]:
cutoff_var = None

In [None]:
scprep.plot.plot_library_size(matrix, cutoff=cutoff_var)

In [None]:
# filter lowly expressed genes and cells with a small library size
#matrix = scprep.filter.filter_library_size(matrix, cutoff=cutoff_var)
#matrix.head()

Note: Skipped normalization as data is already log-normalized

In [None]:
# Display the layers of the subset again before running MAGIC (the filtering cell above is disabled)
adata_EC.layers

### Creating the MAGIC operator
If you don't specify parameters, MAGIC creates an operator with the following default values: knn=5, knn_max = 3 * knn, decay=1, t=3.

In [None]:
# MAGIC operator constructed without arguments, i.e. with the library defaults
magic_op = magic.MAGIC()

### Running MAGIC with gene selection
The magic_op.fit_transform function takes the normalized data and an array of selected genes as its arguments. If no genes are provided, MAGIC will return a matrix of all genes. The same can be achieved by substituting the array of gene names with genes='all_genes'.

In [None]:
%%time
# Fit MAGIC on the subset and keep the result for the three listed genes only
emt_magic = magic_op.fit_transform(adata_EC, genes=['Foxf2', 'Foxo1', 'Nos3'])

### Visualizing gene-gene relationships

We can see gene-gene relationships much more clearly after applying MAGIC. Note that the change in absolute values of gene expression is not meaningful - the relative difference is all that matters.

In [None]:
np.corrcoef(emt_magic[:,['Foxf2','Foxo1']].X, rowvar = False)[0][1]

In [None]:
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(16, 6))
scprep.plot.scatter(x=adata_EC[:,'Foxf2'].X, y=adata_EC[:,'Foxo1'].X, c=adata_EC[:,'Nos3'].X, ax = ax1,
                    xlabel='Foxf2', ylabel='Foxo1', legend_title="Nos3", title='Before MAGIC')
scprep.plot.scatter(x=emt_magic[:,'Foxf2'].X, y=emt_magic[:,'Foxo1'].X, c=emt_magic[:,'Nos3'].X, ax=ax2,
                    xlabel='Foxf2', ylabel='Foxo1', legend_title="Nos3", title='After MAGIC')
plt.axline((1,1), slope=1, color="black", alpha=0.3, linestyle="--")
plt.tight_layout()
plt.show()

# Session Info

In [None]:
# Print the versions of the packages in this session for reproducibility
sc.logging.print_versions()