<font size="+3.8">Tspo Expression per Cell Type</font>  
<font size="+1.5"></font>  

Aim: Combine and plot Tspo expression from processed scRNA-seq studies. For the processing steps, see the separate scripts.

For Herms lab 02/2025

In [None]:
from datetime import date
date.today().strftime('%d/%m/%Y')

In [None]:
import os
os.getlogin()

In [None]:
import numpy as np
import pandas as pd
import scanpy as sc
import matplotlib as mpl
import matplotlib.pyplot as plt
import os
import glob
from datetime import date
from matplotlib.pyplot import rc_context
import seaborn as sns
import platform

In [None]:
import fnmatch

In [None]:
# Name of the active conda environment, or None when the kernel was not started from conda.
# A plain os.environ[...] lookup raises KeyError in that case and stops a Run All.
os.environ.get('CONDA_DEFAULT_ENV') # conda env

In [None]:
platform.platform()

In [None]:
sc.settings.verbosity = 3

In [None]:
# Root of the shared project folder. The mount point differs per operating system, so it is
# picked from platform.system() instead of overwriting main_dir three times (previously the
# last assignment always won, whatever machine the notebook ran on).
# The Windows UNC path is a raw string: '\B' and '\S' are invalid escape sequences in a normal
# string literal and produce a DeprecationWarning/SyntaxWarning on recent Python versions.
MAIN_DIR_BY_OS = {
    'Linux': '/run/user/1000/gvfs/smb-share:server=138.245.4.35,share=bd-dichgans/SF',
    'Windows': r'\\isdsynnas.srv.med.uni-muenchen.de\BD-Dichgans\SF',
    'Darwin': '/Volumes/BD-Dichgans/SF',  # macOS
}
# Unknown systems fall back to the macOS mount point, which is the value the notebook used before.
main_dir = MAIN_DIR_BY_OS.get(platform.system(), MAIN_DIR_BY_OS['Darwin'])

# Tspo

In [None]:
gene="Tspo"

## Load data

In [None]:
# Folder holding the per-study summary CSV files
path = os.path.join(main_dir, 'P06_Foxf2_per_celltype', '202502-Tspo-HermsLab', 'Genes_summarized')
all_files = os.listdir(path)

# Keep the CSV files whose name contains the gene symbol (case-insensitive)
gene_key = gene.lower()
matching_files = []
for fname in all_files:
    if fname.endswith(".csv") and gene_key in fname.lower():
        matching_files.append(fname)
matching_files

In [None]:
# Read every matching summary table (semicolon-separated, header in the first row)
li = [
    pd.read_csv(os.path.join(path, csv_name), index_col=None, header=0, sep=";")
    for csv_name in matching_files
]

# Stack the per-file tables into one long table with a fresh running index
all = pd.concat(li, axis=0, ignore_index=True)

In [None]:
all.source.unique()

In [None]:
all.gene.unique()

## Format

### Remove celltypes

Remove cell types that are available in only a few studies

In [None]:
all.clusters.value_counts()

In [None]:
# Allow up to 50 columns in the display so the source x cluster table is not truncated
pd.set_option("display.max_columns", 50)
pd.crosstab(index=all.source, columns=all.clusters)

In [None]:
# Cell types excluded from the comparison; all of them are dropped in a single pass
excluded_clusters = [
    "Unknown",
    'Immune_Other',
    'Olfactory ensheathing cells',
    'Fibromyocytes',
    'T cells',
    'NK/T cells',
    'Choroid plexus epithelial cells',
    'Neuroepithelial cells',
]
all = all[~all.clusters.isin(excluded_clusters)]

In [None]:
# Further cell types excluded from the comparison
excluded_clusters_extra = [
    'Hemoglobin-expressing vascular cells',
    'Olfactory ensheathing glia',
    'Hypendymal cells',
]
all = all[~all.clusters.isin(excluded_clusters_extra)]

### Rename Oligos

In [None]:
all = all.replace("Oligos","Oligodendrocytes")

### Match Microglia naming

In [None]:
all.clusters.unique().tolist()

In [None]:
all = all.replace("Microglia","Microglia/Mφ")

In [None]:
all = all.replace("Macrophages","Microglia/Mφ")

In [None]:
all = all.replace("Microglia/Macrophages","Microglia/Mφ")

In [None]:
all.clusters.unique().tolist()

### SMCs/Pericytes

In [None]:
all[all.clusters=="SMCs/Pericytes"]

In [None]:
all = all.replace("SMCs/Pericytes","SMCs") # rather SMCs than pericytes, see TabulaMurisSenis2020 pp script

### Fibroblasts = VLMC

See Zeisel 2018 Cell

In [None]:
all = all.replace("Leptomeningeal cells","Fibroblasts")

In [None]:
all = all.replace("VLMCs","Fibroblasts")

### Group Neuroblasts/Neuronal stem cells

In [None]:
# Group Neurogenesis + Neuroblasts + Neuronal stem cells

In [None]:
# Collapse the different progenitor labels into one shared "Neuroblasts/NSCs" label
nsc_aliases = ("Neuroblasts", "Neuronal stem cells", "Neurogenesis", "Neural stem cells")
for alias in nsc_aliases:
    all = all.replace(alias, "Neuroblasts/NSCs")

In [None]:
all[all.clusters=="Neuroblasts/NSCs"]

### Rename ECs

In [None]:
# rename ECs to Endothelial cells

In [None]:
all2 = all

In [None]:
all2 = all2.replace("ECs","Endothelial cells")

### Set order of axes

In [None]:
all2.clusters.unique().tolist()

In [None]:
#all2.source = pd.Categorical(all2.source, categories= ["Heindl2022", "Vanlandewijck2018", "Saunders2018", "Zeisel2018", "TabulaMuris2018", "Winkler2022", "Yang2022", "Siletti2022", "Garcia2022"])

In [None]:
# Studies to show, in plotting order; source values outside this list become NaN
source_order = ["Heindl2022", "Saunders2018", "Yang2022"]
all2["source"] = pd.Categorical(all2["source"], categories=source_order)

In [None]:
# Cell types to show, in plotting order; cluster values outside this list become NaN
cluster_categories = [
    "Astrocytes",
    "Microglia/Mφ",
    "Oligodendrocytes",
    "OPCs",
    "Endothelial cells",
    "Pericytes",
    "SMCs",
    "Fibroblasts",
    "Neurons",
    "Neuroblasts/NSCs",
    "Ependymal cells",
]
all2["clusters"] = pd.Categorical(all2["clusters"], categories=cluster_categories)

In [None]:
all2 = all2.sort_values("clusters")

### Rename studies

In [None]:
all2 = all2.replace("Saunders2018","Saunders, 2018, Cell")

In [None]:
all2 = all2.replace("Heindl2022","Own data")

In [None]:
all2 = all2.replace("Zeisel2018","Zeisel, 2018, Cell")

In [None]:
all2 = all2.replace("TabulaMuris2018", "Tabula Muris, 2018, Nature")

In [None]:
all2 = all2.replace("Winkler2022", "Winkler, 2022, Science")

In [None]:
all2 = all2.replace("Yang2022", "Yang, 2022, Nature")

In [None]:
all2 = all2.replace("Vanlandewijck2018", "Vanlandewijck, 2018, Nature")

In [None]:
all2 = all2.replace("Siletti2022", "Siletti, 2022, bioRxiv")

In [None]:
all2 = all2.replace("Garcia2022", "Garcia, 2022, Nature")

In [None]:
#all2 = all2.replace("Ximerakis2019", "Ximerakis, 2019, Nat Neurosci")
#all2 = all2.replace("TabulaMurisSenis2020", "Tabula Muris, 2020, Nature")
#all2 = all2.replace("Zhao2020", "Zhao, 2020, Nat Comm")

## Heatmap

In [None]:
all2

In [None]:
# Reshape to wide format: one row per cell type, one column per study, values = mean expression
expr_wide = all2.set_index(['clusters','source'])['mean_expression'].unstack().reset_index()
expr_wide.index = expr_wide.clusters

# Put the rows in the order in which the cell types appear in all2, then drop the helper column
cluster_order = list(all2.clusters.unique())
all4 = expr_wide.reindex(cluster_order).drop(columns="clusters")

In [None]:
# create color palette
# Light grey -> tomato gradient for the heatmap.
my_cmap2 = mpl.colors.LinearSegmentedColormap.from_list("mycolormap", ['#ebebeb','tomato'])
# Register through mpl.colormaps: plt.cm.register_cmap was deprecated in Matplotlib 3.7 and
# removed in 3.9. force=True lets this cell be re-run without a "already registered" error.
mpl.colormaps.register(my_cmap2, name="mycolormap", force=True)
# Later cells look the colormap up by the name "mycolormap", so the name is kept.
cpal = sns.color_palette("mycolormap", as_cmap=True)

In [None]:
# Heatmap of mean expression (rows: cell types, columns: studies), colour scale fixed to 0..1
heatmap_rc = {'figure.figsize': (4.5,2.5), 'figure.dpi': 120}
with rc_context(heatmap_rc):
    heatmap_ax = sns.heatmap(all4, cmap=cpal, vmin=0, vmax=1, linewidths=0.01)
    pl = heatmap_ax.set(ylabel=None, xlabel=None)
    # Column labels go on top of the plot instead of below it, rotated by 45 degrees
    plt.tick_params(axis='both', which='major', labelsize=10, labelbottom=False, bottom=False, top=False, labeltop=True)
    plt.xticks(rotation=45, ha='left')
    # Frameless legend box carrying the gene name as its title
    legend_title = gene+' \nexpression \n(mean)'
    plt.legend(loc=(1.27,0.345), title=legend_title, frameon=False)
    plt.show()

## Dotplot

In [None]:
# Mid grey -> tomato gradient for the dot plot.
# The colormap gets its own explicit name. Previously it was created with an empty name and
# registered without name=, so it was stored under "" and the "mycolormap" lookup below
# returned the heatmap colormap instead of this one.
mycolormap = mpl.colors.LinearSegmentedColormap.from_list("mycolormap_dotplot", ['#d1d1d1','tomato'])
# force=True lets this cell be re-run; "mycolormap" (heatmap) is left untouched.
mpl.colormaps.register(mycolormap, name="mycolormap_dotplot", force=True)
cpal2 = sns.color_palette("mycolormap_dotplot", as_cmap=True)

In [None]:
# Relabel all seven columns by position; the two multi-line labels are the ones passed as
# hue= and size= to the dot plot below.
# NOTE(review): this assumes the columns arrive in exactly this order (clusters, mean expression,
# fraction of cells, gene, source, organism, cell_number) — confirm against the summary CSVs.
# NOTE(review): the heatmap cell above selects 'mean_expression'; presumably it cannot be re-run
# after this rename without rebuilding all2 — verify.
all2.columns = ['clusters', 'Mean expression\n       in group', 'Fraction of cells\n   in group (%)', 'gene', 'source','organism', 'cell_number']

In [None]:
# Scale the fraction column by 100 (the column label states percent).
# Running this cell twice scales the values twice.
fraction_col = "Fraction of cells\n   in group (%)"
all2[fraction_col] = all2[fraction_col].mul(100)

In [None]:
# Dot plot: one dot per (study, cell type); colour = mean expression, dot size = fraction of cells.
sns.set(style="white")
pl=sns.relplot(data=all2, x="source", y="clusters",
                hue='Mean expression\n       in group', size='Fraction of cells\n   in group (%)',
                #size_norm=(50, 500),
                palette=cpal2, 
                sizes=(15, 550), # smallest and largest marker size
                #marker="s", # uncomment for square instead of round markers
                linewidth=1,
                #legend=None,
                #aspect=0.9
              );
# No axis titles; the tick labels carry the study and cell-type names
pl.set(ylabel=None,xlabel=None)
# Put the x tick labels on top of the plot instead of below it, rotated by 45 degrees
plt.tick_params(axis='both', which='major', labelsize=10, labelbottom = False, bottom=False, top = False, labeltop=True)
plt.xticks(rotation=45, ha = 'left')
pl.set_xticklabels(fontsize=14.4, family="arial", color="black")
#plt.xlabel(family='Arial')
pl.set_yticklabels(fontsize=14.1, family="arial", color="black")
# Final figure size in inches (width, height)
pl.fig.set_size_inches(9.4,4.2)
#plt.legend(loc="upper right")
# Hide the left and bottom axis lines
sns.despine(left=True, bottom=True);

# sns legend: remove the legend that relplot attached to the grid ...
leg = pl._legend
#leg.set_bbox_to_anchor([1.5,0.53])
leg.remove();

# mpl legend: ... and draw a matplotlib legend instead, titled with the gene name
legend = plt.legend(frameon=True, framealpha=0.2, borderpad=0.5, bbox_to_anchor=(1,1), title=gene, # handletextpad=0.7, 
           prop=mpl.font_manager.FontProperties(family='arial', size=10), labelcolor='black')
plt.setp(legend.get_title(), color='black', family='arial', size=13);

In [None]:
plt.show()

In [None]:
# Write the dot plot as a date-stamped PNG into the project's plots folder
plots_dir = os.path.join(main_dir, 'P06_Foxf2_per_celltype', '202502-Tspo-HermsLab', 'plots')
png_name = date.today().strftime("%Y%m%d")+'_Tspo_by_celltype_a.png'
pl.savefig(os.path.join(plots_dir, png_name), dpi=500)

In [None]:
# Write the dot plot as a date-stamped SVG into the project's plots folder
plots_dir = os.path.join(main_dir, 'P06_Foxf2_per_celltype', '202502-Tspo-HermsLab', 'plots')
svg_name = date.today().strftime("%Y%m%d")+'_Tspo_by_celltype_a.svg'
pl.savefig(os.path.join(plots_dir, svg_name), dpi=500)

## Cell Numbers

In [None]:
# Reshape to wide format: one row per cell type, one column per study, values = cell_number
counts_wide = all2.set_index(['clusters','source'])['cell_number'].unstack().reset_index()
counts_wide.index = counts_wide.clusters

# Put the rows in the order in which the cell types appear in all2, then drop the helper column
cluster_order = list(all2.clusters.unique())
all5 = counts_wide.reindex(cluster_order).drop(columns="clusters")

# Missing study/cell-type combinations are filled with 0, then everything is cast to int
all5 = all5.fillna(0).astype(int)

# add marginal frequencies: first the "Total" row (column sums), then the "Total" column (row sums)
all5.loc["Total"] = all5.sum(skipna=True)
all5['Total'] = all5[list(all5.columns)].sum(axis=1)
all5

In [None]:
#all5.to_csv(main_dir+'\\P6_Foxf2_per_celltype\\'+date.today().strftime("%Y%m%d")+'_number_of_cells.csv',sep=";")

In [None]:
# Drop the Tspo tables before the next gene reuses the same variable names
del all, all2, all4, all5

# Other genes: mt-Nd1

In [None]:
gene="mt-Nd1"

## Load data

In [None]:
# Folder holding the per-study summary CSV files
path = os.path.join(main_dir, 'P06_Foxf2_per_celltype', '202502-Tspo-HermsLab', 'Genes_summarized')
all_files = os.listdir(path)

# Keep the CSV files whose name contains the gene symbol (case-insensitive)
gene_key = gene.lower()
matching_files = []
for fname in all_files:
    if fname.endswith(".csv") and gene_key in fname.lower():
        matching_files.append(fname)
matching_files

In [None]:
# Read every matching summary table (semicolon-separated, header in the first row)
li = [
    pd.read_csv(os.path.join(path, csv_name), index_col=None, header=0, sep=";")
    for csv_name in matching_files
]

# Stack the per-file tables into one long table with a fresh running index
all = pd.concat(li, axis=0, ignore_index=True)

In [None]:
all.source.unique()

In [None]:
all.gene.unique()

## Format

### Remove celltypes

Remove cell types that are available in only a few studies

In [None]:
all.clusters.value_counts()

In [None]:
# Allow up to 50 columns in the display so the source x cluster table is not truncated
pd.set_option("display.max_columns", 50)
pd.crosstab(index=all.source, columns=all.clusters)

In [None]:
# Cell types excluded from the comparison; all of them are dropped in a single pass
excluded_clusters = [
    "Unknown",
    'Immune_Other',
    'Olfactory ensheathing cells',
    'Fibromyocytes',
    'T cells',
    'NK/T cells',
    'Choroid plexus epithelial cells',
    'Neuroepithelial cells',
]
all = all[~all.clusters.isin(excluded_clusters)]

In [None]:
# Further cell types excluded from the comparison
excluded_clusters_extra = [
    'Hemoglobin-expressing vascular cells',
    'Olfactory ensheathing glia',
    'Hypendymal cells',
]
all = all[~all.clusters.isin(excluded_clusters_extra)]

### Rename Oligos

In [None]:
all = all.replace("Oligos","Oligodendrocytes")

### Match Microglia naming

In [None]:
all.clusters.unique().tolist()

In [None]:
all = all.replace("Microglia","Microglia/Mφ")

In [None]:
all = all.replace("Macrophages","Microglia/Mφ")

In [None]:
all = all.replace("Microglia/Macrophages","Microglia/Mφ")

In [None]:
all.clusters.unique().tolist()

### SMCs/Pericytes

In [None]:
all[all.clusters=="SMCs/Pericytes"]

In [None]:
all = all.replace("SMCs/Pericytes","SMCs") # rather SMCs than pericytes, see TabulaMurisSenis2020 pp script

### Fibroblasts = VLMC

See Zeisel 2018 Cell

In [None]:
all = all.replace("Leptomeningeal cells","Fibroblasts")

In [None]:
all = all.replace("VLMCs","Fibroblasts")

### Group Neuroblasts/Neuronal stem cells

In [None]:
# Group Neurogenesis + Neuroblasts + Neuronal stem cells

In [None]:
# Collapse the different progenitor labels into one shared "Neuroblasts/NSCs" label
nsc_aliases = ("Neuroblasts", "Neuronal stem cells", "Neurogenesis", "Neural stem cells")
for alias in nsc_aliases:
    all = all.replace(alias, "Neuroblasts/NSCs")

In [None]:
all[all.clusters=="Neuroblasts/NSCs"]

### Rename ECs

In [None]:
# rename ECs to Endothelial cells

In [None]:
all2 = all

In [None]:
all2 = all2.replace("ECs","Endothelial cells")

### Set order of axes

In [None]:
#all2.source = pd.Categorical(all2.source, categories= ["Heindl2022", "Vanlandewijck2018", "Saunders2018", "Zeisel2018", "TabulaMuris2018", "Winkler2022", "Yang2022", "Siletti2022", "Garcia2022"])

In [None]:
# Studies to show, in plotting order; source values outside this list become NaN
source_order = ["Heindl2022", "Saunders2018", "Yang2022"]
all2["source"] = pd.Categorical(all2["source"], categories=source_order)

In [None]:
# Cell types to show, in plotting order; cluster values outside this list become NaN
cluster_categories = [
    "Astrocytes",
    "Microglia/Mφ",
    "Oligodendrocytes",
    "OPCs",
    "Endothelial cells",
    "Pericytes",
    "SMCs",
    "Fibroblasts",
    "Neurons",
    "Neuroblasts/NSCs",
    "Ependymal cells",
]
all2["clusters"] = pd.Categorical(all2["clusters"], categories=cluster_categories)

In [None]:
all2 = all2.sort_values("clusters")

### Rename studies

In [None]:
all2 = all2.replace("Saunders2018","Saunders, 2018, Cell")

In [None]:
all2 = all2.replace("Heindl2022","Own data")

In [None]:
all2 = all2.replace("Zeisel2018","Zeisel, 2018, Cell")

In [None]:
all2 = all2.replace("TabulaMuris2018", "Tabula Muris, 2018, Nature")

In [None]:
all2 = all2.replace("Winkler2022", "Winkler, 2022, Science")

In [None]:
all2 = all2.replace("Yang2022", "Yang, 2022, Nature")

In [None]:
all2 = all2.replace("Vanlandewijck2018", "Vanlandewijck, 2018, Nature")

In [None]:
all2 = all2.replace("Siletti2022", "Siletti, 2022, bioRxiv")

In [None]:
all2 = all2.replace("Garcia2022", "Garcia, 2022, Nature")

In [None]:
#all2 = all2.replace("Ximerakis2019", "Ximerakis, 2019, Nat Neurosci")
#all2 = all2.replace("TabulaMurisSenis2020", "Tabula Muris, 2020, Nature")
#all2 = all2.replace("Zhao2020", "Zhao, 2020, Nat Comm")

## Heatmap

In [None]:
# Reshape to wide format: one row per cell type, one column per study, values = mean expression
expr_wide = all2.set_index(['clusters','source'])['mean_expression'].unstack().reset_index()
expr_wide.index = expr_wide.clusters

# Put the rows in the order in which the cell types appear in all2, then drop the helper column
cluster_order = list(all2.clusters.unique())
all4 = expr_wide.reindex(cluster_order).drop(columns="clusters")

In [None]:
# create color palette
# Light grey -> tomato gradient for the heatmap. The colormap object is handed to seaborn
# directly: the former lookup of "mycolormap" by name only worked when the Tspo section had
# registered that name earlier in the same kernel session, and it ignored the colormap built here.
my_cmap2 = mpl.colors.LinearSegmentedColormap.from_list("mycolormap", ['#ebebeb','tomato'])
cpal = my_cmap2

In [None]:
# Heatmap of mean expression (rows: cell types, columns: studies), colour scale fixed to 0..1
heatmap_rc = {'figure.figsize': (4.5,2.5), 'figure.dpi': 120}
with rc_context(heatmap_rc):
    heatmap_ax = sns.heatmap(all4, cmap=cpal, vmin=0, vmax=1, linewidths=0.01)
    pl = heatmap_ax.set(ylabel=None, xlabel=None)
    # Column labels go on top of the plot instead of below it, rotated by 45 degrees
    plt.tick_params(axis='both', which='major', labelsize=10, labelbottom=False, bottom=False, top=False, labeltop=True)
    plt.xticks(rotation=45, ha='left')
    # Frameless legend box carrying the gene name as its title
    legend_title = gene+' \nexpression \n(mean)'
    plt.legend(loc=(1.27,0.345), title=legend_title, frameon=False)
    plt.show()

## Dotplot

In [None]:
# create color palette
# Mid grey -> tomato gradient for the dot plot. The colormap object is used directly:
# the former lookup of "mycolormap" by name returned whatever had been registered under
# that name earlier (the heatmap gradient), so the colormap built here was never applied.
my_cmap2 = mpl.colors.LinearSegmentedColormap.from_list("mycolormap_dotplot", ['#d1d1d1','tomato'])
cpal2 = my_cmap2

In [None]:
# Relabel all seven columns by position; the two multi-line labels are the ones passed as
# hue= and size= to the dot plot below.
# NOTE(review): this assumes the columns arrive in exactly this order (clusters, mean expression,
# fraction of cells, gene, source, organism, cell_number) — confirm against the summary CSVs.
# NOTE(review): the heatmap cell above selects 'mean_expression'; presumably it cannot be re-run
# after this rename without rebuilding all2 — verify.
all2.columns = ['clusters', 'Mean expression\n       in group', 'Fraction of cells\n   in group (%)', 'gene', 'source','organism', 'cell_number']

In [None]:
# Scale the fraction column by 100 (the column label states percent).
# Running this cell twice scales the values twice.
fraction_col = "Fraction of cells\n   in group (%)"
all2[fraction_col] = all2[fraction_col].mul(100)

In [None]:
# Dot plot: one dot per (study, cell type); colour = mean expression, dot size = fraction of cells.
sns.set(style="white")
pl=sns.relplot(data=all2, x="source", y="clusters",
                hue='Mean expression\n       in group', size='Fraction of cells\n   in group (%)',
                #size_norm=(50, 500),
                palette=cpal2, 
                sizes=(15, 550), # smallest and largest marker size
                #marker="s", # uncomment for square instead of round markers
                linewidth=1,
                #legend=None,
                #aspect=0.9
              );
# No axis titles; the tick labels carry the study and cell-type names
pl.set(ylabel=None,xlabel=None)
# Put the x tick labels on top of the plot instead of below it, rotated by 45 degrees
plt.tick_params(axis='both', which='major', labelsize=10, labelbottom = False, bottom=False, top = False, labeltop=True)
plt.xticks(rotation=45, ha = 'left')
pl.set_xticklabels(fontsize=14.4, family="arial", color="black")
#plt.xlabel(family='Arial')
pl.set_yticklabels(fontsize=14.1, family="arial", color="black")
# Final figure size in inches (width, height)
pl.fig.set_size_inches(9.4,4.2)
#plt.legend(loc="upper right")
# Hide the left and bottom axis lines
sns.despine(left=True, bottom=True);

# sns legend: remove the legend that relplot attached to the grid ...
leg = pl._legend
#leg.set_bbox_to_anchor([1.5,0.53])
leg.remove();

# mpl legend: ... and draw a matplotlib legend instead, titled with the gene name
legend = plt.legend(frameon=True, framealpha=0.2, borderpad=0.5, bbox_to_anchor=(1,1), title=gene, # handletextpad=0.7, 
           prop=mpl.font_manager.FontProperties(family='arial', size=10), labelcolor='black')
plt.setp(legend.get_title(), color='black', family='arial', size=13);

In [None]:
plt.show()

In [None]:
# Write the dot plot as a date-stamped PNG into the project's plots folder
plots_dir = os.path.join(main_dir, 'P06_Foxf2_per_celltype', '202502-Tspo-HermsLab', 'plots')
png_name = date.today().strftime("%Y%m%d")+'_mtNd1_by_celltype_a.png'
pl.savefig(os.path.join(plots_dir, png_name), dpi=500)

In [None]:
# Write the dot plot as a date-stamped SVG into the project's plots folder
plots_dir = os.path.join(main_dir, 'P06_Foxf2_per_celltype', '202502-Tspo-HermsLab', 'plots')
svg_name = date.today().strftime("%Y%m%d")+'_mtNd1_by_celltype_a.svg'
pl.savefig(os.path.join(plots_dir, svg_name), dpi=500)

In [None]:
# Drop the mt-Nd1 tables now that the section is finished
del all, all2, all4

# Session Info

In [None]:
sc.logging.print_versions()

---
# Export HTML

In [None]:
# Use nbconvert conda env
# Copies the notebook into HTMLs/ with today's date as prefix, converts that copy with the
# html_toc exporter, then deletes the copied .ipynb again.
# NOTE(review): the file name combine_foxf2_results.ipynb does not mention Tspo — confirm it
# matches this notebook's actual file name.
! cp combine_foxf2_results.ipynb HTMLs/$(date '+%Y%m%d')_combine_foxf2_results.ipynb
! jupyter nbconvert HTMLs/$(date '+%Y%m%d')_combine_foxf2_results.ipynb --to html_toc
! rm HTMLs/$(date '+%Y%m%d')_combine_foxf2_results.ipynb