In [None]:
from anndata import AnnData
import anndata
from scipy import sparse, io
import scipy
import pandas as pd
import scipy.io
import os
import scanpy as sc
import matplotlib.pyplot as plt
import matplotlib
import matplotlib.colors
import numpy as np
import seaborn as sns
import math
import scanpy.external as sce
import scrublet as scr
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd
from scipy.stats import chi2_contingency
from scipy.stats import fisher_exact
from statsmodels.stats.multitest import multipletests

sns.set_theme(font="Arial", font_scale=1, style='ticks')
sc.settings.verbosity = 3
plt.rcParams['figure.figsize'] = (6,6)
plt.rcParams['figure.dpi'] = 100
plt.rc("axes.spines", top=False, right=False)

%config InLineBackend.figure_format = 'retina'
cmap = matplotlib.colors.LinearSegmentedColormap.from_list("", ["#104e8b", "#ffdab9", "#8b0a50"])
batch_palette=['#689aff', '#fdbf6f', '#b15928']

# Input Endothelial Cells (Attach the actual preprocessing above)
test3 = sc.read_h5ad("/data/Projects/phenomata/01.Projects/11.Vascular_Aging/03.Scanpy/test3.h5ad")
test3.obs['Age'] = test3.obs['batch']

In [None]:
# Coarse annotation: each cluster id found in obs['leiden_r05'] is assigned one
# of seven broad cell-type labels.
leiden_to_celltype_dict = {
    '0': 'Vascular smooth muscle cells',
    '1': 'Vascular smooth muscle cells',
    '2': 'Vascular smooth muscle cells',
    '3': 'Fibroblasts',
    '4': 'Vascular smooth muscle cells',
    '5': 'Endothelial cells',
    '6': 'Fibroblasts',
    '7': 'Endothelial cells',
    '8': 'Vascular smooth muscle cells',
    '9': 'Fibroblasts',
    '10': 'B cells',
    '11': 'M\u03A6',
    '12': 'T cells',
    '13': 'Neuronal cells',
}
# Look every cluster id up with dict indexing, so an id that is missing from
# the table raises KeyError instead of being mapped to a missing value.
test3.obs['celltype'] = (
    test3.obs['leiden_r05']
    .map(leiden_to_celltype_dict.__getitem__)
    .astype('category')
)

# Category order used for the annotated (ordered) cell-type column.
lin = (
    'Endothelial cells',
    'Vascular smooth muscle cells',
    'Fibroblasts',
    'B cells',
    'M\u03A6',
    'T cells',
    'Neuronal cells',
)
test3.obs['Annotated Cell Types'] = (
    test3.obs['celltype']
    .cat.reorder_categories(list(lin), ordered=True)
)

# Short labels for the annotated cell types (used to build compact group names).
celltype_abbrev = {'Endothelial cells': 'EC',
                   'Vascular smooth muscle cells': 'vSMC',
                   'Fibroblasts': 'FB',
                   'B cells': 'Bc',
                   'M\u03A6': 'M\u03A6',
                   'T cells': 'Tc',
                   'Neuronal cells': 'Neu'
                   }
# Dict indexing inside the lambda makes an unmapped cell type raise KeyError
# rather than silently becoming a missing value.
test3.obs['celltype_abbrev'] = (
    test3.obs['Annotated Cell Types']
    .map(lambda x: celltype_abbrev[x])
    .astype('category')
)

# "<Age>_<abbreviation>" label per cell, e.g. "m01_EC". Built with vectorized
# string concatenation instead of a row-wise DataFrame.apply(axis=1), which
# calls a Python function once per cell for the same result.
test3.obs['Age_Celltype_Combined'] = (
    test3.obs['Age'].astype(str) + '_' + test3.obs['celltype_abbrev'].astype(str)
).astype('category')

# Fixed color per annotated cell type (hex values taken from the 'tab20b' palette).
celltype_colormap = {'Endothelial cells': '#393b79',
                     'Vascular smooth muscle cells': '#9c9ede',
                     'Fibroblasts': '#b5cf6b',
                     'B cells': '#e7ba52',
                     'M\u03A6': '#ad494a',
                     'T cells': '#7b4173',
                     'Neuronal cells': '#de9ed6'} #palette = 'tab20b'
# Same colors keyed by the abbreviations defined in `celltype_abbrev`.
# The fibroblast key is 'FB' (it was 'FBs'), so it now matches the abbreviation
# actually stored in obs['celltype_abbrev'].
celltype_abbrev_colormap = {'EC': '#393b79',
                            'vSMC': '#9c9ede',
                            'FB': '#b5cf6b',
                            'Bc': '#e7ba52',
                            'M\u03A6': '#ad494a',
                            'Tc': '#7b4173',
                            'Neu': '#de9ed6'} #palette = 'tab20b'

# Stacked bar chart: fraction of each annotated cell type within every age group.
df = pd.concat(
    [test3.obs['Age'], test3.obs['Annotated Cell Types']],
    axis=1,
)
# Earlier horizontal variant, kept for reference:
#ax = pd.crosstab(df['Age'], df['Annotated Cell Types'], normalize=0).sort_values(by='Age', ascending=False).plot.barh(stacked=True, color=celltype_colormap)

# Rows are normalized to sum to 1; margins=True is requested as well, which
# should add an 'All' row for the pooled composition - TODO confirm in the plot.
celltype_fraction_by_age = pd.crosstab(
    index=df['Age'],
    columns=df['Annotated Cell Types'],
    normalize='index',
    margins=True,
)
ax = celltype_fraction_by_age.plot.bar(
    stacked=True,
    rot=30,
    width=1.0,
    color=celltype_colormap,
)
# Legend sits just outside the right edge of the axes.
ax.legend(loc='upper left', bbox_to_anchor=(0.98, 1.0), frameon=False, prop={'size':10})
ax.set_ylim((0.0, 1.0))
ax.set_xlabel('')
ax.set_ylabel('Fraction of Annotated Cell Types')
plt.tight_layout()

# Correlation between the Age x cell-type groups.
# The dendrogram is computed first, then drawn alongside the Pearson
# correlation matrix with the correlation values printed in the cells.
# Original author's note: this uses X_pca with n_pcs=100.
sc.tl.dendrogram(
    test3,
    groupby='Age_Celltype_Combined',
    cor_method='pearson',
    linkage_method='complete',
)
ax = sc.pl.correlation_matrix(
    test3,
    groupby='Age_Celltype_Combined',
    show_correlation_numbers=True,
    dendrogram=True,
    ax=None,
    cmap=cmap,
    vmin=-1,
    vmax=1,
)



In [None]:
# Disabled two-panel endothelial UMAP figure, kept for reference only.
# It refers to `test3_endo`, which is not defined in this part of the notebook.
#
# fig, axes = plt.subplots(1, 2, figsize=(9, 4))
#
# sc.pl.umap(test3_endo, color=['Age'], add_outline=False, legend_loc='right margin', show=False, size=80, ax=axes[0])
# axes[0].legend(frameon=False, loc='lower center', bbox_to_anchor=(0.45, 0))
# sc.pl.umap(test3_endo, color=['Subpopulations of Endothelial Cells'], add_outline=False, legend_loc='on data', color_map=cmap, palette='Set3', show=False, size=80, ax=axes[1])
# sns.despine(ax=axes[0])
# sns.despine(ax=axes[1])
# axes[0].text(-7.5, 8, "A", size=20, weight='bold')
# axes[1].text(-7.5, 8, "B", size=20, weight='bold')

# Fixed-seed random permutation of the cell (row) indices, so the order in
# which cells are handed to the plotting call is shuffled reproducibly.
np.random.seed(0)
rand_is = np.random.permutation(np.arange(test3.shape[0]))

# One UMAP per age group, colored by annotated cell type.
# The age mask must be built from the *shuffled* object: AnnData applies a
# boolean Series by position, so a mask taken from the unshuffled `test3.obs`
# would select the wrong cells once the rows have been permuted.
test3_shuffled = test3[rand_is, :]
for age in ('m01', 'm10', 'm20'):
    sc.pl.umap(
        test3_shuffled[test3_shuffled.obs['Age'] == age],
        add_outline=False,
        # scanpy's keyword is 'right margin' (with a space); 'right_margin' is
        # not a recognized legend location.
        legend_loc='right margin',
        size=30,
        legend_fontsize=10,
        color_map=cmap,
        color=['Annotated Cell Types'],
        palette='tab20b',
        title=age,
    )


### Detailed cell type (used for Cell-Cell communications)

In [None]:
# Fine-grained annotation: every cluster id in obs['leiden_r05'] gets its own
# label, numbered within its broad cell type (vSMC1-5, FB1-3, EC1-2).
leiden_to_detailed_celltype_dict = {'0': 'vSMC1',
                                    '1': 'vSMC2',
                                    '2': 'vSMC3',
                                    '3': 'FB1',
                                    '4': 'vSMC4',
                                    '5': 'EC1',
                                    '6': 'FB2',
                                    '7': 'EC2',
                                    '8': 'vSMC5',
                                    '9': 'FB3',
                                    '10': 'Bc',
                                    '11': 'M\u03A6',
                                    '12': 'Tc',
                                    '13': 'Neu'}
# Dict indexing inside the lambda makes an unmapped cluster id raise KeyError
# rather than silently becoming a missing value.
test3.obs['detailed_celltype'] = (
    test3.obs['leiden_r05']
    .map(lambda x: leiden_to_detailed_celltype_dict[x])
    .astype('category')
)
# Category order: endothelial, smooth muscle, fibroblast, then immune/neuronal.
detailed_celltype_order = ('EC1', 'EC2', 'vSMC1', 'vSMC2', 'vSMC3', 'vSMC4', 'vSMC5', 'FB1', 'FB2', 'FB3', 'Bc', 'M\u03A6', 'Tc', 'Neu')
test3.obs['detailed_celltype'] = test3.obs['detailed_celltype'].cat.reorder_categories(list(detailed_celltype_order), ordered=True)

# "<Age>_<detailed cell type>" label per cell, e.g. "m01_EC1". Built with
# vectorized string concatenation instead of a row-wise DataFrame.apply(axis=1),
# which calls a Python function once per cell for the same result.
test3.obs['Age_detailed_Celltype_Combined'] = (
    test3.obs['Age'].astype(str) + '_' + test3.obs['detailed_celltype'].astype(str)
).astype('category')

# Dendrogram + Pearson correlation matrix across the Age x detailed-cell-type
# groups. Correlation numbers are switched off here (show_correlation_numbers=False).
# Original author's note: this uses X_pca with n_pcs=100.
sc.tl.dendrogram(test3, groupby='Age_detailed_Celltype_Combined', cor_method='pearson', linkage_method='complete')
ax = sc.pl.correlation_matrix(test3, groupby='Age_detailed_Celltype_Combined', show_correlation_numbers=False, dendrogram=True, ax=None, cmap=cmap, vmin=-1, vmax=1)

# Color assigned to each fine-grained cell type (hex values from the
# 'tab20b' palette), listed in the same order as the detailed cell types.
detailed_celltype_colormap = {
    'EC1': '#393b79',
    'EC2': '#5254a3',
    'vSMC1': '#9c9ede',
    'vSMC2': '#637939',
    'vSMC3': '#b5cf6b',
    'vSMC4': '#cedb9c',
    'vSMC5': '#bd9e39',
    'FB1': '#e7ba52',
    'FB2': '#843c39',
    'FB3': '#ad494a',
    'Bc': '#e7969c',
    'MΦ': '#7b4173',
    'Tc': '#ce6dbd',
    'Neu': '#de9ed6',
}  # palette = 'tab20b'

# The same 14 colors as a plain list, in the dict's insertion order.
list2 = list(detailed_celltype_colormap.values())

# UMAP of all cells colored by fine-grained cell type, with the labels drawn on
# the embedding itself. Rows are taken in the shuffled `rand_is` order.
sc.pl.umap(
    test3[rand_is,:],
    color=['detailed_celltype'],
    palette='tab20b',
    title='Fine-grained Cell Types',
    size=20,
    add_outline=False,
    legend_loc='on data',
    legend_fontsize=8,
    legend_fontoutline=1,
)

### Proportion of each subcluster across age groups and Odds ratio

In [None]:
# Earlier variant, kept for reference: immune and neuronal cells were dropped
# before the odds-ratio calculation (the original note on why this was
# abandoned is unfinished: "yet this procedure poses the question").
#df = test3[test3.obs['detailed_celltype'].isin(['vSMC1', 'vSMC2', 'vSMC3', 'vSMC4', 'vSMC5', 'FB1', 'FB2', 'FB3', 'EC1', 'EC2'])].obs[['Age', 'detailed_celltype']]
df = test3.obs.loc[:, ['Age', 'detailed_celltype']]

# Raw cell counts, Age x detailed cell type, including the 'All' margins.
df_pivot = pd.crosstab(
    index=df['Age'],
    columns=df['detailed_celltype'],
    margins=True,
    normalize=False,
)

# Percentage (intracluster normalization) heatmap input: every column
# (cell type) sums to 100 across the age groups. Columns are then put in
# display order, with the immune and neuronal types as the last four.
#detailed_celltype_order = ('vSMC1', 'vSMC2', 'vSMC3', 'vSMC4', 'vSMC5', 'FB1', 'FB2', 'FB3', 'EC1', 'EC2')
detailed_celltype_order = ('vSMC1', 'vSMC2', 'vSMC3', 'vSMC4', 'vSMC5', 'FB1', 'FB2', 'FB3', 'EC1', 'EC2', 'Bc', 'M\u03A6', 'Tc', 'Neu')
df_pivot_percentage = (
    pd.crosstab(
        index=df['Age'],
        columns=df['detailed_celltype'],
        normalize='columns',
        margins=False,
    )
    .mul(100)
    .reindex(columns=detailed_celltype_order)
)

# Heatmaps of how each fine-grained cell type is distributed over the age
# groups (values are column percentages from `df_pivot_percentage`).
#
# Each heatmap is drawn on its own explicitly created figure. sns.heatmap
# draws on the current axes when no `ax` is given, so two bare calls in one
# cell painted the second heatmap (and its colorbar) over the first one.
# Passing figsize here also removes the need to change the global
# rcParams['figure.figsize'] temporarily.

# Proportion "Excluding" Immune cells and Neuronal cells
# (.iloc[:, :-4] drops the last four columns: Bc, MΦ, Tc, Neu)
fig, ax = plt.subplots(figsize=(16.5, 5))
sns.heatmap(df_pivot_percentage.iloc[:, :-4], vmin=20, vmax=65, cmap='viridis', linewidths=0.2, annot=True, fmt='.2f', ax=ax)
ax.set_xlabel('Fine-grained Cell types (excluding Immune cells and Neuronal cells)', fontsize=13)
ax.set_ylabel('Age', fontsize=13)
ax.set_yticklabels(ax.get_yticklabels(), rotation=0)
ax.set_aspect(1)  # make each box of the heatmap square shaped
# Shift the colorbar slightly left and stretch it to the heatmap's height.
heatmap_pos = ax.get_position()
color_legend = ax.collections[0].colorbar.ax
legend_pos = color_legend.get_position()
color_legend.set_position([legend_pos.x0 - 0.03, heatmap_pos.y0, legend_pos.width, heatmap_pos.height])
# Colorbar caption, placed in the heatmap's data coordinates
# (presumably hand-tuned offsets - adjust if the number of columns changes).
title = ax.text(legend_pos.x0 + 9.9, legend_pos.y0 + 1.5, 'Proportion (%)', rotation=270, ha='left', va='center')

# Proportion "Including" Immune cells and Neuronal cells
fig, ax = plt.subplots(figsize=(16.5, 5))
sns.heatmap(df_pivot_percentage, vmin=0, vmax=90, cmap='viridis', linewidths=0.2, annot=True, fmt='.2f', ax=ax)
ax.set_xlabel('Fine-grained Cell types', fontsize=13)
ax.set_ylabel('Age', fontsize=13)
ax.set_yticklabels(ax.get_yticklabels(), rotation=0)
ax.set_aspect(1.1)
# Shift the colorbar slightly left and stretch it to the heatmap's height.
heatmap_pos = ax.get_position()
color_legend = ax.collections[0].colorbar.ax
legend_pos = color_legend.get_position()
color_legend.set_position([legend_pos.x0 - 0.03, heatmap_pos.y0, legend_pos.width, heatmap_pos.height])
# Colorbar caption, placed in the heatmap's data coordinates
# (presumably hand-tuned offsets - adjust if the number of columns changes).
title = ax.text(legend_pos.x0 + 14, legend_pos.y0 + 1.5, 'Proportion (%)', rotation=270, ha='left', va='center')



# Re-assert the notebook-wide plotting defaults for the cells that follow.
sns.set_theme(font="Arial", font_scale=1, style='ticks')
sc.settings.verbosity = 3
plt.rcParams['figure.figsize'] = (6,6)
plt.rcParams['figure.dpi'] = 100
plt.rc("axes.spines", top=False, right=False)

# Odds ratio calculation
# For every (age group, cell type) pair a 2x2 contingency table is built from
# the count table `df_pivot` and tested with a two-sided Fisher's exact test:
#
#                        this cell type | all other cell types
#   this age group              a       |          b
#   all other age groups        c       |          d
#
# The 'All' margin row/column only supplies the totals and is skipped in the
# loops themselves.
age_groups = df_pivot.index[:-1]
celltypes_tested = df_pivot.columns[:-1]
grand_total = df_pivot.loc['All', 'All']

oddsratio_df, pvalue_df = [], []
for month in age_groups:
    cells_in_month = df_pivot.loc[month, 'All']
    cells_in_other_months = grand_total - cells_in_month

    oddsratio, pvalues = [], []
    for celltype in celltypes_tested:
        # Superseded table layout, kept for reference:
#        table = np.array([ [df_pivot[celltype][month], df_pivot[celltype]['All'] - df_pivot[celltype][month] ], [df_pivot['All'][month], df_pivot['All']['All'] - df_pivot['All'][month]] ])
        type_in_month = df_pivot.loc[month, celltype]
        type_in_other_months = df_pivot.loc['All', celltype] - type_in_month
        table = np.array([
            [type_in_month, cells_in_month - type_in_month],
            [type_in_other_months, cells_in_other_months - type_in_other_months],
        ])
        oddsr, p = fisher_exact(table, alternative='two-sided')
        oddsratio.append(oddsr)
        pvalues.append(p)

    oddsratio_df.append(oddsratio)
    # Benjamini-Hochberg correction across the cell types of this one age
    # group; element [1] of the result is the array of corrected p-values.
    pvalues = multipletests(pvals=pvalues, alpha=0.01, method='fdr_bh')[1]
    pvalue_df.append(pvalues)

Odds = pd.DataFrame(oddsratio_df, index=age_groups, columns=celltypes_tested)
Pvalues = pd.DataFrame(pvalue_df, index=age_groups, columns=celltypes_tested)

# Odds-ratio rows first, corrected p-value rows underneath (same row labels).
df_final = pd.concat([Odds, Pvalues], axis=0)
df_final

# Grouped bar chart of the odds ratios, one bar per age group and cell type.
# From here on `batch_palette` is a dict keyed by age label.
batch_palette = {
    'm01': '#689aff',
    'm10': '#fdbf6f',
    'm20': '#b15928',
}
plt.rcParams['figure.figsize'] = (10,6)  # wider figure for this plot only

# Rows 0-2 of df_final are taken as the odds-ratio rows (the odds table is
# stacked above the p-values; assumes three age groups - TODO confirm).
# The last four columns are dropped, matching the axis label below.
odds_to_plot = df_final.iloc[:3, :-4].T
ax = odds_to_plot.plot.bar(color=batch_palette, rot=0) # Odds ratio
ax.legend(loc='upper left', bbox_to_anchor=(0.75, 0.95), frameon=False, prop={'size':15})
ax.set_xlabel('Fine-grained Cell types (excluding Immune cells and Neuronal cells)')
ax.set_ylabel('Odds Ratio')
# Dashed reference line at an odds ratio of 1.
ax.axhline(y=1.0, color="red", linewidth=1.0, linestyle='--')
plt.tight_layout()
sns.despine(ax=ax)

# Put the notebook-wide plotting defaults back.
sns.set_theme(font="Arial", font_scale=1, style='ticks')
sc.settings.verbosity = 3
plt.rcParams['figure.figsize'] = (6,6)
plt.rcParams['figure.dpi'] = 100
plt.rc("axes.spines", top=False, right=False)
#plt.savefig("FigureD.pdf")

### Senescence-associated secretory phenotype (SASP)

In [None]:
# Read the Reactome SASP gene list (one gene symbol per line). The file is
# opened in a `with` block so the handle is closed again, and surrounding
# whitespace / blank lines are dropped so they cannot end up as gene names.
with open("/data/Projects/phenomata/01.Projects/11.Vascular_Aging/03.Scanpy/REACTOME_SASP_genes_mm.txt", 'r') as dbf:
    sasp_reactome = [gene for gene in (line.strip() for line in dbf) if gene]

# Per-cell gene-set score, stored in obs['senescence_score'].
sc.tl.score_genes(test3, sasp_reactome, score_name='senescence_score')

df_senescence_score = test3.obs[['senescence_score', 'Age']]

# Violin plot of the score per age group with the individual cells overlaid.
# (A missing comma between `alpha=0.5` and `jitter=True` previously made this
# whole cell a SyntaxError.)
fig, ax = plt.subplots()
sns.violinplot(data=df_senescence_score, x="Age", y="senescence_score", inner=None, palette=batch_palette, ax=ax)
sns.stripplot(data=df_senescence_score, x="Age", y="senescence_score", color='black', size=1, alpha=0.5, jitter=True, ax=ax)

#### Perform One-way ANOVA
m01_sasp = df_senescence_score.loc[df_senescence_score["Age"] == "m01", "senescence_score"]
m10_sasp = df_senescence_score.loc[df_senescence_score["Age"] == "m10", "senescence_score"]
m20_sasp = df_senescence_score.loc[df_senescence_score["Age"] == "m20", "senescence_score"]

fstat, pvalue = scipy.stats.f_oneway(m01_sasp, m10_sasp, m20_sasp)

print(f"F-statistic: {fstat}")
print(f"P-value: {pvalue}")
# Output recorded by the original author (2023-06-01):
#   F-statistic: 33.024522888071694
#   P-value: 5.082324132973939e-15

#### Perform Tukey's Honestly Significant Difference (HSD) test
posthoc = pairwise_tukeyhsd(df_senescence_score["senescence_score"], df_senescence_score["Age"])
print(posthoc)



## TODO (original note, 2023-06-01, translated): do this later - no time right now.
# Placeholder for a per-detailed-cell-type version of the analysis above.
# The previous code here was a verbatim copy of the all-cells analysis: it
# redrew the same violins on the same axes and recomputed identical ANOVA /
# Tukey results, so it has been removed until the real analysis is written.
# NOTE(review): this frame presumably should also carry 'detailed_celltype';
# as written it has the same columns as `df_senescence_score` - confirm.
df_detailed_senescence_score = test3.obs[['senescence_score', 'Age']]

### Analysis using Principal components

In [None]:
# Gene loadings from test3.varm['PCs'] as a labelled table:
# one row per gene (var_names), one column per component, named PC1 ... PC100.
column_name_pca1_100 = [f'PC{pc_number}' for pc_number in range(1, 101)]
test3_pca_loadings = pd.DataFrame(
    data=test3.varm['PCs'],
    index=test3.var_names,
    columns=column_name_pca1_100,
)
