In [1]:
import pandas as pd
from ALLCools.mcds import MCDS
from scipy.stats import f_oneway, ttest_ind
import joblib
import anndata

In [2]:
# Name handed to MCDS.open as `var_dim` when the dataset is opened further down.
var_dim = "geneslop2k"
# Path of the MCDS dataset that the analysis cells open.
mcds_paths = "../freq_used_files/geneslop2k_frac.mcds"

In [3]:
# Read the AnnData file and store the sign-flipped data matrix back on it.
# NOTE(review): `adata` is not used by any later cell visible in this notebook.
adata = anndata.read_h5ad('All_Genes.mCH.h5ad')
negated_matrix = -adata.X
adata.X = negated_matrix

In [4]:
# Cell metadata table (first CSV column becomes the index), restricted to
# rows whose Age is "8wk" or "18mo".
meta = pd.read_csv("../freq_used_files/221027_AMB_metadata.csv", index_col=0)
is_8wk_or_18mo = meta["Age"].isin(["8wk", "18mo"])
meta = meta.loc[is_8wk_or_18mo]

# Lookup objects stored with joblib; judging by the file names they map
# gene IDs to gene names and back.
gene_id_to_name = joblib.load('../freq_used_files/gene_id_to_name_dict')
gene_name_to_id = joblib.load('../freq_used_files/gene_name_to_id_dict')

In [5]:
# mc_type value selected from the MCDS for the non-neuronal comparison.
mc_type = 'CGN'
# A gene is kept only if its test p-value is below this threshold ...
pvalue_cutoff = 0.01
# ... and the absolute difference of its group means exceeds this threshold.
cg_change_cutoff = 0.15
# NOTE(review): a later cell also reads `ch_change_cutoff`, which is not
# defined in any visible cell -- confirm its intended value and add it here.

In [6]:
# Every MajorType label present in the (age-filtered) metadata.
all_celltypes = meta["MajorType"].unique().tolist()
# Labels treated as non-neuronal (NN) cell types.
nn_celltypes = ['Oligo NN', 'Astro NN', 'OPC NN', 'Microglia NN']
# Everything else is treated as neuronal. A plain difference is used instead of
# a symmetric difference (`^`), so an NN label that is absent from the metadata
# can never leak into the neuronal list; building the list from `all_celltypes`
# also keeps a stable order across kernel sessions (set order is not stable).
neu_celltypes = [celltype for celltype in all_celltypes if celltype not in nn_celltypes]
len(all_celltypes), len(nn_celltypes), len(neu_celltypes)

(38, 4, 34)

In [7]:
# Labels of the two age groups, as they appear in the metadata "Age" column.
age_young, age_old = '8wk', '18mo'

## 8 week vs. 18 month

In [8]:
# Reload the full metadata and keep only non-neuronal cells.
# The pairwise loop later in this section compares 8wk/9mo, 8wk/18mo and
# 9mo/18mo, so all three age groups must be retained here; filtering to just
# two ages leaves the 9mo comparisons with empty groups (NaN p-values, empty
# gene lists). The ages are listed explicitly rather than read from
# `age_young` / `age_old`, because that loop rebinds those names and would
# make this cell give a different result when re-run.
meta = pd.read_csv("../freq_used_files/221027_AMB_metadata.csv", index_col=0)
meta = meta[meta["Age"].isin(["8wk", "9mo", "18mo"]) & meta["MajorType"].isin(nn_celltypes)]

In [9]:
# Open the MCDS restricted to the cells listed in `meta`, then keep only the
# configured mc_type.
mcds = MCDS.open(mcds_paths, var_dim=var_dim, use_obs=meta.index)
mcds = mcds.sel(mc_type=mc_type)

In [10]:
# Attach the per-cell annotations used for grouping and age selection below.
for annotation in ('MajorType', 'Age'):
    mcds.coords[annotation] = meta[annotation]

In [11]:
# For each pair of age groups, and for each MajorType in `mcds`, collect the
# genes whose `geneslop2k_da_frac` values differ between the two ages:
#   * two-sample t-test p-value below `pvalue_cutoff`, and
#   * |mean(old) - mean(young)| above `cg_change_cutoff`.
# One {cell type: [genes]} dict is written per age pair.
# If an age group has no cells in `mcds`, the t-test yields NaN p-values and
# that cell type ends up with an empty gene list.
for age_pair in ['8wk_9mo', '8wk_18mo', '9mo_18mo']:

    age_young, age_old = age_pair.split('_')

    celltype_admgs = {}
    # Per-cell-type p-value / change tables for the current age pair; they are
    # only consumed by the commented-out export cells further down.
    all_celltype_gene_pvalue = []
    all_celltype_gene_change = []

    for mt, tmp_mcds in mcds.groupby('MajorType'):
        mt_label = f"{mt}"

        tmp_young_cells = tmp_mcds.get_index('cell')[tmp_mcds["Age"] == age_young]
        tmp_old_cells = tmp_mcds.get_index('cell')[tmp_mcds["Age"] == age_old]

        tmp_gene_frac = tmp_mcds["geneslop2k_da_frac"].to_pandas()
        genes = tmp_gene_frac.columns.tolist()

        young_frac = tmp_gene_frac.loc[tmp_young_cells]
        old_frac = tmp_gene_frac.loc[tmp_old_cells]

        # Gene-wise (column-wise) t-test between the two age groups.
        statistic, p_value = ttest_ind(young_frac, old_frac, axis=0)

        celltype_gene_pvalue = pd.DataFrame({mt_label: p_value}, index=genes)
        all_celltype_gene_pvalue.append(celltype_gene_pvalue)
        f_celltype_gene_pvalue = celltype_gene_pvalue[celltype_gene_pvalue[mt_label] < pvalue_cutoff]

        # Signed change of the per-gene mean, old minus young.
        celltype_gene_change = (old_frac.mean() - young_frac.mean()).to_frame(mt_label)
        all_celltype_gene_change.append(celltype_gene_change)

        f_celltype_gene_change = celltype_gene_change.loc[f_celltype_gene_pvalue.index]
        # Build the mask from the already-filtered frame so that its index
        # matches; a mask over all genes makes pandas reindex the key and warn.
        large_change = f_celltype_gene_change[mt_label].abs() > cg_change_cutoff
        f_celltype_gene_change = f_celltype_gene_change[large_change]

        celltype_admgs[mt] = f_celltype_gene_change.index.tolist()

    joblib.dump(celltype_admgs, f'result/nn_celltype_admgs_{age_young}_{age_old}.dict')

In [12]:
# all_celltype_gene_pvalue_df = pd.concat(all_celltype_gene_pvalue,axis = 1)
# all_celltype_gene_change_df = pd.concat(all_celltype_gene_change, axis = 1)

In [13]:
# joblib.dump(celltype_admgs, f'result/nn_celltype_admgs_{age_young}_{age_old}.dict')
# all_celltype_gene_pvalue_df.to_csv(f'result/pvalues_nn_celltype_admgs_{age_young}_{age_old}.csv')
# all_celltype_gene_change_df.to_csv(f'result/change_nn_celltype_admgs_{age_young}_{age_old}.csv')

## 8wk, 9mo and 18mo

In [10]:
# Reload the metadata and keep the cells from all three age groups.
# NOTE(review): this path ("../") differs from the "../freq_used_files/"
# location used by the earlier cells for the same file name -- confirm which
# one is current.
meta = pd.read_csv("../221027_AMB_metadata.csv", index_col=0)
in_three_ages = meta["Age"].isin(["8wk", "9mo", "18mo"])
meta = meta.loc[in_three_ages]

In [11]:
# Distinct MajorType labels in the three-age metadata, in order of appearance.
major_type_column = meta["MajorType"]
all_celltypes = major_type_column.unique().tolist()

In [12]:
def _monotonic_age_genes(test_celltype, mc_type_name, change_cutoff):
    """Return genes that change monotonically across 8wk -> 9mo -> 18mo in one cell type.

    The cells of ``test_celltype`` are taken from the notebook-level ``meta``
    table and their ``geneslop2k_da_frac`` values are read from the MCDS at
    ``mcds_paths`` for the requested ``mc_type_name``. A gene is returned when

    * the one-way ANOVA across the three age groups has a p-value below the
      notebook-level ``pvalue_cutoff``;
    * its per-age means are strictly increasing or strictly decreasing; and
    * ``|mean(18mo) - mean(8wk)|`` exceeds ``change_cutoff``.

    Parameters
    ----------
    test_celltype : str
        Value of ``meta["MajorType"]`` selecting the cells to test.
    mc_type_name : str
        Value passed to ``.sel(mc_type=...)`` on the opened MCDS
        (``"CHN"`` or ``"CGN"`` in this notebook).
    change_cutoff : float
        Minimum absolute 18mo-vs-8wk difference of the means.

    Returns
    -------
    list
        Gene identifiers (columns of the gene table); increasing genes first,
        then decreasing genes. Empty when an age group has no cells, because
        the ANOVA p-values are then NaN.
    """
    ages = ['8wk', '9mo', '18mo']

    # Cells of this cell type, split by age group.
    tmp_meta = meta[meta["MajorType"] == test_celltype]
    cells_by_age = {age: tmp_meta[tmp_meta["Age"] == age].index for age in ages}

    # Cell-by-gene table for this cell type and methylation context.
    tmp_mcds = MCDS.open(mcds_paths, var_dim=var_dim, use_obs=tmp_meta.index).sel(
        mc_type=mc_type_name
    )
    tmp_gene_frac = tmp_mcds["geneslop2k_da_frac"].to_pandas()
    frac_by_age = [tmp_gene_frac.loc[cells_by_age[age]] for age in ages]

    # Gene-wise (column-wise) one-way ANOVA across the three age groups.
    _, p_value = f_oneway(*frac_by_age, axis=0)
    p_values = pd.Series(p_value, index=tmp_gene_frac.columns.tolist())
    significant_genes = p_values[p_values < pvalue_cutoff].index

    # Per-gene mean in each age group, restricted to the significant genes.
    average_df = pd.concat([frac.mean() for frac in frac_by_age], axis=1)
    average_df.columns = ages
    average_df = average_df.loc[significant_genes]

    young, middle, old = (average_df[age] for age in ages)
    increasing = (young < middle) & (middle < old)
    decreasing = (young > middle) & (middle > old)
    large_change = (old - young).abs() > change_cutoff

    increase_df = average_df[increasing & large_change]
    decrease_df = average_df[decreasing & large_change]
    return pd.concat([increase_df, decrease_df]).index.tolist()


celltype_admgs_8wk_9mo_18mo = {}

# Neuronal cell types are tested on the CHN context.
# NOTE(review): `ch_change_cutoff` is not defined in any visible cell (only
# `cg_change_cutoff` is), so this loop raises NameError on a fresh kernel --
# define it in the configuration cell before running.
for test_celltype in neu_celltypes:
    celltype_admgs_8wk_9mo_18mo[test_celltype] = _monotonic_age_genes(
        test_celltype, mc_type_name="CHN", change_cutoff=ch_change_cutoff
    )

# Non-neuronal cell types are tested on the CGN context.
for test_celltype in nn_celltypes:
    celltype_admgs_8wk_9mo_18mo[test_celltype] = _monotonic_age_genes(
        test_celltype, mc_type_name="CGN", change_cutoff=cg_change_cutoff
    )

In [14]:
# Write the {cell type: [genes]} dict to the current working directory.
joblib.dump(
    value=celltype_admgs_8wk_9mo_18mo,
    filename='celltype_admgs_8wk_9mo_18mo.dict',
)

['celltype_admgs_8wk_9mo_18mo.dict']