In [1]:
import pandas as pd
from ALLCools.mcds import MCDS
from scipy.stats import f_oneway, ttest_ind
import joblib
import anndata

In [2]:
# Gene name/id lookup objects (per the file names), stored as joblib dumps.
gene_name_to_id = joblib.load("../freq_used_files/gene_name_to_id_dict")
gene_id_to_name = joblib.load("../freq_used_files/gene_id_to_name_dict")

# Gene metadata table; the first CSV column becomes the index.
gene_meta = pd.read_csv(
    "../freq_used_files/GeneMetadata.csv.gz",
    index_col=0,
)

## Pairwise comparison

In [3]:
# Thresholds for the pairwise gene selection: a gene passes the filter when its
# p-value is below `pvalue_cutoff` and its absolute change exceeds `ch_change_cutoff`.
pvalue_cutoff = 0.01
ch_change_cutoff = 0.4

In [6]:
# For every age comparison, read the stored p-value / change tables, pick a
# short gene list per cell type, and save the resulting dict with joblib.
for age_pair in ['8wk-18mo', '8wk-9mo', '9mo-18mo']:
    age_young, age_old = age_pair.split('-')

    # One column per cell type; the first CSV column is the row index.
    all_celltype_gene_pvalue_df = pd.read_csv(
        f'result/pvalues_celltype_admgs_{age_young}_{age_old}.csv', index_col=0
    )
    all_celltype_gene_change_df = pd.read_csv(
        f'result/change_celltype_admgs_{age_young}_{age_old}.csv', index_col=0
    )

    celltype_admgs = {}

    for celltype_of_interest in all_celltype_gene_change_df.columns:
        # This cell type is deliberately left out of the result.
        if celltype_of_interest == 'PAG/PCG':
            continue

        # Put p-value and change side by side for this cell type.
        gene_stats = pd.concat(
            [
                all_celltype_gene_pvalue_df[celltype_of_interest],
                all_celltype_gene_change_df[celltype_of_interest],
            ],
            axis=1,
            keys=['pvalue', 'change'],
        )
        gene_stats['abs_changes'] = gene_stats['change'].abs()

        passes_cutoffs = (gene_stats['pvalue'] < pvalue_cutoff) & (
            gene_stats['abs_changes'] > ch_change_cutoff
        )
        significant = gene_stats[passes_cutoffs]
        n_significant = significant.shape[0]

        if n_significant == 0:
            # Nothing passes the cutoffs: fall back to the single gene with the
            # largest absolute change, regardless of its p-value.
            selected = gene_stats.sort_values('abs_changes', ascending=False).head(1).index.tolist()
        elif n_significant > 4:
            # Too many hits: keep the four with the largest absolute change.
            selected = significant.sort_values('abs_changes', ascending=False).head(4).index.tolist()
        else:
            # One to four hits: keep them all, in table order.
            selected = significant.index.tolist()

        celltype_admgs[celltype_of_interest] = selected

    joblib.dump(celltype_admgs, f'result/celltype_admgs_{age_young}_{age_old}.dict')

## 3 age group skipped

In [2]:
# Location of the MCDS dataset and the name of its variable dimension; both are
# passed to MCDS.open in later cells.
mcds_paths = "../230410_panel_design_v1/geneslop2k_frac.mcds"
var_dim = "geneslop2k"

In [None]:
# Load the AnnData file and replace its data matrix with its negation.
# NOTE(review): the reason for the sign flip is not stated, and `adata` is not
# referenced by any later cell in this notebook — confirm it is still needed.
adata = anndata.read_h5ad('../230424_panel_design_v2/adata/All_Genes.mCH.h5ad')
adata.X = - adata.X

In [None]:
# Load the gene name/id lookup objects again, overwriting the ones loaded at the
# top of the notebook.
# NOTE(review): these paths point at '../' while the first cell reads from
# '../freq_used_files/' — confirm which location is current.
gene_name_to_id = joblib.load('../gene_name_to_id_dict')
gene_id_to_name = joblib.load('../gene_id_to_name_dict')

In [None]:
# Thresholds used by the per-cell-type statistical tests in the following cells.
# This rebinds pvalue_cutoff to a stricter value than the 0.01 set for the
# pairwise selection earlier in the notebook; ch_change_cutoff is unchanged.
pvalue_cutoff = 0.001
ch_change_cutoff = 0.4
#cg_change_cutoff = 0.2

In [None]:
# Split the cell types found in the metadata into the listed "NN" types and
# everything else.
meta = pd.read_csv("../221027_AMB_metadata.csv", index_col=0)
all_celltypes = meta["MajorType"].unique().tolist()
nn_celltypes = ['Oligo NN','Astro NN','OPC NN','Microglia NN']

# Every metadata cell type that is not in nn_celltypes, in metadata order.
# A plain difference is used instead of the symmetric difference (`^`): `^`
# would also add any nn_celltypes label that is absent from the metadata, and
# going through a set made the list order arbitrary between runs.
neu_celltypes = [celltype for celltype in all_celltypes if celltype not in nn_celltypes]

len(all_celltypes), len(nn_celltypes), len(neu_celltypes)

In [None]:
# Re-read the metadata and keep only 8wk / 18mo cells whose MajorType is in
# neu_celltypes.
meta = pd.read_csv("../221027_AMB_metadata.csv", index_col=0)
age_mask = meta["Age"].isin(["8wk", "18mo"])
celltype_mask = meta["MajorType"].isin(neu_celltypes)
meta = meta[age_mask & celltype_mask]

In [None]:
# Open the dataset, passing the filtered metadata index as `use_obs`, then
# select the "CHN" entry along mc_type.
mcds = MCDS.open(mcds_paths, var_dim=var_dim, use_obs=meta.index)
mcds = mcds.sel(mc_type="CHN")

In [None]:
# Attach the per-cell annotations from the metadata as dataset coordinates.
for coord_name in ('MajorType', 'Age'):
    mcds.coords[coord_name] = meta[coord_name]

In [9]:
# Per-cell-type comparison of 8wk vs 18mo cells: a t-test per gene plus the
# difference of group means (18mo - 8wk). Genes passing both cutoffs are stored
# per cell type; the unfiltered p-value / change frames are collected in lists.
celltype_admgs_8wk_18mo = {}
all_celltype_gene_pvalue = []
all_celltype_gene_change = []

for mt, tmp_mcds in mcds.groupby('MajorType'):
    column = f"{mt}"

    # Cell ids of each age group within this cell type.
    cell_ids = tmp_mcds.get_index('cell')
    age_of_cell = tmp_mcds["Age"]
    young_cells = cell_ids[age_of_cell == "8wk"]
    old_cells = cell_ids[age_of_cell == "18mo"]

    # DataFrame whose columns are the genes (rows are selected by cell id).
    gene_frac = tmp_mcds["geneslop2k_da_frac"].to_pandas()
    genes = gene_frac.columns.tolist()
    young_frac = gene_frac.loc[young_cells]
    old_frac = gene_frac.loc[old_cells]

    # One test per gene (column-wise).
    t_stat, p_value = ttest_ind(young_frac, old_frac, axis=0)

    celltype_gene_pvalue = pd.DataFrame({column: p_value}, index=genes)
    all_celltype_gene_pvalue.append(celltype_gene_pvalue)

    celltype_gene_change = pd.DataFrame(old_frac.mean() - young_frac.mean(), columns=[column])
    all_celltype_gene_change.append(celltype_gene_change)

    # Filter first on p-value, then on the size of the mean change.
    significant_genes = celltype_gene_pvalue[celltype_gene_pvalue[column] < pvalue_cutoff].index
    significant_change = celltype_gene_change.loc[significant_genes]
    large_change = significant_change[significant_change[column].abs() > ch_change_cutoff]

    celltype_admgs_8wk_18mo[mt] = large_change.index.tolist()

In [10]:
# Persist the {cell type: gene list} dict built by the 8wk vs 18mo t-test loop.
joblib.dump(celltype_admgs_8wk_18mo, 'result/celltype_admgs_8wk_18mo.dict')

['result/celltype_admgs_8wk_18mo.dict']

In [None]:
# all_celltype_gene_pvalue_df = pd.concat(all_celltype_gene_pvalue,axis = 1)
# all_celltype_gene_change_df = pd.concat(all_celltype_gene_change, axis = 1)

In [None]:
# NOTE(review): the pairwise-comparison cell near the top of this notebook reads
# 'result/pvalues_celltype_admgs_8wk_18mo.csv' and
# 'result/change_celltype_admgs_8wk_18mo.csv'. The two disabled lines below are
# the only place in this notebook that would write them, so those files must
# already exist on disk for a fresh run to succeed.
# all_celltype_gene_pvalue_df.to_csv('result/pvalues_celltype_admgs_8wk_18mo.csv')
# all_celltype_gene_change_df.to_csv('result/change_celltype_admgs_8wk_18mo.csv')

## 8wk, 9mo and 18mo

In [10]:
# Re-read the metadata and keep only 8wk / 9mo / 18mo cells whose MajorType is
# in neu_celltypes.
meta = pd.read_csv("../221027_AMB_metadata.csv", index_col=0)
age_mask = meta["Age"].isin(["8wk", '9mo', "18mo"])
celltype_mask = meta["MajorType"].isin(neu_celltypes)
meta = meta[age_mask & celltype_mask]

In [None]:
# Open the dataset, passing the filtered metadata index as `use_obs`, then
# select the "CHN" entry along mc_type.
mcds = MCDS.open(mcds_paths, var_dim=var_dim, use_obs=meta.index)
mcds = mcds.sel(mc_type="CHN")

In [11]:
# Attach the per-cell annotations from the metadata as dataset coordinates.
for coord_name in ('MajorType', 'Age'):
    mcds.coords[coord_name] = meta[coord_name]

In [12]:
# Per-cell-type three-age comparison (8wk, 9mo, 18mo): a one-way ANOVA per gene,
# then keep genes whose group means change monotonically across the three ages
# and whose 8wk -> 18mo difference exceeds ch_change_cutoff.
celltype_admgs_8wk_9mo_18mo = {}
all_celltype_gene_pvalue = []
all_celltype_gene_change = []


for mt, tmp_mcds in mcds.groupby('MajorType'):

    # Cell ids per age group, taken from the group's own "Age" coordinate.
    # (The previous version read `tmp_meta` and `test_celltype`, which are never
    # defined in this notebook, so the cell failed on a fresh kernel.)
    cell_ids = tmp_mcds.get_index('cell')
    tmp_8wk_cells = cell_ids[tmp_mcds["Age"] == "8wk"]
    tmp_9mo_cells = cell_ids[tmp_mcds["Age"] == "9mo"]
    tmp_18mo_cells = cell_ids[tmp_mcds["Age"] == "18mo"]

    # DataFrame whose columns are the genes (rows are selected by cell id).
    tmp_gene_frac = tmp_mcds["geneslop2k_da_frac"].to_pandas()

    genes = tmp_gene_frac.columns.tolist()

    # One ANOVA per gene (column-wise) across the three age groups.
    F, p_value = f_oneway(
        tmp_gene_frac.loc[tmp_8wk_cells],
        tmp_gene_frac.loc[tmp_9mo_cells],
        tmp_gene_frac.loc[tmp_18mo_cells],
        axis=0,
    )
    celltype_gene_pvalue = pd.DataFrame({f"{mt}": p_value}, index=genes)
    celltype_gene_pvalue = celltype_gene_pvalue[celltype_gene_pvalue[f'{mt}'] < pvalue_cutoff]

    # Mean per gene and age group, restricted to the significant genes.
    average_df = pd.concat(
        [
            tmp_gene_frac.loc[tmp_8wk_cells].mean(),
            tmp_gene_frac.loc[tmp_9mo_cells].mean(),
            tmp_gene_frac.loc[tmp_18mo_cells].mean(),
        ],
        axis=1,
    )
    average_df = average_df.loc[celltype_gene_pvalue.index]
    average_df.columns = ['8wk', '9mo', '18mo']

    # Strictly monotonic trends only: 8wk < 9mo < 18mo or 8wk > 9mo > 18mo.
    increase_df = average_df[(average_df['8wk'] < average_df['9mo']) & (average_df['9mo'] < average_df['18mo'])].copy()
    decrease_df = average_df[(average_df['8wk'] > average_df['9mo']) & (average_df['9mo'] > average_df['18mo'])].copy()

    # Require a large enough overall change between the youngest and oldest group.
    increase_df = increase_df[abs(increase_df['18mo'] - increase_df['8wk']) > ch_change_cutoff]
    decrease_df = decrease_df[abs(decrease_df['18mo'] - decrease_df['8wk']) > ch_change_cutoff]

    average_df = pd.concat([increase_df, decrease_df])
    celltype_admgs_8wk_9mo_18mo[mt] = average_df.index.tolist()
    


In [14]:
# Persist the {cell type: gene list} dict built by the three-age ANOVA loop.
# NOTE(review): this writes to the working directory, whereas the other dumps in
# this notebook write under 'result/' — confirm the intended location.
joblib.dump(celltype_admgs_8wk_9mo_18mo, 'celltype_admgs_8wk_9mo_18mo.dict')

['celltype_admgs_8wk_9mo_18mo.dict']