In [1]:
import os

import anndata
import joblib
import pandas as pd
from ALLCools.mcds import MCDS
from scipy.stats import f_oneway, ttest_ind

In [2]:
# Gene name <-> gene ID lookups, deserialized with joblib
# (presumably dicts, going by the file names — TODO confirm).
# NOTE(review): joblib.load unpickles its input, so these files must be trusted.
gene_name_to_id = joblib.load('../freq_used_files/gene_name_to_id_dict')
gene_id_to_name = joblib.load('../freq_used_files/gene_id_to_name_dict')
# Gene metadata table; the first CSV column is used as the row index.
gene_meta = pd.read_csv("../freq_used_files/GeneMetadata.csv.gz", index_col=0)

In [3]:
# Labels of the two age groups being compared. They are matched against the
# "Age" metadata column and also embedded in the output file names.
age_young = '9mo'
age_old = '18mo'

In [4]:
# Location of the MCDS dataset and the name of its feature dimension;
# both are passed to MCDS.open further down.
mcds_paths = "../freq_used_files/geneslop2k_frac.mcds"
var_dim = "geneslop2k"

In [5]:
# Load the AnnData object and flip the sign of its data matrix.
# NOTE(review): `adata` is not referenced by any later cell visible in this
# notebook — confirm whether this (potentially large) load is still needed.
adata = anndata.read_h5ad('All_Genes.mCH.h5ad')
adata.X = - adata.X

In [6]:
# NOTE(review): these two loads repeat the ones at the top of the notebook
# verbatim; they rebind the same names to freshly loaded copies of the same files.
gene_name_to_id = joblib.load('../freq_used_files/gene_name_to_id_dict')
gene_id_to_name = joblib.load('../freq_used_files/gene_id_to_name_dict')

In [7]:
# Thresholds used by the per-cell-type test below: a gene is kept when its
# t-test p-value is below `pvalue_cutoff` AND the absolute difference of the
# group means exceeds `ch_change_cutoff`.
pvalue_cutoff = 0.001
ch_change_cutoff = 0.4
# Unused CG threshold, kept for reference only.
#cg_change_cutoff = 0.2

In [8]:
# Load the per-cell metadata (first CSV column becomes the index) and split
# the annotated major cell types into the hand-listed "NN" types and the rest.
meta = pd.read_csv("../freq_used_files/221027_AMB_metadata.csv", index_col=0)
all_celltypes = meta["MajorType"].unique().tolist()
nn_celltypes = ['Oligo NN','Astro NN','OPC NN','Microglia NN']

# Everything in the metadata that is not one of the listed NN types.
# A plain difference is used rather than a symmetric difference (`^`): with `^`,
# an NN label absent from the metadata would be added to `neu_celltypes`.
# Filtering the list also keeps a deterministic order (order of first
# appearance in the metadata) instead of arbitrary set order.
nn_celltype_set = set(nn_celltypes)
neu_celltypes = [celltype for celltype in all_celltypes if celltype not in nn_celltype_set]

len(all_celltypes), len(nn_celltypes), len(neu_celltypes)

(38, 4, 34)

In [9]:
# Re-read the metadata and keep only the cells that belong to one of the two
# compared age groups and to a cell type listed in `neu_celltypes`.
meta = pd.read_csv("../freq_used_files/221027_AMB_metadata.csv", index_col=0)
in_compared_ages = meta["Age"].isin([age_young, age_old])
in_selected_types = meta["MajorType"].isin(neu_celltypes)
meta = meta[in_compared_ages & in_selected_types]

In [10]:
# Open the dataset restricted to the cells retained in `meta`, then keep only
# the "CHN" entry along the mc_type dimension.
mcds = MCDS.open(mcds_paths, var_dim=var_dim, use_obs=meta.index)
mcds = mcds.sel(mc_type="CHN")

In [11]:
# Attach the per-cell annotations from `meta` as coordinates so the dataset
# can be grouped by cell type and masked by age.
# NOTE(review): presumably aligned on the cell index of `meta` — confirm the
# metadata index name matches the dataset's cell dimension.
for annotation in ('MajorType', 'Age'):
    mcds.coords[annotation] = meta[annotation]

In [12]:
# Per-cell-type comparison of gene-level values between the two age groups.
#
# For every MajorType group:
#   1. run an independent two-sample t-test per gene (young vs. old cells),
#   2. compute the per-gene change as mean(old) - mean(young),
#   3. keep genes with p-value < pvalue_cutoff and |change| > ch_change_cutoff.
#
# Outputs (module-level names used by the following cells):
#   celltype_admgs            dict: cell type -> list of genes passing both filters
#   all_celltype_gene_pvalue  list of one-column DataFrames of p-values (one per cell type)
#   all_celltype_gene_change  list of one-column DataFrames of mean changes (one per cell type)
#
# NOTE(review): p-values are used raw; no multiple-testing correction is applied here.
# NOTE(review): ttest_ind runs with its default NaN handling while pandas `mean`
# skips NaN, so a gene with missing values can get a NaN p-value (and is then
# dropped by the `<` comparison) even though its mean change is defined.
celltype_admgs = {}
all_celltype_gene_pvalue = []
all_celltype_gene_change = []

# Name of the data variable holding the per-gene values. Derived from the
# configured `var_dim` instead of being hard-coded, so changing `var_dim`
# keeps this cell consistent with the MCDS.open call.
frac_da_name = f"{var_dim}_da_frac"

for mt, tmp_mcds in mcds.groupby('MajorType'):
    # Split this cell type's cells by age group. `.values` turns the comparison
    # into a plain boolean array before it is used to mask the pandas Index.
    cell_ids = tmp_mcds.get_index('cell')
    tmp_young_cells = cell_ids[(tmp_mcds["Age"] == age_young).values]
    tmp_old_cells = cell_ids[(tmp_mcds["Age"] == age_old).values]

    # cells x genes table for this cell type.
    tmp_gene_frac = tmp_mcds[frac_da_name].to_pandas()
    genes = tmp_gene_frac.columns.tolist()

    # Slice each age group once and reuse it for both the test and the means.
    young_frac = tmp_gene_frac.loc[tmp_young_cells]
    old_frac = tmp_gene_frac.loc[tmp_old_cells]

    # One t-test per gene (column-wise); only the p-values are needed.
    p_value = ttest_ind(young_frac, old_frac, axis=0).pvalue

    celltype_gene_pvalue = pd.DataFrame({f"{mt}": p_value}, index=genes)
    all_celltype_gene_pvalue.append(celltype_gene_pvalue)
    f_celltype_gene_pvalue = celltype_gene_pvalue[celltype_gene_pvalue[f'{mt}'] < pvalue_cutoff]

    # Positive values mean the old group has the higher mean.
    celltype_gene_change = (old_frac.mean() - young_frac.mean()).to_frame(f'{mt}')
    all_celltype_gene_change.append(celltype_gene_change)

    # Restrict to significant genes first, then apply the effect-size cutoff.
    f_celltype_gene_change = celltype_gene_change.loc[f_celltype_gene_pvalue.index]
    f_celltype_gene_change = f_celltype_gene_change[f_celltype_gene_change[f'{mt}'].abs() > ch_change_cutoff]

    celltype_admgs[mt] = f_celltype_gene_change.index.tolist()

In [13]:
# Persist the per-cell-type gene lists. The output directory is created first
# so the notebook also runs end-to-end on a fresh checkout without `result/`.
os.makedirs('result', exist_ok=True)
joblib.dump(celltype_admgs, f'result/celltype_admgs_{age_young}_{age_old}.dict')

['result/celltype_admgs_9mo_18mo.dict']

In [14]:
# Combine the per-cell-type one-column frames side by side, giving one
# column per cell type for the p-values and for the mean changes.
all_celltype_gene_pvalue_df = pd.concat(all_celltype_gene_pvalue, axis="columns")
all_celltype_gene_change_df = pd.concat(all_celltype_gene_change, axis="columns")

In [15]:
# Write the combined p-value and mean-change tables as CSV under result/;
# the file names carry the two compared age labels.
all_celltype_gene_pvalue_df.to_csv(f'result/pvalues_celltype_admgs_{age_young}_{age_old}.csv')
all_celltype_gene_change_df.to_csv(f'result/change_celltype_admgs_{age_young}_{age_old}.csv')