In [9]:
import numpy as np
import pandas as pd

# Show every column when a DataFrame is displayed (None removes the column cap).
pd.options.display.max_columns = None

In [108]:
# Input tables for this analysis; paths are relative to the notebook's working directory.
EXON_CORR_CSV = "data/corrs/hahn_2023_cortex_STAR_TPM_OR_QN_mergeParam0.85_top_MO_Qval_modules_exon_corr.csv"
TOP_QVAL_MODULES_CSV = "data/enrichments/hahn_2023_cortex_STAR_TPM_OR_QN_mergeParam0.85_top_MO_Qval_modules.csv"
EXPR_TPM_CSV = "hahn_2023_cortex_STAR_TPM_SampleNetworks/All_06-23-55/hahn_2023_cortex_STAR_TPM_All_68_outliers_removed.csv"

# The correlation and expression tables use their first CSV column as the row index;
# the module table is read with the default index.
corr_df = pd.read_csv(EXON_CORR_CSV, index_col=0)
top_qval_mods_df = pd.read_csv(TOP_QVAL_MODULES_CSV)
expr_tpm = pd.read_csv(EXPR_TPM_CSV, index_col=0)

In [126]:
# Keep the cell types whose row in the module table has Qval below 1e-20.
passes_qval_cutoff = top_qval_mods_df['Qval'] < 1e-20
ctypes = top_qval_mods_df.loc[passes_qval_cutoff, 'Cell_type'].values

In [122]:
# Row-wise mean of the TPM table (averaged across its columns), labelled so the
# merge below produces a "Mean_expr" column.
mean_expr = expr_tpm.mean(axis=1).rename("Mean_expr")

# Final column layout: identifiers, mean expression, then one column per selected cell type.
col_order = ["Gene", "Exon", "Mean_expr", *ctypes.tolist()]

# Move the correlation table's index into an "Exon" column, then attach the mean
# expression by matching each row's "Gene" value against the index of mean_expr.
# NOTE(review): assumes the expr_tpm index holds gene names matching corr_df["Gene"] — confirm.
corr_by_exon = corr_df.reset_index(names="Exon")
corr_expr_df = pd.merge(
    corr_by_exon,
    mean_expr,
    how="left",
    left_on="Gene",
    right_index=True,
)
corr_expr_df = corr_expr_df[col_order]

In [131]:
# Position within `ctypes` of the cell type to inspect in this cell.
idx = 3
working_ctype = ctypes[idx]
print("Working cell type:", working_ctype)

# Rows of the module table belonging to the selected cell type; the first match
# supplies both the module name and the path to its kME table.
ctype_row = top_qval_mods_df['Cell_type'].eq(working_ctype)
mod = top_qval_mods_df.loc[ctype_row, 'Module'].values[0]
print("Working module:", mod)

kme_path = top_qval_mods_df.loc[ctype_row, 'kME_path'].values[0]
kme_df = pd.read_csv(kme_path)

# Keep the first column, every column whose name contains "TopModPosBC",
# and the kME column of the working module.
is_top_mod_col = kme_df.columns.str.contains("TopModPosBC")
kme_cols = [kme_df.columns[0]] + list(kme_df.columns[is_top_mod_col]) + [f"kME{mod}"]
kme_mod_df = kme_df[kme_cols]

# Inner join of the kME subset with the exon correlation table on the shared "Gene" column.
corr_mod_df = pd.merge(kme_mod_df, corr_expr_df, on="Gene")

# Optional alternative view: only genes NOT assigned to the working module.
# mask = ~corr_mod_df.iloc[:, 1].isin([mod])
# corr_mod_df[mask].sort_values(working_ctype, ascending=False).head(20)

# Top 30 rows ranked by the selected cell type's column, highest first.
corr_mod_df.sort_values(by=working_ctype, ascending=False).head(30)

Working cell type: BAKKEN_2019_PVALB_GABAERGIC_DE_GABA_CLUSTERS
Working module: turquoise


Unnamed: 0,Gene,TopModPosBC_3.72e-08,kMEturquoise,Exon,Mean_expr,YANG_PFC_2021_OLIGODENDROCYTE,BARRES_OLIGODENDROCYTES,YANG_PFC_2021_NRGN_NEURON,BAKKEN_2019_PVALB_GABAERGIC_DE_GABA_CLUSTERS,YANG_PFC_2021_L2_3_EXCITATORY_NEURON,BAKKEN_2019_SST_CHODL_GABAERGIC_DE_GABA_CLUSTERS,YANG_PFC_2021_ASTROCYTE,Mukamel_ExcitatoryNeurons,YANG_PFC_2021_OPC,SUGINO_UP_GLUTAMATERGIC_NEURONS
2052,Camta1,darkred,0.602249,ENSMUSG00000014592_ProteinCoding_1,60.062618,0.431599,0.441579,0.932022,0.927582,0.926205,-0.711426,-0.554159,0.925756,-0.920486,0.926782
7857,Nnat,blue,-0.957015,ENSMUSG00000067786_ProteinCoding_5,221.613463,0.476386,0.488042,0.92021,0.925316,0.921245,-0.653139,-0.472223,0.922959,-0.93017,0.923224
2034,Camk2g,turquoise,0.978504,ENSMUSG00000021820_ProteinCoding_6,138.839471,0.485554,0.495627,0.903646,0.906378,0.901191,-0.63851,-0.568799,0.904976,-0.908589,0.903031
2033,Camk2g,turquoise,0.978504,ENSMUSG00000021820_ProteinCoding_5,138.839471,0.498114,0.506059,0.886984,0.891291,0.890153,-0.527007,-0.654029,0.893061,-0.889633,0.892295
5292,Grin1,turquoise,0.883507,ENSMUSG00000026959_ProteinCoding_3,60.680152,0.554133,0.56106,0.894026,0.891003,0.889244,-0.581097,-0.565143,0.894058,-0.901002,0.890292
10148,Rps24,red,-0.68922,ENSMUSG00000025290_ProteinCoding_1,59.303421,0.460079,0.470777,0.892755,0.886237,0.882741,-0.659529,-0.50671,0.884527,-0.875263,0.884702
7855,Nnat,blue,-0.957015,ENSMUSG00000067786_ProteinCoding_3,221.613463,0.428912,0.441031,0.872906,0.879158,0.874142,-0.653969,-0.425979,0.875459,-0.883684,0.876825
7856,Nnat,blue,-0.957015,ENSMUSG00000067786_ProteinCoding_4,221.613463,0.430417,0.442516,0.871626,0.878268,0.873415,-0.649517,-0.429889,0.874831,-0.881965,0.876348
8115,Ociad1,,-0.41534,ENSMUSG00000029152_ProteinCoding_5,190.104386,0.581701,0.589255,0.881755,0.877607,0.876524,-0.501611,-0.519646,0.882308,-0.892598,0.878089
1987,Cadps,turquoise,0.913917,ENSMUSG00000054423_ProteinCoding_5,26.433888,0.506782,0.514165,0.873742,0.872635,0.871879,-0.581817,-0.55451,0.874885,-0.861579,0.87391


In [84]:
# Show the full kME row(s) for the gene "Nnat".
# NOTE(review): the saved output below has an older execution count than the cell
# that loads kme_df and shows different TopModPosBC columns — rerun to confirm it
# reflects the currently loaded table.
is_nnat = kme_df['Gene'].eq("Nnat")
kme_df[is_nnat]

Unnamed: 0,Gene,ModSeed,MeanExpr,MeanExprPercentile,TopModPosBC_1.53e-07,TopModPosFDR_0.0229,TopModNegBC_1.53e-07,TopModNegFDR_0.0229,kMEcyan,kMEcyan.pval,kMEmagenta,kMEmagenta.pval,kMEyellow,kMEyellow.pval,kMEpink,kMEpink.pval,kMEbrown,kMEbrown.pval,kMEblack,kMEblack.pval,kMEsalmon,kMEsalmon.pval,kMEblue,kMEblue.pval,kMElightcyan,kMElightcyan.pval,kMEgreen,kMEgreen.pval,kMEpurple,kMEpurple.pval,kMEtan,kMEtan.pval,kMEgreenyellow,kMEgreenyellow.pval,kMEmidnightblue,kMEmidnightblue.pval,kMEturquoise,kMEturquoise.pval,kMEred,kMEred.pval
13844,Nnat,,6.239052,45.950272,brown,brown,turquoise,turquoise,0.517815,6e-06,0.570344,3.826648e-07,0.597614,7.431135e-08,0.687568,9.394827e-11,0.931481,1.0878449999999999e-30,0.903927,4.86784e-26,0.881688,3.2613250000000005e-23,-0.633552,6.690487e-09,-0.25579,0.03526,-0.618231,1.937957e-08,-0.690841,7.043456e-11,-0.494823,1.8e-05,-0.80422,1.471161e-16,-0.755997,9.13369e-14,-0.951891,1.287986e-35,-0.831041,1.794313e-18
