In [1]:
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt

## Create combined in vivo + ex vivo DGE tables for correlation analyses

In [2]:
## directory paths
dge_dir = '/home/niklas/projects/niche_environments_FIBROSIS/PCLS_human/01_data/ASK_joint/DGE_treatment_vs_FC/221204_PCLS_human_ASK_joint_'

In [3]:
cell_type_names = ['Aberrant_Basaloid','ectopic_EC','Myofibroblasts','Pericytes','Profibrotic_Macrophages']

In [4]:
## initialize master tables
master = pd.DataFrame()
master_filtered_pct_expressed = pd.DataFrame()
master_filtered_pval = pd.DataFrame()

In [5]:
for ct in cell_type_names:
    ## read data
    cmp4_table = pd.read_csv(dge_dir + ct + '_FC+CMP4_vs_FC_DGE_results.csv', index_col = 0)
    nintedanib_table = pd.read_csv(dge_dir + ct + '_FC+Nintedanib_vs_FC_DGE_results.csv', index_col = 0)
    
    ## select important columns only
    cmp4_table = cmp4_table[['log2fc','qval','pct.FCs','pct.FC+CMP4s']]
    nintedanib_table = nintedanib_table[['log2fc','qval','pct.FCs','pct.FC+Nintedanibs']]
    
    ### filter DGE tables by pct expressed
    #cmp4_table = cmp4_table[cmp4_table['pct.FC+CMP4'] > 0.10]
    #cmp4_table = cmp4_table[cmp4_table['pct.FCs'] > 0.10]
    #nintedanib_table = nintedanib_table[nintedanib_table['pct.FC+Nintedanib'] > 0.10]
    #nintedanib_table = nintedanib_table[nintedanib_table['pct.FCs'] > 0.10]
    
    ## replace exorbitant high logFC values
    cmp4_table['log2fc'] = [5 if logfc > 5 else logfc for logfc in cmp4_table['log2fc']]
    cmp4_table['log2fc'] = [-5 if logfc < -5 else logfc for logfc in cmp4_table['log2fc']]
    nintedanib_table['log2fc'] = [5 if logfc > 5 else logfc for logfc in nintedanib_table['log2fc']]
    nintedanib_table['log2fc'] = [-5 if logfc < -5 else logfc for logfc in nintedanib_table['log2fc']]
    
    ## rename columns
    cmp4_table.rename(columns = {'log2fc':str(ct +'_CMP4_log2FC'),
                                   'qval':str(ct +'_CMP4_pval_adj'),
                                   'pct.FCs':str(ct +'_CMP4_pct_FC'),
                                   'pct.FC+CMP4s':str(ct +'_CMP4_pct_FC_CMP4')}, inplace = True)
    nintedanib_table.rename(columns = {'log2fc':str(ct +'_Nintedanib_log2FC'),
                                   'qval':str(ct +'_Nintedanib_pval_adj'),
                                   'pct.CCs':str(ct +'_Nintedanib_pct_FC'),
                                   'pct.FC+Nintedanibs':str(ct +'_Nintedanib_pct_FC_Nintedanib')}, inplace = True)
    
    
    ## create separate master table filtered by qval
    cmp4_table_filtered_pval = cmp4_table[cmp4_table[str(ct +'_CMP4_pval_adj')] < 0.05 ]
    nintedanib_table_filtered_pval = nintedanib_table[nintedanib_table[str(ct +'_Nintedanib_pval_adj')] < 0.05 ]
    
    ## create separate master table filtered by pct expressed (in addition to qval)
    cmp4_table_filtered_pct_expressed = cmp4_table_filtered_pval[cmp4_table_filtered_pval[str(ct +'_CMP4_pct_FC_CMP4')] > 0.1]
    nintedanib_table_filtered_pct_expressed = nintedanib_table_filtered_pval[nintedanib_table_filtered_pval[str(ct +'_Nintedanib_pct_FC_Nintedanib')] > 0.1]
    
    ## merge tables
    comparison = pd.concat([cmp4_table, nintedanib_table], axis=1, join='outer')
    comparison_filtered_pval  = pd.concat([cmp4_table_filtered_pval, nintedanib_table_filtered_pval], axis=1, join='outer')
    comparison_filtered_pct_expressed = pd.concat([cmp4_table_filtered_pct_expressed, nintedanib_table_filtered_pct_expressed], axis=1, join='outer')
    
    ### save results table
    #csv_dir = str('/home/niklas/projects/niche_environments_FIBROSIS/PCLS_human/01_data/ASK_joint/DGE_correlation_tables/221214_dge_correlation_table_' + ct + '.csv')
    #comparison.to_csv(csv_dir, index = True)
    
    ## add column to 'MASTER' table
    master = pd.concat([master, comparison], axis=1, join='outer')
    master_filtered_pct_expressed = pd.concat([master_filtered_pct_expressed, comparison_filtered_pct_expressed], axis=1, join='outer')
    master_filtered_pval = pd.concat([master_filtered_pval, comparison_filtered_pval], axis=1, join='outer')

In [6]:
master.head(15)

Unnamed: 0_level_0,Aberrant_Basaloid_CMP4_log2FC,Aberrant_Basaloid_CMP4_pval_adj,Aberrant_Basaloid_CMP4_pct_FC,Aberrant_Basaloid_CMP4_pct_FC_CMP4,Aberrant_Basaloid_Nintedanib_log2FC,Aberrant_Basaloid_Nintedanib_pval_adj,pct.FCs,Aberrant_Basaloid_Nintedanib_pct_FC_Nintedanib,ectopic_EC_CMP4_log2FC,ectopic_EC_CMP4_pval_adj,...,pct.FCs,Pericytes_Nintedanib_pct_FC_Nintedanib,Profibrotic_Macrophages_CMP4_log2FC,Profibrotic_Macrophages_CMP4_pval_adj,Profibrotic_Macrophages_CMP4_pct_FC,Profibrotic_Macrophages_CMP4_pct_FC_CMP4,Profibrotic_Macrophages_Nintedanib_log2FC,Profibrotic_Macrophages_Nintedanib_pval_adj,pct.FCs,Profibrotic_Macrophages_Nintedanib_pct_FC_Nintedanib
gene,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
ZNF724,5.0,0.0,0.0,0.013661,5.0,0.0,0.0,0.020408,,,...,,,,,,,,,,
NDP,5.0,0.0,0.0,0.013661,,,,,,,...,,,,,,,,,,
GABRP,3.667137,0.002634,0.002894,0.027322,2.397648,0.06153309,0.002894,0.020408,,,...,,,,,,,,,,
OR5AU1,3.11617,0.059704,0.001447,0.016393,,,,,,,...,,,,,,,,,,
NEB,2.916769,0.001129,0.001447,0.013661,,,,,,,...,0.016071,0.026074,,,,,2.136221,0.056988,0.001791,0.011029
HP,2.91062,0.077657,0.002894,0.013661,,,,,,,...,,,,,,,,,,
CHIA,2.835474,0.074358,0.001447,0.016393,,,,,,,...,,,,,,,,,,
SOD2-1,2.810122,0.007675,0.002894,0.030055,1.229754,0.01843329,0.002894,0.020408,,,...,,,-5.0,0.0,0.005372,0.0,-5.0,0.0,0.005372,0.0
AC011498.4,2.680487,0.000612,0.005789,0.043716,1.594049,5.609099e-07,0.005789,0.054422,,,...,,,-5.0,0.0,0.005372,0.0,-5.0,0.0,0.005372,0.0
MYLK3,2.648371,0.090089,0.001447,0.016393,,,,,,,...,,,,,,,,,,


In [7]:
master.shape

(14386, 40)

In [8]:
master_filtered_pct_expressed.shape

(9505, 40)

In [9]:
master_filtered_pval.shape

(13856, 40)

In [10]:
## save master tables
csv_dir = str('/home/niklas/projects/niche_environments_FIBROSIS/HUMAN_invivo_exvivo_comparison/01_data/ASK_joint/221214_ASK_joint_MASTER_dge_table_cell_circuit_treatment.csv')
master.to_csv(csv_dir, index = True)

In [11]:
master_filtered_pct_expressed.shape

(9505, 40)

In [12]:
## save master tables
csv_dir = str('/home/niklas/projects/niche_environments_FIBROSIS/HUMAN_invivo_exvivo_comparison/01_data/ASK_joint/221214_ASK_joint_MASTER_dge_table_cell_circuit_treatment_filtered_pct_expr.csv')
master_filtered_pct_expressed.to_csv(csv_dir, index = True)

In [13]:
master_filtered_pval.shape

(13856, 40)

In [14]:
## save master tables
csv_dir = str('/home/niklas/projects/niche_environments_FIBROSIS/HUMAN_invivo_exvivo_comparison/01_data/ASK_joint/221214_ASK_joint_MASTER_dge_table_cell_circuit_treatment_filtered_pval.csv')
master_filtered_pval.to_csv(csv_dir, index = True)