In [None]:
import numpy as np # scientific computing
import pandas as pd # data loading and processing
import os # os operations
import matplotlib.pyplot as plt # for generating figures
import math
import matplotlib.dates as mdates
import seaborn as sns # for generating visualizations, better support with pandas than matplotlib
from scipy import stats
from sklearn.impute import SimpleImputer

from analysis import *

In [None]:
# Load the raw inputs once (get_raw_data is presumably provided by the star-import
# from `analysis` — confirm). `data` is handed to get_data() for each cohort in the
# screening loop; `hccdb` is not referenced by the cells visible in this notebook.
data, hccdb = get_raw_data()

In [None]:
# Result accumulators for the signature screen: the main loop appends one inner
# list per signature to each, and every inner list holds one value per cohort.
r_all, p_all = [], []

In [None]:
# Signature screen: for every gene signature, score RRM2B against the signature's
# gene set in each cohort, draw one panel per cohort, and tabulate the two values
# returned by analyse() (presumably correlation coefficient r and p-value — confirm).

# Column names in the gene-set CSV; each column lists the genes of one signature.
signatures = ['HIF-1a_final', 'HSF1_final', 'NF-kB_final', 'TP53_final', 'NRF2_final','AOS_final', 'AP-1_final', 'FOXO_final', 'PGC1-a_final']

# Cohort identifiers passed to get_data(); 'PANCAN' is currently left out.
# NOTE(review): 'DBLC' may be a transposition of the TCGA code 'DLBC' — confirm
# against the keys get_data() accepts before changing it.
databases = ['HNSC', 'SARC', 'LUSC', 'UCS', 'STAD', 'LAML', 'PRAD', 'PAAD', 'BRCA', 'DBLC', 'SKCM', 'OV', 'ESCA']

genesets_all = pd.read_csv("./data/oxstress genes.csv", index_col=None, header=0)

# Start from empty accumulators so that re-running this cell does not append a
# second copy of every row to results left over from a previous execution.
r_all = []
p_all = []

# main loop: one figure per signature, one panel per cohort
for s in signatures:
    # 3 x 6 grid = 18 panels; zip() below stops after the last cohort, so the
    # trailing panels are simply never passed to analyse().
    fig, axs = plt.subplots(3, 6, figsize=(60, 40))
    fig.subplots_adjust(hspace=0.6, wspace=0.4)
    fig.suptitle('RRM2B vs ' + s, fontsize=40)

    rtemp = []
    ptemp = []

    # Genes of this signature (NaN padding of shorter columns dropped) ...
    gene_set = genesets_all[s].dropna().tolist()
    # ... plus RRM2B itself, de-duplicated in case RRM2B is part of the signature.
    targets = list(set(["RRM2B"] + gene_set))

    for db, ax in zip(databases, axs.ravel()):
        # load data
        df = get_data(data, hccdb=None, db=db)

        data_new = process_data(df=df, targets=targets, x_var_names=["RRM2B"], y_var_names=gene_set, pheno_filtered=None, outlier_corrected=True)
        r, p = analyse(data=data_new, fig=fig, db=db, ax=ax, fn="signature screen " + s + '.png', x_label="RRM2B", y_label=s, x_target="x_composite_score", y_target="y_composite_score")
        rtemp.append(r)
        ptemp.append(p)

    # One row per signature, one entry per cohort.
    r_all.append(rtemp)
    p_all.append(ptemp)

print("done")

# r_all / p_all are (signatures x databases): label them that way, then transpose
# so the saved tables have cohorts as rows and signatures as columns. Labelling
# the rows with `databases` directly raises a shape-mismatch ValueError.
df_r = pd.DataFrame(r_all, index=signatures, columns=databases).T
df_p = pd.DataFrame(p_all, index=signatures, columns=databases).T
df_r.to_csv("r values signature screen.csv")
df_p.to_csv("p values signature screen.csv")


In [None]:
# Sanity check: how many values were collected for the first signature
# (the main loop appends one per cohort it processes).
len(r_all[0])

In [None]:
# Show both dimensions of the screen. A notebook cell only echoes its final
# expression, so the two lengths are displayed together as a single tuple:
# (number of signatures, number of cohorts).
len(signatures), len(databases)

In [None]:
# r_all / p_all hold one row per signature and one entry per cohort; build the
# frames in that orientation and flip them so cohorts become the rows.
df_r = pd.DataFrame(data=r_all, index=signatures, columns=databases).transpose()
df_p = pd.DataFrame(data=p_all, index=signatures, columns=databases).transpose()

# Persist both tables to the working directory.
df_r.to_csv("r values signature screen.csv")
df_p.to_csv("p values signature screen.csv")