In [None]:
import numpy as np # scientific computing
import pandas as pd # data loading and processing
import os # os operations
import matplotlib.pyplot as plt # for generating figures
import math
import matplotlib.dates as mdates
import seaborn as sns # for generating visualizations, better support with pandas than matplotlib
from scipy import stats
from sklearn.impute import SimpleImputer
from analysis import *

In [None]:
def database_screen(data, hccdb, gene_set, databases, x="RRM2B", y="AOS"):
    """Screen every database in ``databases`` for the ``x`` - ``y`` relationship.

    Master routine used to screen the (GSTA4, GSTP1, GSTA1) cancers; the
    defaults screen RRM2B against AOS.

    Parameters
    ----------
    data, hccdb
        Raw data objects, forwarded untouched to ``get_data``.
    gene_set
        Gene-set definitions, forwarded to ``get_xy_set``.
    databases
        Iterable of database names; one subplot axis is consumed per name.
    x, y
        Names of the two variables to compare. They select the gene sets,
        label the axes and form the figure title ``"<x> vs <y>"``.

    Returns
    -------
    tuple[list, list]
        Two lists holding, per processed database and in iteration order,
        the first and second value returned by ``analyse`` (unpacked here as
        ``r`` and ``p``).
    """
    # load gene names
    x_set, y_set, targets = get_xy_set(gene_set, xvar=x, yvar=y)

    # The figure title is also the stem of the image file name handed to
    # ``analyse``, so build it once and reuse it.
    title = x + " vs " + y

    # define subplot grid
    fig, axs = get_database_fig(title, x, y)

    r_values, p_values = [], []

    # main loop
    # NOTE: zip stops at the shorter input, so any database without a
    # matching axis in the grid is silently skipped.
    for db, ax in zip(databases, axs.ravel()):
        print(db)
        df = get_data(data, hccdb, db)
        data_new = process_data(
            df,
            targets,
            x_var_names=x_set,
            y_var_names=y_set,
            pheno_filtered=None,
            outlier_corrected=True,
        )
        r, p = analyse(data_new, fig, db, ax, title + ' screen.png', x_label=x, y_label=y)
        r_values.append(r)
        p_values.append(p)

    return r_values, p_values



In [None]:
def main():
    """Run every pairwise screen over all databases and save the results.

    For each ``"X - Y"`` entry in ``analyses`` the pair is split into ``x``
    and ``y`` and passed to ``database_screen`` together with the full list
    of database names. The collected values are written to two CSV files in
    the current working directory (one for the r values, one for the p
    values), with one row per analysis and one column per database.
    """
    data, hccdb = get_raw_data()
    gene_set = get_gene_sets()
    r_all = []
    p_all = []
    analyses = ["RRM2B - NRF2",
                "RRM2B - AOS",
                "RRM2B - G6PD",
                "RRM2B - p53",
                "p53 - G6PD",
                "p53 - NRF2",
                "p53 - AOS",
                ]
    # load database names
    # NOTE(review): 'DBLC' looks like a transposition of the TCGA code 'DLBC';
    # left unchanged because get_data may key on this exact spelling - confirm.
    databases = ['HCCDB-1', 'HCCDB-3', 'HCCDB-4',  'HCCDB-8', 'HCCDB-9', 'HCCDB-11', 
        'HCCDB-12', 'HCCDB-13', 'HCCDB-14', 'HCCDB-16', 'HCCDB-17', 'HCCDB-18',
        'ACC', 'BLCA', 'DBLC', 'UCEC', 'SKCM', 'HNSC', 'PRAD', 'KIRP',
        'PAAD', 'SARC', 'CESC', 'COAD', 'LUSC', 'READ', 'KIRC', 'LIHC',
        'BRCA', 'OV', 'UCS', 'GBM', 'KICH', 'THCA', 'LGG', 'LUAD', 'MESO',
        'PCPG', 'TGCT', 'UVM', 'THYM', 'CHOL', 'ESCA', 'STAD', 'LAML', 'PANCAN']

    for a in analyses:
        x, y = a.split(" - ")
        # `databases` is a required positional parameter of database_screen.
        r, p = database_screen(data, hccdb, gene_set, databases, x=x, y=y)
        r_all.append(r)
        p_all.append(p)

    # Rows are analyses, columns are databases; each per-analysis list must
    # therefore contain one value per database.
    df_r = pd.DataFrame(r_all, columns=databases, index=analyses)
    df_p = pd.DataFrame(p_all, columns=databases, index=analyses)
    df_r.to_csv("r values cancer screen (GSTA4, GSTP1, GSTA1).csv")
    df_p.to_csv("p values cancer screen (GSTA4, GSTP1, GSTA1).csv")