# Summarize results

In [1]:
# Imports

import sys, os
import pandas as pd
import numpy as np

# Variables

# Root directory: every results path below is built relative to the
# directory the notebook is launched from.
PATH_ = os.getcwd()

# Result sub-folders to compare. The first entry is the reference run;
# the summary code iterates over `folders[1:]` for the comparison runs.
folders = ["gsea_fulluniv", "gsea_reduniv"]

# Sub-folder whose output.tsv provides the rows of each summary table.
reference = "gsea_fulluniv"

In [2]:
# Condition identifiers; each one is used as a sub-directory name under
# the results root when paths are built.
FILENAMES = [
    "CSP_I7_C1", "CSP_RTS_CONTROL",
    "iRBC_I7_C1", "iRBC_RTS_CONTROL",
    "malaria_CSP_CONTROL", "malaria_CSP_RTS",
    "malaria_iRBC_CONTROL", "malaria_iRBC_RTS",
    "protected_CSP_C1", "protected_CSP_I7",
    "protected_iRBC_C1", "protected_iRBC_I7",
]

# Gene-set collection identifiers; each one is used as a sub-directory
# name inside every condition directory.
GENESETS = ["btm", "modules", "biocarta", "kegg", "reactome"]


In [14]:
def individual_summaries(filename, geneset):
    """Write ``summary.tsv`` for one condition / gene-set collection pair.

    The reference run's ``output.tsv`` is read and sorted by ``pval``; its
    rows become the rows of the summary. For every other run in
    ``folders[1:]`` one extra column is appended holding that run's p-value
    for the same ``(geneset, direction)`` pair, or ``NA`` when the pair is
    absent from that run.

    Relies on the module-level ``PATH_``, ``folders`` and ``reference``.

    Parameters
    ----------
    filename : str
        Condition sub-directory under ``<PATH_>/sysmalvac_results_wgcna``.
    geneset : str
        Collection sub-directory inside the condition directory.

    Returns
    -------
    None
        The result is written to
        ``<PATH_>/sysmalvac_results_wgcna/<filename>/<geneset>/summary.tsv``.
    """
    base_dir = os.path.join(PATH_, "sysmalvac_results_wgcna", filename, geneset)

    ## Read the reference

    ref_table = pd.read_csv(
        os.path.join(base_dir, reference, "output.tsv"), delimiter="\t"
    ).sort_values("pval")

    # One {(geneset, direction): pval} lookup per non-reference run.
    other_folders = folders[1:]
    lookups = []

    for fold in other_folders:
        table = pd.read_csv(os.path.join(base_dir, fold, "output.tsv"), delimiter="\t")
        names = np.array(table["geneset"])
        # The `np.float` alias was removed in NumPy 1.24; builtin `float`
        # is the same float64 dtype.
        pvals = np.array(table["pval"], dtype=float)
        directions = np.array(table["direction"])
        if "fisher_" in fold:
            # For fisher_* runs, keep only rows whose "A" value is at least 3.
            keep = np.array(table["A"]) >= 3
            names = names[keep]
            pvals = pvals[keep]
            directions = directions[keep]
        lookups.append({(g, d): p for g, p, d in zip(names, pvals, directions)})

    header = [
        "geneset", "size", "matched_size", "direction",
        "es", "nes", "pval", "fdr", "leading_edge",
    ] + list(other_folders)

    with open(os.path.join(base_dir, "summary.tsv"), "w") as f:
        f.write("\t".join(header) + "\n")
        for row in ref_table.values:
            # The reference table is addressed positionally: column 0 is the
            # gene-set name, column 3 the direction. The format string below
            # requires exactly nine reference columns.
            key = (row[0], row[3])
            cells = ["%s\t%d\t%d\t%s\t%.3f\t%.3f\t%.3E\t%.3E\t%s" % tuple(row)]
            for lookup in lookups:
                cells.append("%.3E" % lookup[key] if key in lookup else "NA")
            f.write("\t".join(cells) + "\n")

In [16]:
# Write one summary.tsv per (condition, collection) pair, visiting every
# collection of a condition before moving on to the next condition.
condition_collection_pairs = [(f, g) for f in FILENAMES for g in GENESETS]
for filename, geneset in condition_collection_pairs:
    individual_summaries(filename, geneset)
        

In [17]:
# Now do a macro summary: stack every per-condition / per-collection
# summary.tsv into one table, tagging each row with the condition and
# collection it was read from.

frames = []

for filename in FILENAMES:
    for geneset in GENESETS:
        PATH = PATH_ + "/sysmalvac_results_wgcna/" + filename + "/" + geneset + "/"
        df = pd.read_csv(PATH + "/summary.tsv", delimiter = "\t")
        # Put the two provenance columns first, ahead of the columns read
        # from the file, without relying on their position after assignment.
        df.insert(0, "condition", filename)
        df.insert(1, "collection", geneset)
        frames.append(df)

# Concatenate once at the end; calling pd.concat inside the loop re-copies
# the accumulated table on every iteration.
D = pd.concat(frames)

In [18]:
# Save the combined table at the top of the results tree, tab-separated
# and without the row index.
macro_summary_path = PATH_ + "/sysmalvac_results_wgcna/" + "summary.tsv"
D.to_csv(macro_summary_path, sep="\t", index=False)