gseapy analysis of H1 triflic  
gsea algorithm: http://software.broadinstitute.org/gsea/doc/GSEAUserGuideFrame.html  
gseapy: https://media.readthedocs.org/pdf/gseapy/latest/gseapy.pdf  

In [1]:
import sys
import os
from itertools import chain
from collections import defaultdict
import numpy as np
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns
# Show floats with 3 decimal places when DataFrames are rendered.
pd.set_option('precision', 3)
# NOTE(review): -1 is presumably meant to disable column-width truncation so the long
# gene lists display in full; newer pandas releases expect None here — confirm against
# the installed pandas version.
pd.set_option('display.max_colwidth', -1)
import gseapy as gp
import goatools

# Parse the Gene Ontology OBO file. `obodag` is the module-level term lookup that the
# helper functions defined later in this notebook index by GO id.
obodag = goatools.obo_parser.GODag('go-basic.obo')

load obo file go-basic.obo
go-basic.obo: fmt(1.2) rel(2018-01-12) 49,288 GO Terms


In [2]:
# Make the local `metaproteomics` checkout importable before importing from it.
# NOTE(review): this is a hard-coded absolute path into one user's home directory, so the
# cell will not run on another machine as-is.
sys.path.insert(0, "/home/gstupp/projects/metaproteomics")
from metaproteomics import utils
#from metaproteomics.analysis import build_loci

# Directory that this notebook's input files are read from.
BASE = 'out/'
# NOTE(review): utils.load is project code not shown here; the .pkl.gz name suggests a
# gzipped pickle — only load files from a trusted source.
grouped_loci = utils.load(os.path.join(BASE,"grouped_loci_filt_annot.pkl.gz"))

In [3]:
def make_go2Gene_map(grouped_loci, ontology='MF'):
    """Map every GO term of one namespace to the cluster ids annotated with it.

    Each locus contributes its ``cluster_id`` to every GO id listed under
    ``annotations['go']`` that belongs to the requested namespace, and to every
    ancestor of that term (as returned by ``get_all_parents()``) that is in the
    same namespace.

    Parameters
    ----------
    grouped_loci : iterable
        Objects exposing ``annotations`` (a mapping that may contain a ``'go'``
        entry holding GO ids) and ``cluster_id``.
    ontology : {'MF', 'BP', 'CC'}
        Short code of the GO namespace to keep.

    Returns
    -------
    dict
        GO id -> set of cluster ids.

    Raises
    ------
    KeyError
        If ``ontology`` is not one of the three supported codes.

    Notes
    -----
    GO ids that are not present in the module-level ``obodag`` are skipped and
    reported in a single warning instead of aborting the whole run with a
    ``KeyError``.
    """
    import warnings

    ontology_map = {'MF': 'molecular_function', 'BP': 'biological_process', 'CC': 'cellular_component'}
    # Resolve once, up front: loop-invariant, and an invalid code fails immediately.
    namespace = ontology_map[ontology]

    out = defaultdict(set)
    unknown_terms = set()
    for locus in grouped_loci:
        if 'go' not in locus.annotations:
            continue
        for go in locus.annotations['go']:
            if go not in obodag:
                # Annotation refers to a term the loaded ontology does not contain.
                unknown_terms.add(go)
                continue
            term = obodag[go]
            if term.namespace != namespace:
                continue
            out[go].add(locus.cluster_id)
            # Propagate the annotation to all ancestors within the same namespace.
            for parent in term.get_all_parents():
                if obodag[parent].namespace == namespace:
                    out[parent].add(locus.cluster_id)

    if unknown_terms:
        preview = ', '.join(sorted(map(str, unknown_terms))[:5])
        warnings.warn(
            "{} GO id(s) not found in the ontology were skipped (e.g. {})".format(
                len(unknown_terms), preview))

    return dict(out)

def filter_go2gene_map(go_locus, max_fraction=0.5, min_size=5):
    """Prune a GO-term -> member-id map to informative, non-redundant gene sets.

    The filters run in this order:

    1. drop "very broad" terms whose members make up more than ``max_fraction``
       of all ids present in the map;
    2. drop terms with fewer than ``min_size`` members;
    3. drop a child term when its member set equals that of a parent that is
       still in the map;
    4. among sibling terms (terms sharing a parent) with identical member sets,
       keep the one visited first and drop the rest.

    Parameters
    ----------
    go_locus : dict
        GO id -> collection of member ids.
    max_fraction : float
        Largest allowed share of all ids for a single term (default 0.5, an
        arbitrary cut-off).
    min_size : int
        Smallest allowed gene set. The default is 5 rather than 10 because far
        fewer proteins are compared here than in a typical gene-level analysis.

    Returns
    -------
    dict
        A new, filtered dict; the input mapping is not modified.
    """
    # 1. Remove "very broad" gene sets.
    all_ids = set(chain.from_iterable(go_locus.values()))
    if all_ids:  # nothing to compare against when every set is empty
        go_locus = {term: members for term, members in go_locus.items()
                    if len(members) / len(all_ids) <= max_fraction}

    # 2. Remove terms with too few members.
    go_locus = {term: members for term, members in go_locus.items()
                if len(members) >= min_size}

    # 3. Remove child terms whose gene set is identical to their parent's.
    redundant_children = {
        child.id
        for parent_id, members in go_locus.items()
        for child in obodag[parent_id].children
        if child.id in go_locus and go_locus[child.id] == members
    }
    go_locus = {term: members for term, members in go_locus.items()
                if term not in redundant_children}

    # 4. Remove sibling terms with identical gene sets.
    to_remove = set()
    to_keep = set()
    for brother, members in go_locus.items():
        # Terms already visited are protected, so a later duplicate cannot
        # remove the sibling that was seen first.
        to_keep.add(brother)
        for parent in obodag[brother].parents:
            siblings = {node.id for node in parent.children} - {brother}
            for sibling in siblings:
                if (sibling in go_locus
                        and sibling not in to_keep
                        and go_locus[sibling] == members):
                    to_remove.add(sibling)
    go_locus = {term: members for term, members in go_locus.items()
                if term not in to_remove}

    return go_locus

def gomap_to_csv(go2gene, out_file = 'test.tsv'):
    """Write a GO-term -> members map as tab-separated text, one term per line.

    Each line holds the GO id, the term name looked up in the module-level
    ``obodag``, and then every member converted with ``str``, all separated by
    tabs. The complete text is assembled first and written in one go, so no
    file is created if a term lookup fails.

    Parameters
    ----------
    go2gene : dict
        GO id -> iterable of member ids.
    out_file : str
        Path of the file to (over)write.
    """
    rows = [
        "{}\t{}\t{}\n".format(term, obodag[term].name, '\t'.join(str(member) for member in loci))
        for term, loci in go2gene.items()
    ]

    with open(out_file, 'w') as fout:
        fout.write("".join(rows))

In [4]:
def run_go_gsea(rank_df, g2g_map, seed, outdir='tmp', min_size=5, max_size=500,
                permutation_n=10000, graph_num=20, gmt_file='temp.gmt'):
    """Run preranked GSEA of a ranked table against a GO-term gene-set map.

    The gene sets are first written to ``gmt_file`` with ``gomap_to_csv`` and
    then passed to ``gp.prerank`` by path, because gseapy did not appear to
    accept an in-memory mapping when this was written.

    Parameters
    ----------
    rank_df :
        Ranking passed unchanged to ``gp.prerank`` as ``rnk``.
    g2g_map : dict
        GO id -> member ids, e.g. the output of ``filter_go2gene_map``.
    seed : int
        Random seed forwarded to ``gp.prerank``.
    outdir : str
        Directory gseapy writes its reports and figures to.
    min_size, max_size, permutation_n, graph_num :
        Forwarded to ``gp.prerank``; the defaults are the values this notebook
        has always used.
    gmt_file : str
        Path of the intermediate gene-set file. It is overwritten on every call
        and left on disk afterwards.

    Returns
    -------
    The ``gp.prerank`` result with an added ``name`` column (GO term name from
    ``obodag``), sorted by ``nes`` in descending order.

    NOTE(review): the code treats the return value of ``gp.prerank`` as a
    DataFrame indexed by GO id. Some gseapy releases instead return an object
    that holds the table in an attribute — confirm against the installed version.
    """
    # Save the GO-to-gene map to disk so gseapy can read it by path.
    gomap_to_csv(g2g_map, gmt_file)

    res = gp.prerank(rnk=rank_df, gene_sets=gmt_file, outdir=outdir,
                     min_size=min_size, max_size=max_size,
                     permutation_n=permutation_n, graph_num=graph_num, seed=seed)

    # The result index holds GO ids; attach the human-readable term names.
    res['name'] = res.index.map(lambda term: obodag[term].name)

    return res.sort_values('nes', ascending=False)

def plot_gsea_result(row, rank, pheno_pos='Tcell', pheno_neg='RAG'):
    """Draw the gseapy enrichment plot for one row of a GSEA result table.

    Parameters
    ----------
    row :
        One result row; read via ``row['name']`` and the attributes
        ``hit_index``, ``nes``, ``pval``, ``fdr`` and ``rank_ES``.
    rank :
        Ranking metric passed through to ``gp.plot.gsea_plot``.
    pheno_pos, pheno_neg : str
        Labels for the positive and negative phenotype. The defaults keep the
        previously hard-coded 'Tcell' / 'RAG'; pass labels that match the
        comparison actually being plotted.

    Returns
    -------
    Whatever ``gp.plot.gsea_plot`` returns.

    NOTE(review): ``hit_index`` and ``rank_ES`` are not among the result columns
    displayed in this notebook (es, nes, pval, fdr, gene_set_size, matched_size,
    genes, name) — confirm where these fields are expected to come from.
    """
    return gp.plot.gsea_plot(rank, row['name'], row.hit_index, row.nes, row.pval, row.fdr, row.rank_ES,
                             phenoPos=pheno_pos, phenoNeg=pheno_neg)

In [5]:
# Build the raw GO-term -> cluster-id map for each of the three GO namespaces
# (molecular function, biological process, cellular component) ...
mf_map, bp_map, cc_map = (
    make_go2Gene_map(grouped_loci, namespace_code) for namespace_code in ('MF', 'BP', 'CC')
)
# ... and the filtered version of each map.
mf_map_f, bp_map_f, cc_map_f = (
    filter_go2gene_map(raw_map) for raw_map in (mf_map, bp_map, cc_map)
)

# Report how many terms survive filtering, one line per namespace (MF, BP, CC).
for raw_map, filtered_map in ((mf_map, mf_map_f), (bp_map, bp_map_f), (cc_map, cc_map_f)):
    print('Unfiltered: {}\tFiltered: {}'.format(len(raw_map), len(filtered_map)))

Unfiltered: 474	Filtered: 210
Unfiltered: 602	Filtered: 232
Unfiltered: 95	Filtered: 48


In [6]:
# Directory the GSEA reports for this comparison are written to.
out_dir = "sonic_sup_whole_gsea"

# Load the DESeq results, keep rows with |padj| <= 0.2 (rows with a missing padj
# fail the comparison and are dropped) and flip the sign of the fold change.
df = (
    pd.read_csv(os.path.join(BASE, "sonic_sup_v_whole_deseq_results.csv"))
    .loc[lambda frame: frame.padj.abs() <= 0.2]
    .assign(log2FoldChange=lambda frame: -1 * frame['log2FoldChange'])
)

# Two-column ranking: the first (unnamed) CSV column becomes 'gene_name' and the
# sign-flipped fold change becomes 'rank', sorted ascending.
rank_df = (
    df[['Unnamed: 0', 'log2FoldChange']]
    .rename(columns={'Unnamed: 0': 'gene_name', 'log2FoldChange': 'rank'})
    .sort_values('rank')
    .reset_index(drop=True)
)

In [7]:
# Preview the first rows of the ranking (lowest 'rank' values, since it is sorted ascending).
rank_df.head()

Unnamed: 0,gene_name,rank
0,62761516,-9.15
1,36974013,-8.926
2,39268506,-8.762
3,62236100,-8.731
4,63322719,-8.643


In [8]:
# Preranked GSEA against the filtered molecular-function and biological-process
# gene sets, run one after the other with the same seed.
# NOTE(review): both runs write into the same `out_dir`; confirm that the second
# run's report files do not overwrite those of the first.
mf_res, bp_res = (
    run_go_gsea(rank_df, gene_sets, seed=1111, outdir=out_dir)
    for gene_sets in (mf_map_f, bp_map_f)
)

# Split each result by NES sign, keeping only terms with nominal p < 0.05.
positive_hits = 'nes > 0 and pval < 0.05'
negative_hits = 'nes < 0 and pval < 0.05'

mf_rt = mf_res.query(positive_hits).sort_values('nes', ascending=False)
mf_control = mf_res.query(negative_hits).sort_values('nes', ascending=True)
bp_rt = bp_res.query(positive_hits).sort_values('nes', ascending=False)
bp_control = bp_res.query(negative_hits).sort_values('nes', ascending=True)

2018-02-13 11:51:09,963 Parsing data files for GSEA.............................
  mask |= (ar1 == a)
2018-02-13 11:51:11,306 0088 gene_sets used for further statistical testing.....
2018-02-13 11:51:11,307 Start to run GSEA...Might take a while..................
2018-02-13 11:58:40,731 Start to generate gseapy reports, and produce figures...
2018-02-13 11:58:48,504 Congratulations...GSEAPY run successfully...............
2018-02-13 11:58:48,525 Parsing data files for GSEA.............................
2018-02-13 11:58:49,951 0094 gene_sets used for further statistical testing.....
2018-02-13 11:58:49,951 Start to run GSEA...Might take a while..................
2018-02-13 12:06:43,267 Start to generate gseapy reports, and produce figures...
2018-02-13 12:06:50,900 Congratulations...GSEAPY run successfully...............


In [9]:
# Molecular-function terms with NES > 0 and p < 0.05, highest NES first.
mf_rt

Unnamed: 0_level_0,es,nes,pval,fdr,gene_set_size,matched_size,genes,name
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
GO:0004611,0.688,4.788,0.0,0.0,81,79,"[41688438, 65362612, 40582524, 65031514, 53629697, 45517643, 49026941, 58388428, 42660247, 65423787, 62078189, 43556646, 58527538, 20942064, 64982050, 65743137, 70403312, 39426259, 67866969, 41914852, 40656833, 43734696, 45084143, 21022171, 57769961, 66824230, 62091010, 58317121, 54789106, 42606314, 66199306, 67249520, 39707926, 43394027, 66958425, 10848066, 51589159, 13308883, 62033622, 166396878, 166285214, 52151281, 58132764, 64743418, 51101110, 48156244, 13675824, 64070550, 22764913, 17563766, 44526011, 62318683, 20922359, 20910477, 58886235, 50127458, 43129949, 40914457, 48632614, 4706576, 57681464, 25957523, 45349478, 20910442, 64920279, 49784426, 56332816, 48413248, 52919310, 22182132, 45625937, 56366728, 51175066, 28556640, 37145660, 167777308, 56925762, 62322784, 45819353]",phosphoenolpyruvate carboxykinase activity
GO:0004612,0.691,4.483,0.0,0.0,65,63,"[65362612, 40582524, 65031514, 45517643, 58388428, 65423787, 43556646, 58527538, 64982050, 65743137, 67866969, 43734696, 21022171, 57769961, 66824230, 58317121, 54789106, 42606314, 66199306, 67249520, 10848066, 51589159, 13308883, 62033622, 166396878, 166285214, 52151281, 58132764, 64743418, 51101110, 48156244, 13675824, 64070550, 22764913, 17563766, 44526011, 62318683, 20922359, 20910477, 58886235, 50127458, 43129949, 40914457, 48632614, 4706576, 57681464, 25957523, 45349478, 20910442, 64920279, 49784426, 56332816, 48413248, 52919310, 22182132, 45625937, 56366728, 51175066, 28556640, 37145660, 167777308, 56925762, 62322784]",phosphoenolpyruvate carboxykinase (ATP) activity
GO:0016831,0.607,4.199,0.0,0.0,86,83,"[41688438, 65362612, 40582524, 65031514, 53629697, 45517643, 49026941, 58388428, 42660247, 65423787, 62078189, 43556646, 58527538, 20942064, 64982050, 65743137, 70403312, 39426259, 67866969, 41914852, 40656833, 43734696, 45084143, 21022171, 57769961, 66824230, 62091010, 58317121, 54789106, 42606314, 66199306, 67249520, 39707926, 43394027, 66958425, 10848066, 51589159, 13308883, 62033622, 166396878, 166285214, 52151281, 58132764, 64743418, 51101110, 48156244, 13675824, 64070550, 22764913, 17563766, 44526011, 62318683, 20922359, 20910477, 58886235, 50127458, 43129949, 40914457, 48632614, 4706576, 57681464, 25957523, 45349478, 20910442, 64920279, 49784426, 56332816, 48413248, 52919310, 22182132, 45625937, 56366728, 51175066, 28556640, 37145660, 167777308, 651398, 56925762, 62322784, 45819353, 67390440, 16675474, 14278226]",carboxy-lyase activity
GO:0016852,0.701,4.17,0.0,0.0,46,45,"[43325697, 54643075, 54690680, 58799028, 37318483, 52019133, 41242315, 62022650, 60198961, 62651633, 62344516, 39172859, 167609360, 62772745, 51661852, 65472615, 62887159, 41318454, 65884410, 62271819, 64380879, 45799801, 62037065, 62489518, 61238641, 66785143, 60430276, 50202327, 166044631, 62374554, 59069679, 65468006, 51766415, 11762451, 51299489, 36241782, 64532907, 62125341, 66312194, 167709467, 62225985, 62450424, 65349525, 50069915, 63687761]",sirohydrochlorin cobaltochelatase activity
GO:0016814,0.734,3.68,0.0,0.0,48,26,"[64861784, 166416342, 65420785, 59296033, 44484649, 47630323, 66430161, 21312369, 64976161, 166986795, 55182519, 51462612, 60663896, 64363232, 64362864, 59919098, 66809311, 64905338, 64166940, 64381315, 8047714, 44223410, 56263791, 66491142, 37615398, 56610477]","hydrolase activity, acting on carbon-nitrogen (but not peptide) bonds, in cyclic amidines"
GO:0004807,0.488,3.166,0.0,0.0,74,61,"[64195026, 64703224, 65264616, 64841796, 64133206, 64498996, 65106338, 63492044, 29389525, 64885716, 65018571, 65032287, 64455320, 57980588, 65046198, 65078746, 54457320, 65081691, 65087679, 65097705, 67240771, 54306173, 64473889, 64875018, 36734803, 36393272, 22209850, 17395134, 13754598, 7983883, 51821008, 166334753, 43017203, 42387603, 65065317, 7684206, 65235043, 65089145, 168199724, 8956920, 5720955, 20600041, 64624567, 65164419, 64898790, 56895760, 42871001, 58170004, 64380885, 39720529, 64528286, 166606542, 59429091, 67826821, 61963857, 14247320, 22612751, 17566765, 62032555, 46334712, 65099309]",triose-phosphate isomerase activity
GO:0016830,0.359,2.746,0.0,0.0,147,124,"[41688438, 65362612, 40582524, 65031514, 15986841, 67115654, 45697075, 53629697, 43201684, 45517643, 49026941, 58388428, 42660247, 65423787, 62078189, 43556646, 58527538, 20942064, 64982050, 65743137, 70403312, 39426259, 67866969, 41914852, 40656833, 43734696, 45084143, 21022171, 57769961, 64837845, 66824230, 62091010, 58317121, 54789106, 42606314, 66199306, 67249520, 39707926, 43394027, 66958425, 10848066, 51589159, 13308883, 62033622, 166396878, 166285214, 52151281, 58132764, 64743418, 51101110, 48156244, 13675824, 64070550, 22764913, 17563766, 44526011, 62318683, 20922359, 20910477, 58886235, 50127458, 43129949, 40914457, 48632614, 4706576, 57681464, 25957523, 45349478, 20910442, 64920279, 49784426, 56332816, 48413248, 52919310, 22182132, 45625937, 56366728, 51175066, 28556640, 37145660, 66504785, 63770542, 167777308, 38138593, 51162059, 47070436, 64519517, 65938157, 68390396, 60577156, 53819628, 66875411, 65586747, 63272810, 39681257, 57821908, 37365157, 29774394, 40859402, 68206998, ...]",carbon-carbon lyase activity
GO:0016639,0.458,2.668,0.0,2.578e-05,114,41,"[14991355, 21927582, 7968318, 8043602, 62793253, 29177351, 21050003, 62651086, 62731685, 18129188, 15893349, 27635651, 7263134, 62735721, 19462529, 19605298, 7568577, 62530976, 76161178, 62450456, 62659852, 16402044, 62222737, 62641403, 62705949, 62346271, 62737980, 62321084, 62495493, 62466914, 9740705, 10096928, 62627293, 21899224, 62621676, 13297824, 166852145, 17629009, 5673728, 62579123, 17074423]","oxidoreductase activity, acting on the CH-NH2 group of donors, NAD or NADP as acceptor"
GO:0016798,0.477,2.659,0.0,2.292e-05,54,37,"[51068900, 47273907, 38243737, 57128808, 61629822, 52933791, 40046530, 36958605, 60500208, 60202340, 61651164, 65644591, 62094517, 63196447, 63275448, 51196749, 47369697, 61698694, 44294067, 62529187, 3693214, 49330968, 39320181, 21063525, 56715199, 67476892, 5201836, 64089135, 65938215, 53808713, 51869018, 56946912, 61898906, 8141108, 3577059, 64348533, 62151616]","hydrolase activity, acting on glycosyl bonds"
GO:0051082,0.38,2.218,0.0,0.0005157,56,42,"[14545627, 61966522, 29982033, 10930687, 29702875, 10053077, 29866789, 2786018, 62087821, 15922034, 10988069, 62029042, 21209398, 8030294, 59355888, 796789, 62078750, 27851758, 62008039, 7101911, 31262174, 18954820, 757818, 1110855, 411676, 30844231, 31724738, 21902224, 27754959, 62030465, 14593159, 2074337, 2163077, 2148419, 20710713, 3770272, 28832507, 29244360, 19649266, 49727342, 297515, 28501782]",unfolded protein binding


In [10]:
# Molecular-function terms with NES < 0 and p < 0.05, most negative NES first.
mf_control

Unnamed: 0_level_0,es,nes,pval,fdr,gene_set_size,matched_size,genes,name
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
GO:0016880,-0.719,-4.6,0.0,0.0,354,346,"[61924909, 61840025, 8032062, 8047113, 8195829, 7973249, 21467527, 9048587, 8402422, 7962146, 9270396, 9723318, 10286900, 10365777, 10446427, 21419353, 21397527, 10510823, 8401762, 21570364, 10899019, 21974580, 6167382, 7091833, 7267974, 7286941, 7366547, 22217450, 7369045, 7430033, 7435790, 7491772, 7530963, 22163933, 7583511, 22074862, 7607889, 21392141, 7678047, 7931537, 21974789, 21253138, 18124263, 18047662, 10687793, 17953809, 10689796, 16841849, 15819993, 15167402, 14433022, 13726185, 10715532, 10730846, 13602922, 10773288, 12031731, 18594862, 18885106, 64783339, 64602626, 21041123, 11533029, 20968127, 20872157, 20871392, 10531299, 20636481, 21369340, 20633999, 20333901, 20329663, 10533506, 6122871, 20325100, 20014313, 18950246, 64746940, 20604552, 61896244, 5798443, 166788817, 166598674, 166455421, 166316037, 166187048, 166096580, 165825325, 165806817, 166796953, 166917403, 61897200, 168122802, 167807103, 167742008, 167737667, 167730639, 167686814, 167650683, 167565226, ...]",acid-ammonia (or amide) ligase activity
GO:0016879,-0.682,-4.393,0.0,0.0,431,382,"[38867144, 62250933, 20968243, 64856944, 64164012, 47981596, 65135677, 62018486, 21347547, 61964112, 66025336, 62113144, 62571187, 67213458, 38210069, 36238237, 68597221, 65428360, 64709852, 51305083, 41896895, 17571629, 62223265, 62561967, 67118703, 52291787, 62517803, 21823200, 64467565, 45677108, 60433306, 56040554, 51213248, 65510618, 62574969, 61924909, 61840025, 8032062, 8047113, 8195829, 7973249, 21467527, 9048587, 8402422, 7962146, 9270396, 9723318, 10286900, 10365777, 10446427, 21419353, 21397527, 10510823, 8401762, 21570364, 10899019, 21974580, 6167382, 7091833, 7267974, 7286941, 7366547, 22217450, 7369045, 7430033, 7435790, 7491772, 7530963, 22163933, 7583511, 22074862, 7607889, 21392141, 7678047, 7931537, 21974789, 21253138, 18124263, 18047662, 10687793, 17953809, 10689796, 16841849, 15819993, 15167402, 14433022, 13726185, 10715532, 10730846, 13602922, 10773288, 12031731, 18594862, 18885106, 64783339, 64602626, 21041123, 11533029, 20968127, 20872157, ...]","ligase activity, forming carbon-nitrogen bonds"
GO:0097747,-0.509,-3.219,0.0,0.0,318,298,"[28233603, 61586849, 45038305, 54892854, 66169230, 62562027, 48307887, 62070529, 56757236, 50692088, 62018988, 56945065, 4099172, 18109239, 53190058, 53072938, 10815512, 22238189, 39275703, 9012457, 21126076, 9215855, 7627337, 41751957, 63996097, 64034734, 63856881, 63974364, 6384290, 18193491, 63995453, 63857026, 63800039, 62092597, 63849268, 13596265, 18214391, 17625098, 7289884, 61719675, 61601524, 41165392, 51691431, 62155980, 61604840, 167412647, 61631863, 61586861, 21126049, 22236358, 5535467, 4081737, 28962119, 3704668, 31058828, 8692550, 8887938, 10252913, 11928233, 13664144, 31893283, 37387955, 62823286, 62874585, 63410001, 76298777, 165818124, 165825861, 5887279, 61079203, 8029150, 61595851, 61593743, 61736143, 61598320, 61589098, 36374512, 28430821, 21254405, 10774345, 6023174, 61782419, 168043212, 61607113, 21880278, 59642867, 58926833, 52385306, 15890002, 17201391, 20871177, 7440434, 4172233, 165694804, 20901943, 167428759, 16830531, 2011772, 166188741, 165965080, ...]",RNA polymerase activity
GO:0003677,-0.492,-3.186,0.0,0.0,432,399,"[2041002, 1822783, 2058497, 2057470, 1958375, 209005, 68051075, 1818671, 2456528, 1158404, 1023145, 7173479, 7697530, 30751374, 63933277, 21230260, 28233603, 61586849, 45038305, 66169230, 62562027, 48307887, 62070529, 56757236, 30842656, 52689644, 63340726, 62869708, 50692088, 62018988, 56945065, 47583840, 4099172, 18109239, 6205815, 7009404, 68704849, 167587023, 68582616, 167775873, 167395572, 166037070, 68206104, 68150755, 67950579, 68671309, 67819674, 67744907, 67353000, 54869360, 68283504, 68757205, 68398056, 69263052, 166395738, 69322165, 69216024, 69422003, 69982823, 165788228, 10815512, 22238189, 39275703, 9012457, 21126076, 9215855, 7627337, 41751957, 11630292, 68352957, 63996097, 63856881, 63974364, 6384290, 18193491, 63995453, 63857026, 63800039, 64411896, 62092597, 20599288, 15818532, 13272074, 73137117, 8615254, 64697440, 63849268, 51907206, 46260937, 13596265, 18214391, 17625098, 7289884, 61719675, 61601524, 41165392, 51691431, 62155980, 61604840, 167412647, ...]",DNA binding
GO:0016779,-0.5,-3.168,0.0,0.0,331,305,"[28233603, 61586849, 45038305, 54892854, 49051387, 66169230, 62562027, 48307887, 62070529, 56757236, 50692088, 62018988, 56945065, 20879488, 4099172, 18109239, 21898869, 53190058, 53072938, 10815512, 22238189, 39275703, 9012457, 21126076, 9215855, 7627337, 41751957, 63996097, 64034734, 63856881, 63974364, 6384290, 18193491, 63995453, 63857026, 63800039, 62092597, 47085626, 63849268, 17592682, 13596265, 18214391, 17625098, 7289884, 61719675, 61601524, 41165392, 51691431, 62155980, 61604840, 167412647, 61631863, 61586861, 21126049, 22236358, 5535467, 4081737, 28962119, 3704668, 31058828, 8692550, 8887938, 10252913, 11928233, 13664144, 31893283, 37387955, 62823286, 62874585, 63410001, 76298777, 165818124, 165825861, 5887279, 61079203, 8029150, 61595851, 61593743, 61736143, 61598320, 61589098, 36374512, 28430821, 21254405, 10774345, 6023174, 61782419, 168043212, 61607113, 21880278, 59642867, 58926833, 52385306, 15890002, 17201391, 20871177, 7440434, 4172233, 165694804, 20901943, ...]",nucleotidyltransferase activity
GO:0016903,-0.506,-3.139,0.0,0.0,289,249,"[62566345, 56107656, 13404073, 56109778, 52783963, 32092798, 52336699, 67069785, 42476287, 64211477, 53586382, 50837212, 61963309, 61886393, 20062885, 4791940, 67245171, 63571991, 60303775, 20649327, 21899103, 39350602, 56901494, 20892542, 18677477, 50431192, 64109059, 39753345, 63337446, 36361385, 50367162, 60276174, 8548706, 20062884, 66911221, 33438652, 21021450, 20660282, 63778920, 166799254, 62943474, 18128923, 40399566, 62631165, 9207648, 63596410, 63706395, 63311575, 63549088, 63983982, 63371685, 58371576, 39393827, 51207305, 51248525, 7287519, 45183113, 52418710, 60950488, 61600015, 38655163, 166155431, 61129631, 75295979, 22076829, 59824668, 5733333, 51014129, 22705782, 61603708, 44236509, 50683047, 43367995, 59554904, 65390960, 54191813, 52491895, 8552975, 38073401, 61704346, 62729986, 61885811, 54778675, 167826931, 61708837, 61600971, 50141100, 46576360, 37551209, 63032818, 61604052, 10974874, 50924511, 61609737, 59084622, 12424621, 15860346, 61656479, 20066991, 43239691, ...]","oxidoreductase activity, acting on the aldehyde or oxo group of donors"
GO:0140098,-0.483,-3.096,0.0,0.0,386,362,"[28233603, 61586849, 18885724, 167282458, 65389288, 64506172, 167277868, 62726266, 62393365, 52576745, 52481957, 28962162, 11791224, 45038305, 54892854, 45031759, 66169230, 62562027, 48307887, 62070529, 56757236, 66440034, 66068248, 54345745, 32783706, 63271051, 41750634, 44453573, 48472643, 43335971, 58267232, 62491289, 62542342, 62432381, 50692088, 6789864, 62018988, 56945065, 56002051, 4099172, 18109239, 53190058, 53072938, 10815512, 22238189, 39275703, 9012457, 21126076, 9215855, 7627337, 41751957, 62472987, 8291745, 63996097, 64034734, 28669037, 63856881, 63974364, 6384290, 18193491, 62150994, 62280444, 62282436, 62432270, 166993709, 63995453, 49798158, 63857026, 63800039, 62092597, 63849268, 17592682, 13596265, 18214391, 17625098, 7289884, 61719675, 61601524, 41165392, 51691431, 62155980, 61604840, 167412647, 61631863, 61586861, 21126049, 22236358, 5535467, 4081737, 28962119, 3704668, 31058828, 8692550, 8887938, 10252913, 11928233, 13664144, 31893283, 37387955, 62823286, ...]","catalytic activity, acting on RNA"
GO:0003723,-0.444,-2.846,0.0,0.0,411,367,"[7822625, 22855140, 23028276, 23390113, 21299373, 12268875, 57729363, 63212163, 166846359, 61919677, 27258242, 61886286, 61907404, 58033748, 29219123, 21327765, 20995791, 46820147, 66047049, 7529421, 65570045, 10689110, 2066706, 4009240, 28857394, 7569326, 32784939, 67104490, 21902328, 2514188, 2288535, 4100571, 7092868, 21326718, 29940019, 65717267, 36785606, 64021749, 22810887, 61895769, 20322451, 2123537, 166666623, 22163880, 61917199, 16613806, 29768242, 65477380, 68010124, 61908356, 29213833, 61893464, 27700985, 63035382, 23619953, 5107176, 61913890, 62860561, 63807427, 3972270, 4344665, 66802302, 67230263, 66875237, 66405184, 57932979, 68127142, 407882, 68307507, 74304065, 20904247, 64503554, 20813343, 47926867, 62535487, 67981082, 64312859, 67499229, 68018705, 66649599, 20275740, 8092444, 24075428, 69956572, 7971758, 29262834, 6356194, 19011820, 31255291, 54181584, 55004337, 65976369, 67613595, 68566471, 62915203, 45975512, 63351170, 61906511, 20204818, 69624820, ...]",RNA binding
GO:0051540,-0.468,-2.665,0.0,0.0,145,128,"[59659494, 43040079, 61697710, 42476287, 64211477, 61963309, 20892542, 18677477, 21346720, 64109059, 29370388, 36361385, 8548706, 56886384, 33438652, 27628152, 21021450, 50275436, 20660282, 52039172, 166799254, 62631165, 6741654, 7287519, 45183113, 52418710, 60950488, 61600015, 38655163, 64424621, 59824668, 5733333, 61603708, 59897134, 50683047, 43036462, 54465018, 22278233, 61704346, 62623466, 61885811, 62598760, 62669601, 54778675, 167826931, 62653360, 61600971, 64541123, 58283832, 37551209, 61604052, 10974874, 57375765, 62546878, 61609737, 28223687, 61656479, 43239691, 29612533, 47634295, 10686622, 17638382, 64415826, 66721633, 61724288, 20603093, 47641810, 64778158, 64320533, 167584218, 61592021, 57150804, 61599909, 62623655, 20823651, 62136189, 42518469, 49824671, 47631792, 22368963, 61599853, 62139539, 61600262, 22519522, 61601993, 15747084, 61957888, 20066308, 20721195, 8018209, 55023377, 43305150, 57644708, 6216328, 63046604, 64331237, 50443396, 45816831, 62751142, 48841715, ...]",metal cluster binding
GO:0048037,-0.413,-2.637,0.0,0.0,417,336,"[63792450, 61935358, 63726906, 27914542, 38216270, 63591819, 63067512, 63102330, 63987657, 64309036, 42383896, 67196259, 64327323, 53822642, 64626931, 59659494, 36748532, 63886895, 43040079, 61888884, 37081817, 18124355, 18220289, 62534327, 20891904, 61697710, 45006668, 63604464, 66811765, 38116795, 66532761, 64753270, 62217311, 67176225, 20839749, 63325361, 42476287, 64211477, 61034581, 63558496, 66843302, 61963309, 66198046, 50706059, 55264813, 49357861, 63693018, 63719650, 13253272, 20649327, 64722367, 21899103, 651398, 20892542, 18677477, 21346720, 20721400, 58043383, 61890070, 64109059, 4745251, 29370388, 69027792, 2010342, 66834457, 54345745, 47616868, 61499523, 36653192, 21808882, 62743829, 63337446, 36361385, 8548706, 46828817, 48535761, 29620826, 61506401, 31563170, 27696183, 56886384, 33438652, 27628152, 21021450, 50275436, 45902285, 20660282, 52039172, 166799254, 62943474, 62280444, 49798158, 8455597, 13653414, 63624223, 65317827, 62631165, 63311575, 63371685, 6741654, ...]",cofactor binding


In [11]:
# Biological-process terms with NES > 0 and p < 0.05, highest NES first.
bp_rt

Unnamed: 0_level_0,es,nes,pval,fdr,gene_set_size,matched_size,genes,name
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
GO:0040011,0.71,4.741,0.0,0.0,86,69,"[62398453, 62543065, 62698072, 62881914, 62350005, 62450054, 64272473, 62769590, 62343668, 62271880, 62182764, 22255856, 62285751, 63245372, 62964422, 57741843, 62342496, 65192787, 67423079, 166851534, 13433916, 22625707, 42831732, 62417891, 55916126, 56924487, 62085029, 62923262, 28438217, 64855697, 63865661, 62405618, 63669394, 63452851, 62926584, 62853162, 62849941, 62510437, 21809431, 62265597, 38214195, 62582614, 43730808, 62157933, 45950065, 63205708, 63306571, 49361447, 1886631, 39745655, 23658537, 62766323, 62838017, 23658535, 63093125, 64376861, 63144665, 63039819, 48844574, 62966887, 49492210, 63798592, 65147516, 63489079, 65866265, 167570053, 64167744, 65968008, 68570136]",locomotion
GO:0006928,0.71,4.734,0.0,0.0,86,69,"[62398453, 62543065, 62698072, 62881914, 62350005, 62450054, 64272473, 62769590, 62343668, 62271880, 62182764, 22255856, 62285751, 63245372, 62964422, 57741843, 62342496, 65192787, 67423079, 166851534, 13433916, 22625707, 42831732, 62417891, 55916126, 56924487, 62085029, 62923262, 28438217, 64855697, 63865661, 62405618, 63669394, 63452851, 62926584, 62853162, 62849941, 62510437, 21809431, 62265597, 38214195, 62582614, 43730808, 62157933, 45950065, 63205708, 63306571, 49361447, 1886631, 39745655, 23658537, 62766323, 62838017, 23658535, 63093125, 64376861, 63144665, 63039819, 48844574, 62966887, 49492210, 63798592, 65147516, 63489079, 65866265, 167570053, 64167744, 65968008, 68570136]",movement of cell or subcellular component
GO:0006766,0.701,4.234,0.0,0.0,53,47,"[43325697, 54643075, 54690680, 58799028, 37318483, 52019133, 41242315, 62022650, 60198961, 62651633, 62344516, 39172859, 167609360, 62772745, 51661852, 65472615, 62887159, 41318454, 65884410, 62271819, 64380879, 45799801, 62037065, 62489518, 61238641, 66785143, 60430276, 50202327, 166044631, 62374554, 66504785, 59069679, 65468006, 51766415, 11762451, 51299489, 36241782, 64532907, 2412903, 62125341, 66312194, 167709467, 62225985, 62450424, 65349525, 50069915, 63687761]",vitamin metabolic process
GO:0033013,0.701,4.199,0.0,0.0,46,45,"[43325697, 54643075, 54690680, 58799028, 37318483, 52019133, 41242315, 62022650, 60198961, 62651633, 62344516, 39172859, 167609360, 62772745, 51661852, 65472615, 62887159, 41318454, 65884410, 62271819, 64380879, 45799801, 62037065, 62489518, 61238641, 66785143, 60430276, 50202327, 166044631, 62374554, 59069679, 65468006, 51766415, 11762451, 51299489, 36241782, 64532907, 62125341, 66312194, 167709467, 62225985, 62450424, 65349525, 50069915, 63687761]",tetrapyrrole metabolic process
GO:0046364,0.527,3.878,0.0,0.0,109,103,"[40327086, 51457463, 38362906, 41688438, 65362612, 40582524, 65031514, 53629697, 29430347, 13670649, 45517643, 49026941, 58388428, 42660247, 65423787, 62078189, 43556646, 58527538, 20942064, 64982050, 65743137, 70403312, 39426259, 67866969, 41914852, 40656833, 43734696, 45084143, 21022171, 57769961, 66824230, 62091010, 58317121, 54789106, 42606314, 66199306, 67249520, 66988806, 39707926, 43394027, 66958425, 10848066, 51589159, 13308883, 57125904, 62033622, 166396878, 166285214, 52151281, 58132764, 64743418, 51673203, 51101110, 48156244, 13675824, 64070550, 22764913, 55816901, 17563766, 44526011, 62318683, 20922359, 20910477, 58886235, 50127458, 43129949, 40914457, 48632614, 4706576, 57681464, 25957523, 45349478, 20910442, 43189595, 64920279, 49784426, 56332816, 48413248, 52919310, 22182132, 45625937, 56366728, 51175066, 28556640, 37145660, 167777308, 68018518, 60931305, 65447256, 38603820, 63986036, 56925762, 62322784, 42399038, 45917122, 60662052, 65725341, 65029251, 63597941, 39787083, ...]",monosaccharide biosynthetic process
GO:0016051,0.436,3.243,0.0,0.0,119,112,"[40327086, 51457463, 38362906, 41688438, 65362612, 40582524, 65031514, 53629697, 29430347, 13670649, 45517643, 49026941, 58388428, 42660247, 65423787, 62078189, 43556646, 58527538, 20942064, 64982050, 65743137, 70403312, 39426259, 67866969, 41914852, 40656833, 43734696, 45084143, 21022171, 57769961, 66824230, 62091010, 58317121, 54789106, 42606314, 66199306, 67249520, 66988806, 39707926, 43394027, 66958425, 10848066, 51589159, 13308883, 57125904, 62033622, 166396878, 166285214, 52151281, 58132764, 64743418, 51673203, 51101110, 48156244, 13675824, 64070550, 22764913, 55816901, 17563766, 44526011, 62318683, 20922359, 20910477, 58886235, 50127458, 43129949, 40914457, 48632614, 4706576, 57681464, 25957523, 45349478, 20910442, 43189595, 64920279, 49784426, 56332816, 48413248, 52919310, 22182132, 45625937, 56366728, 51175066, 28556640, 37145660, 167777308, 68018518, 60931305, 65447256, 38603820, 63986036, 56925762, 62322784, 20879488, 42399038, 45917122, 62497535, 22306409, 166192803, 42102729, ...]",carbohydrate biosynthetic process
GO:0006006,0.372,2.808,0.0,0.0,129,119,"[40327086, 51457463, 38362906, 41688438, 65362612, 40582524, 65031514, 53629697, 29430347, 13670649, 45517643, 49026941, 58388428, 42660247, 65423787, 62078189, 43556646, 58527538, 20942064, 64982050, 65743137, 70403312, 39426259, 67866969, 41914852, 40656833, 43734696, 45084143, 21022171, 57769961, 66824230, 62091010, 58317121, 54789106, 42606314, 66199306, 67249520, 66988806, 39707926, 43394027, 66958425, 10848066, 51589159, 13308883, 57125904, 62033622, 166396878, 166285214, 52151281, 58132764, 64743418, 51673203, 51101110, 48156244, 13675824, 64070550, 22764913, 55816901, 17563766, 44526011, 62318683, 20922359, 20910477, 58886235, 50127458, 43129949, 40914457, 48632614, 4706576, 57681464, 25957523, 45349478, 20910442, 43189595, 64920279, 49784426, 56332816, 48413248, 52919310, 22182132, 45625937, 56366728, 51175066, 28556640, 37145660, 167777308, 68018518, 60931305, 65447256, 38603820, 63986036, 20649327, 21899103, 56925762, 62322784, 63337446, 42399038, 45917122, 62943474, 63311575, ...]",glucose metabolic process
GO:0000272,0.484,2.565,0.0,0.0,42,30,"[61738591, 37974360, 61876717, 52545436, 65644591, 63447567, 62589924, 61639799, 61551448, 49330968, 61634094, 44178556, 62062305, 64042494, 67476892, 61544348, 43890615, 56917235, 63514938, 62214925, 55546725, 41561744, 38244546, 65610752, 62771868, 41674507, 61623890, 61565572, 49789731, 61895628]",polysaccharide catabolic process
GO:0044283,0.291,2.473,0.0,6.173e-05,317,229,"[40327086, 51457463, 38362906, 41688438, 65362612, 40582524, 65031514, 43325697, 62425212, 62396515, 53629697, 29430347, 13670649, 54643075, 45517643, 49026941, 58064839, 58388428, 66309200, 42660247, 65423787, 62078189, 43556646, 58527538, 20942064, 64982050, 65743137, 54690680, 70403312, 39426259, 67866969, 41914852, 40656833, 43734696, 58799028, 45084143, 21022171, 37318483, 57769961, 66824230, 62091010, 58317121, 54789106, 42606314, 66199306, 67249520, 66988806, 39707926, 43394027, 66958425, 10848066, 51589159, 52019133, 13308883, 57125904, 62033622, 166396878, 166285214, 52151281, 41242315, 58132764, 64743418, 51673203, 51101110, 48156244, 13675824, 64070550, 22764913, 55816901, 17563766, 44526011, 62318683, 20922359, 20910477, 62022650, 60198961, 58886235, 62651633, 50127458, 43129949, 40914457, 48632614, 62344516, 39172859, 4706576, 167609360, 62772745, 57681464, 51661852, 25957523, 45349478, 20910442, 43189595, 65472615, 62887159, 64920279, 41318454, 49784426, 56332816, 48413248, ...]",small molecule biosynthetic process
GO:0019318,0.29,2.294,0.0,0.0003519,183,148,"[40327086, 51457463, 38362906, 41688438, 65362612, 40582524, 65031514, 61651164, 53629697, 29430347, 13670649, 63275448, 45517643, 49026941, 58388428, 42660247, 65423787, 62078189, 43556646, 58527538, 20942064, 64982050, 65743137, 70403312, 39426259, 67866969, 41914852, 40656833, 43734696, 45084143, 21022171, 57769961, 66824230, 62091010, 58317121, 54789106, 42606314, 66199306, 67249520, 66988806, 39707926, 43394027, 66958425, 10848066, 51589159, 13308883, 57125904, 62033622, 47390357, 166396878, 166285214, 52151281, 58132764, 66118699, 64743418, 51673203, 51101110, 48156244, 13675824, 64070550, 22764913, 55816901, 17563766, 44526011, 62318683, 20922359, 20910477, 43936470, 36923984, 58886235, 50127458, 43129949, 40914457, 48632614, 4706576, 57681464, 25957523, 45349478, 20910442, 43189595, 64920279, 49784426, 56332816, 48413248, 52919310, 22182132, 45625937, 56366728, 51175066, 28556640, 37145660, 167777308, 21365243, 64924072, 54139955, 68018518, 62209062, 62058156, 44201319, 60931305, ...]",hexose metabolic process


In [13]:
# Display the bp_control table. NOTE(review): its definition is in an earlier cell;
# presumably the BP counterpart of cc_control (nes < 0 and pval < 0.05) -- confirm.
bp_control

Unnamed: 0_level_0,es,nes,pval,fdr,gene_set_size,matched_size,genes,name
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
GO:0032774,-0.507,-3.197,0.0,0.0,320,300,"[28233603, 61586849, 45038305, 54892854, 66169230, 62562027, 48307887, 62070529, 56757236, 50692088, 62018988, 56945065, 4099172, 18109239, 53190058, 53072938, 60582086, 10815512, 22238189, 39275703, 9012457, 21126076, 9215855, 7627337, 41751957, 63996097, 64034734, 63856881, 63974364, 6384290, 18193491, 63995453, 63857026, 63800039, 62092597, 63849268, 13596265, 18214391, 17625098, 7289884, 61719675, 61601524, 41165392, 51691431, 62155980, 61604840, 167412647, 61631863, 61586861, 21126049, 22236358, 5535467, 4081737, 28962119, 3704668, 31058828, 8692550, 8887938, 10252913, 11928233, 13664144, 31893283, 37387955, 62823286, 62874585, 63410001, 76298777, 165818124, 165825861, 5887279, 61079203, 8029150, 61595851, 61593743, 61736143, 61598320, 61589098, 36374512, 28430821, 21254405, 10774345, 6023174, 61782419, 168043212, 61607113, 21880278, 59642867, 58926833, 52385306, 15890002, 17201391, 20871177, 7440434, 4172233, 165694804, 20901943, 167428759, 16830531, 2011772, 166188741, ...]",RNA biosynthetic process
GO:0090304,-0.482,-3.123,0.0,0.0,451,424,"[20322451, 7173479, 7697530, 30751374, 63933277, 21230260, 28233603, 61586849, 18885724, 167282458, 65389288, 64506172, 167277868, 62726266, 62393365, 52576745, 52481957, 28962162, 11791224, 45038305, 54892854, 45031759, 66169230, 62562027, 48307887, 62070529, 56757236, 30842656, 52689644, 63340726, 62869708, 66440034, 66068248, 54345745, 32783706, 63271051, 41750634, 44453573, 48472643, 43335971, 58267232, 62491289, 62542342, 62432381, 50692088, 6789864, 62018988, 56945065, 47583840, 56002051, 4099172, 18109239, 6205815, 7009404, 21898869, 68704849, 167587023, 68582616, 167775873, 167395572, 166037070, 68206104, 68150755, 67950579, 68671309, 67819674, 67744907, 67353000, 54869360, 68283504, 68757205, 68398056, 69263052, 166395738, 69322165, 69216024, 69422003, 69982823, 165788228, 53190058, 53072938, 60582086, 10815512, 22238189, 39275703, 9012457, 21126076, 9215855, 7627337, 41751957, 62472987, 8291745, 63996097, 64034734, 28669037, 63856881, 63974364, 6384290, 18193491, 62150994, ...]",nucleic acid metabolic process
GO:0016070,-0.483,-3.107,0.0,0.0,390,366,"[28233603, 61586849, 18885724, 167282458, 65389288, 64506172, 167277868, 62726266, 62393365, 52576745, 52481957, 28962162, 11791224, 45038305, 54892854, 45031759, 66169230, 62562027, 48307887, 62070529, 56757236, 66440034, 66068248, 54345745, 32783706, 63271051, 41750634, 44453573, 48472643, 43335971, 58267232, 62491289, 62542342, 62432381, 50692088, 6789864, 62018988, 56945065, 56002051, 4099172, 18109239, 21898869, 53190058, 53072938, 60582086, 10815512, 22238189, 39275703, 9012457, 21126076, 9215855, 7627337, 41751957, 62472987, 8291745, 63996097, 64034734, 28669037, 63856881, 63974364, 6384290, 18193491, 62150994, 62280444, 62282436, 62432270, 166993709, 63995453, 49798158, 63857026, 63800039, 62092597, 63849268, 17592682, 13596265, 18214391, 17625098, 7289884, 61719675, 61601524, 41165392, 51691431, 62155980, 61604840, 167412647, 61631863, 61586861, 21126049, 22236358, 5535467, 4081737, 28962119, 3704668, 31058828, 8692550, 8887938, 10252913, 11928233, 13664144, 31893283, ...]",RNA metabolic process
GO:0034654,-0.443,-2.866,0.0,0.0,483,402,"[64861784, 166416342, 65420785, 62229406, 38262356, 58745702, 59296033, 44484649, 48850436, 62211247, 42697888, 64814712, 47630323, 66430161, 21312369, 64976161, 166986795, 55182519, 51462612, 60663896, 64363232, 64362864, 59919098, 66809311, 64905338, 64166940, 64381315, 8047714, 44223410, 56263791, 66491142, 37615398, 56610477, 44035050, 66911439, 51908457, 167911272, 50219646, 37081817, 62475357, 46925916, 62665934, 7441750, 28233603, 61586849, 1813814, 47320492, 45038305, 54892854, 66169230, 62562027, 48307887, 62070529, 56757236, 5200631, 50692088, 62018988, 56945065, 4099172, 62517803, 21823200, 32363526, 18109239, 44962491, 45919107, 53190058, 53072938, 60582086, 10815512, 22238189, 39275703, 9012457, 21126076, 9215855, 7627337, 41751957, 44846469, 63996097, 64034734, 63856881, 63974364, 6384290, 18193491, 50924924, 16352421, 47036627, 30284856, 63995453, 7825230, 63857026, 63800039, 62092597, 50069858, 18219005, 17172671, 38565790, 63849268, 46592634, 13596265, 18214391, ...]",nucleobase-containing compound biosynthetic process
GO:0006414,-0.432,-2.45,0.0,0.0,130,126,"[7822625, 22855140, 23028276, 23390113, 21299373, 12268875, 63212163, 61919677, 27258242, 61886286, 61907404, 21327765, 20995791, 46820147, 7529421, 2066706, 4009240, 7569326, 32784939, 21902328, 2514188, 2288535, 4100571, 7092868, 29940019, 61895769, 20322451, 2123537, 22163880, 61917199, 16613806, 29768242, 61908356, 61893464, 63035382, 23619953, 61913890, 62860561, 3972270, 4344665, 407882, 74304065, 20904247, 20813343, 62535487, 20275740, 8092444, 24075428, 7971758, 29262834, 6356194, 62915203, 61906511, 20204818, 59467852, 21317648, 5889071, 61921125, 22141261, 10520505, 10757891, 2008440, 67286035, 59109067, 167008993, 50174660, 4579963, 50191602, 53794247, 45095076, 28965941, 42483034, 63429462, 63767811, 63898650, 64161220, 67858521, 64203852, 64318612, 64409142, 64495498, 65695718, 67631056, 70655692, 167948338, 64161933, 57366109, 6702548, 58257839, 9538646, 20952231, 27626100, 27699655, 36548409, 45817494, 39972608, 42571848, 44476057, 52677378, 45136125, ...]",translational elongation
GO:0051276,-0.502,-2.36,0.0,0.0,48,48,"[47583840, 6205815, 7009404, 68704849, 167587023, 68582616, 167775873, 167395572, 166037070, 68206104, 68150755, 67950579, 68671309, 67819674, 67744907, 67353000, 54869360, 68283504, 68757205, 68398056, 69263052, 166395738, 69322165, 69216024, 69422003, 69982823, 165788228, 20599288, 15818532, 13272074, 73137117, 8615254, 20657703, 67447849, 68523409, 8061262, 165757108, 6216247, 69878150, 42554334, 16155541, 19593932, 22610648, 43409784, 46287949, 46309554, 33919596, 20217211]",chromosome organization
GO:0015672,-0.48,-2.313,0.0,1.243e-05,61,53,"[46925916, 62665934, 7441750, 1813814, 47320492, 32363526, 50924924, 16352421, 47036627, 30284856, 7825230, 50069858, 18219005, 17172671, 36401269, 46592634, 10920382, 12167871, 7089159, 45110823, 6477545, 45987173, 62428846, 44977402, 42324963, 61472524, 74407057, 63423769, 10791111, 62678634, 21076498, 62659413, 46694653, 42879865, 65396608, 15749752, 14739479, 23585157, 32313561, 12106659, 59732024, 37477462, 62521088, 54159053, 53957349, 62354250, 66130141, 54924776, 1994037, 68120784, 1438473, 18669105, 1408901]",monovalent inorganic cation transport
GO:0043038,-0.461,-2.304,0.0,1.088e-05,65,63,"[18885724, 167282458, 65389288, 64506172, 167277868, 62726266, 62393365, 52576745, 52481957, 28962162, 11791224, 45031759, 66440034, 66068248, 54345745, 32783706, 63271051, 41750634, 44453573, 48472643, 43335971, 58267232, 62491289, 62542342, 62432381, 6789864, 56002051, 62472987, 8291745, 28669037, 62150994, 62280444, 62282436, 62432270, 166993709, 49798158, 62866527, 62756455, 45315086, 44059774, 167298936, 60582561, 10898197, 39555884, 47669311, 50657403, 53046930, 63675249, 167629607, 37497956, 62702496, 62698119, 55962530, 34300085, 56053821, 46046353, 18185613, 62919140, 50203419, 62926562, 17624588, 44081820, 20634800]",amino acid activation
GO:0006818,-0.48,-2.299,0.0,1.934e-05,61,53,"[46925916, 62665934, 7441750, 1813814, 47320492, 32363526, 50924924, 16352421, 47036627, 30284856, 7825230, 50069858, 18219005, 17172671, 36401269, 46592634, 10920382, 12167871, 7089159, 45110823, 6477545, 45987173, 62428846, 44977402, 42324963, 61472524, 74407057, 63423769, 10791111, 62678634, 21076498, 62659413, 46694653, 42879865, 65396608, 15749752, 14739479, 23585157, 32313561, 12106659, 59732024, 37477462, 62521088, 54159053, 53957349, 62354250, 66130141, 54924776, 1994037, 68120784, 1438473, 18669105, 1408901]",hydrogen transport
GO:0034660,-0.461,-2.298,0.0,1.741e-05,65,63,"[18885724, 167282458, 65389288, 64506172, 167277868, 62726266, 62393365, 52576745, 52481957, 28962162, 11791224, 45031759, 66440034, 66068248, 54345745, 32783706, 63271051, 41750634, 44453573, 48472643, 43335971, 58267232, 62491289, 62542342, 62432381, 6789864, 56002051, 62472987, 8291745, 28669037, 62150994, 62280444, 62282436, 62432270, 166993709, 49798158, 62866527, 62756455, 45315086, 44059774, 167298936, 60582561, 10898197, 39555884, 47669311, 50657403, 53046930, 63675249, 167629607, 37497956, 62702496, 62698119, 55962530, 34300085, 56053821, 46046353, 18185613, 62919140, 50203419, 62926562, 17624588, 44081820, 20634800]",ncRNA metabolic process


In [14]:
# Run GSEA against the cc_map_f gene sets with a fixed seed, then split the
# nominally significant results (pval < 0.05) by the sign of `nes`.
cc_res = run_go_gsea(rank_df, cc_map_f, seed=1111, outdir=out_dir)

# Positive-nes gene sets, largest nes first.
cc_rt = (
    cc_res[(cc_res['nes'] > 0) & (cc_res['pval'] < 0.05)]
    .sort_values(by='nes', ascending=False)
)

# Negative-nes gene sets, most negative nes first.
cc_control = (
    cc_res[(cc_res['nes'] < 0) & (cc_res['pval'] < 0.05)]
    .sort_values(by='nes')
)

2018-02-13 16:55:00,062 Parsing data files for GSEA.............................
  mask |= (ar1 == a)
2018-02-13 16:55:00,367 0020 gene_sets used for further statistical testing.....
2018-02-13 16:55:00,368 Start to run GSEA...Might take a while..................
2018-02-13 16:56:42,374 Start to generate gseapy reports, and produce figures...
2018-02-13 16:56:49,540 Congratulations...GSEAPY run successfully...............


In [15]:
# Display the rows of cc_res with nes > 0 and pval < 0.05, sorted by descending nes.
cc_rt

Unnamed: 0_level_0,es,nes,pval,fdr,gene_set_size,matched_size,genes,name
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
GO:0042995,0.71,4.731,0.0,0.0,85,69,"[62398453, 62543065, 62698072, 62881914, 62350005, 62450054, 64272473, 62769590, 62343668, 62271880, 62182764, 22255856, 62285751, 63245372, 62964422, 57741843, 62342496, 65192787, 67423079, 166851534, 13433916, 22625707, 42831732, 62417891, 55916126, 56924487, 62085029, 62923262, 28438217, 64855697, 63865661, 62405618, 63669394, 63452851, 62926584, 62853162, 62849941, 62510437, 21809431, 62265597, 38214195, 62582614, 43730808, 62157933, 45950065, 63205708, 63306571, 49361447, 1886631, 39745655, 23658537, 62766323, 62838017, 23658535, 63093125, 64376861, 63144665, 63039819, 48844574, 62966887, 49492210, 63798592, 65147516, 63489079, 65866265, 167570053, 64167744, 65968008, 68570136]",cell projection
GO:0044463,0.726,4.672,0.0,0.0,73,60,"[62398453, 62543065, 62698072, 62881914, 62350005, 62450054, 64272473, 62769590, 62343668, 62271880, 62182764, 22255856, 62285751, 63245372, 62964422, 166851534, 13433916, 22625707, 42831732, 62417891, 55916126, 56924487, 62085029, 62923262, 28438217, 64855697, 63865661, 62405618, 63669394, 63452851, 62926584, 62853162, 62849941, 62510437, 21809431, 62265597, 38214195, 62582614, 62157933, 63205708, 63306571, 49361447, 1886631, 39745655, 23658537, 62766323, 62838017, 23658535, 63093125, 64376861, 63144665, 63039819, 48844574, 62966887, 49492210, 63798592, 65147516, 63489079, 65866265, 167570053]",cell projection part
GO:0005886,0.722,4.34,0.0,0.0,66,46,"[13445022, 38185668, 63133556, 64360880, 62974897, 33842604, 48193770, 59182599, 47501503, 63101601, 62377777, 60186186, 47630348, 63001215, 41499392, 56236083, 46173499, 48858203, 63042548, 66579509, 45717185, 63078668, 31260831, 63112593, 66571595, 53716631, 46410130, 166005029, 63495297, 55212582, 64128095, 63055964, 63188988, 67439647, 33471949, 58120150, 64649372, 62788046, 39552245, 44142111, 39280970, 37483090, 62049799, 63059959, 66484337, 37439011]",plasma membrane
GO:1904949,0.529,3.004,0.0,0.0,58,39,"[56076521, 62946388, 63332162, 13203808, 12135439, 6924770, 22562105, 64995594, 6569127, 10829039, 56215621, 66732966, 40852561, 36311677, 58930696, 10689069, 5711881, 77351967, 28360306, 17587826, 15700990, 63363572, 63452797, 47513944, 28240379, 63205501, 40726507, 40674602, 27931540, 40686252, 66502215, 66602623, 8187004, 28273397, 4596523, 54544767, 41124596, 8861059, 5062281]",ATPase complex
GO:0044459,0.529,2.994,0.0,0.0,58,39,"[56076521, 62946388, 63332162, 13203808, 12135439, 6924770, 22562105, 64995594, 6569127, 10829039, 56215621, 66732966, 40852561, 36311677, 58930696, 10689069, 5711881, 77351967, 28360306, 17587826, 15700990, 63363572, 63452797, 47513944, 28240379, 63205501, 40726507, 40674602, 27931540, 40686252, 66502215, 66602623, 8187004, 28273397, 4596523, 54544767, 41124596, 8861059, 5062281]",plasma membrane part
GO:1990351,0.529,2.989,0.0,0.0,58,39,"[56076521, 62946388, 63332162, 13203808, 12135439, 6924770, 22562105, 64995594, 6569127, 10829039, 56215621, 66732966, 40852561, 36311677, 58930696, 10689069, 5711881, 77351967, 28360306, 17587826, 15700990, 63363572, 63452797, 47513944, 28240379, 63205501, 40726507, 40674602, 27931540, 40686252, 66502215, 66602623, 8187004, 28273397, 4596523, 54544767, 41124596, 8861059, 5062281]",transporter complex
GO:0016020,0.285,2.252,0.0,4.269e-05,221,137,"[61717144, 13445022, 61655642, 61935358, 38185668, 63133556, 64360880, 62974897, 33842604, 51621514, 48193770, 59182599, 63471490, 62054139, 47501503, 63101601, 62377777, 63948514, 60186186, 47630348, 38216270, 63001215, 41499392, 37021135, 53943329, 66043038, 56236083, 53989012, 46173499, 48858203, 61530003, 63042548, 66579509, 59156293, 53598491, 39754209, 42028162, 61671561, 51626281, 36220708, 45717185, 45444248, 14080080, 63078668, 42005004, 31260831, 63112593, 66571595, 53716631, 46410130, 58048009, 46514055, 166005029, 63495297, 55212582, 64128095, 63055964, 63188988, 15491565, 67439647, 33471949, 51621425, 58120150, 64649372, 62788046, 39552245, 44142111, 39280970, 37483090, 62049799, 63059959, 66484337, 37439011, 54375462, 61638376, 63397584, 46766861, 40649630, 64296083, 61946934, 50027463, 19318696, 55546725, 41561744, 17337686, 61586644, 38244546, 66330668, 46170023, 13217126, 61504270, 57221749, 69027792, 2010342, 66834457, 42083891, 58286110, 61801555, 61808771, 65483290, ...]",membrane


In [16]:
# Display the rows of cc_res with nes < 0 and pval < 0.05, sorted by ascending nes
# (most negative first).
cc_control

Unnamed: 0_level_0,es,nes,pval,fdr,gene_set_size,matched_size,genes,name
Term,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
GO:0044391,-0.435,-2.677,0.0,0.0,250,232,"[166846359, 58033748, 29219123, 66047049, 65570045, 10689110, 28857394, 67104490, 67469680, 21326718, 65717267, 64021749, 22810887, 166666623, 65477380, 29213833, 27700985, 5107176, 63807427, 67230263, 66875237, 23714033, 14081574, 5746583, 14603981, 64503554, 47926867, 31549327, 46818577, 19011820, 31255291, 54181584, 55004337, 65976369, 47615003, 36401449, 18689097, 23587815, 38785731, 67859417, 45975512, 66402327, 56106808, 14150438, 167706979, 8700996, 7522455, 29612526, 67061811, 166807349, 168017014, 67389484, 29341916, 4774708, 20720618, 74406712, 82054302, 7259645, 66350717, 66541818, 19073235, 65354409, 6240736, 28544550, 33105645, 65409994, 6085738, 64615908, 64667783, 64853626, 166456781, 67094509, 7290535, 21828346, 38125758, 65886657, 21335616, 29757347, 65290575, 65595228, 29972795, 166757246, 64522941, 168226647, 27851959, 60350763, 16039210, 65408656, 14890274, 64078191, 165795593, 168064143, 64122877, 64937458, 65956045, 64744508, 18692349, 64037691, 66308665, 66591237, ...]",ribosomal subunit
GO:0015935,-0.436,-2.601,0.0,0.0,180,175,"[166846359, 58033748, 29219123, 66047049, 65570045, 10689110, 28857394, 67104490, 21326718, 65717267, 64021749, 22810887, 166666623, 65477380, 29213833, 27700985, 5107176, 67230263, 66875237, 23714033, 14081574, 5746583, 14603981, 46818577, 19011820, 31255291, 54181584, 55004337, 65976369, 47615003, 38785731, 45975512, 66402327, 56106808, 167706979, 7522455, 29612526, 67061811, 166807349, 168017014, 67389484, 29341916, 4774708, 20720618, 74406712, 82054302, 7259645, 66350717, 66541818, 6085738, 64615908, 64667783, 64853626, 166456781, 67094509, 7290535, 21828346, 65886657, 21335616, 64522941, 168226647, 27851959, 60350763, 16039210, 65408656, 14890274, 64078191, 165795593, 168064143, 64122877, 65956045, 64744508, 64037691, 66308665, 66591237, 7381866, 12255887, 14783236, 10815303, 30844263, 66920578, 65156191, 65488847, 4343484, 63998974, 64115139, 20106802, 67648516, 66050469, 15743412, 64239927, 18720605, 5107198, 66756705, 28225353, 63962734, 66324910, 66196239, 66242221, 9904239, ...]",small ribosomal subunit
GO:0005622,-0.4,-2.59,0.0,0.0,474,420,"[67600784, 37911383, 23712773, 7822625, 67530100, 40801738, 22855140, 45552631, 69853796, 23028276, 23390113, 21299373, 12268875, 57729363, 63212163, 166846359, 61919677, 27258242, 61886286, 61907404, 58033748, 29219123, 21327765, 20995791, 46820147, 66047049, 67795556, 7529421, 65570045, 10689110, 39439162, 4009240, 28857394, 7569326, 32784939, 67104490, 21902328, 4100571, 7092868, 67469680, 21326718, 29940019, 65717267, 36785606, 64021749, 22810887, 61895769, 70180230, 20322451, 166666623, 22163880, 61917199, 16613806, 29768242, 65477380, 68010124, 61908356, 29213833, 61893464, 27700985, 67963058, 63035382, 23619953, 5107176, 61913890, 62860561, 63807427, 3972270, 4344665, 23587484, 66802302, 23714033, 14081574, 5746583, 14603981, 66405184, 57932979, 68127142, 68307507, 74304065, 20904247, 64503554, 20813343, 47926867, 62535487, 67981082, 31549327, 64312859, 67499229, 70329620, 67855986, 44292295, 40441269, 20275740, 8092444, 46818577, 28624369, 24075428, 10276368, 67569222, ...]",intracellular
GO:0044446,-0.404,-2.514,0.0,0.0,283,261,"[61530003, 165642734, 61500275, 61468943, 165644696, 165642706, 165677957, 61506894, 61501151, 61499720, 61518541, 61501147, 61519450, 2041002, 1822783, 2058497, 2057470, 1958375, 209005, 68051075, 1818671, 166846359, 58033748, 29219123, 66047049, 65570045, 10689110, 2456528, 28857394, 67104490, 1158404, 1023145, 67469680, 21326718, 65717267, 64021749, 22810887, 166666623, 65477380, 29213833, 27700985, 5107176, 63807427, 67230263, 66875237, 23714033, 14081574, 5746583, 14603981, 64503554, 47926867, 31549327, 46818577, 61504270, 19011820, 31255291, 54181584, 55004337, 65976369, 47615003, 36401449, 18689097, 23587815, 38785731, 67859417, 45975512, 66402327, 56106808, 14150438, 167706979, 8700996, 7522455, 29612526, 67061811, 166807349, 168017014, 67389484, 29341916, 4774708, 20720618, 74406712, 82054302, 7259645, 66350717, 66541818, 19073235, 65354409, 6240736, 28544550, 33105645, 65409994, 6085738, 64615908, 64667783, 64853626, 166456781, 67094509, 7290535, 21828346, 38125758, ...]",intracellular organelle part
GO:0015934,-0.446,-2.184,0.0,1.472e-05,70,57,"[67469680, 63807427, 64503554, 47926867, 31549327, 36401449, 18689097, 23587815, 67859417, 14150438, 8700996, 19073235, 65354409, 6240736, 28544550, 33105645, 65409994, 38125758, 29757347, 65290575, 65595228, 29972795, 166757246, 64937458, 18692349, 18688909, 23586804, 4627187, 3497451, 64848512, 65186468, 10739603, 56779021, 166529978, 67545681, 23590811, 65528291, 67966912, 7412647, 65043810, 67795836, 65596513, 58476241, 40477281, 21716821, 18688777, 56544527, 68531027, 166221685, 65380743, 40938225, 3730682, 29733203, 30331294, 23531672, 64585856, 65147107]",large ribosomal subunit
GO:0033178,-0.481,-2.123,0.0,1.227e-05,45,38,"[62665934, 7441750, 1813814, 32363526, 50924924, 16352421, 47036627, 30284856, 7825230, 50069858, 18219005, 17172671, 46592634, 10920382, 12167871, 7089159, 6477545, 45987173, 62428846, 74407057, 63423769, 10791111, 62678634, 21076498, 62659413, 15749752, 14739479, 23585157, 32313561, 12106659, 59732024, 62521088, 53957349, 62354250, 1994037, 1438473, 18669105, 1408901]","proton-transporting two-sector ATPase complex, catalytic domain"
GO:0005737,-0.337,-2.043,0.0,0.0001157,298,203,"[63792450, 63218511, 69374441, 50921113, 66223394, 47390357, 66118699, 22324634, 43936470, 36923984, 63167857, 21365243, 64924072, 54139955, 63282343, 62209062, 47626575, 66876017, 62058156, 40043291, 44201319, 48102552, 28150928, 38842066, 63523057, 15046618, 165824520, 60403481, 52158557, 21809442, 62907054, 21210212, 4096188, 10739053, 2066706, 64990955, 62199335, 68997858, 44628844, 2514188, 29187158, 14914677, 2288535, 65051813, 20650649, 17102205, 39760203, 45996733, 2123537, 38006889, 20870932, 37910892, 29722536, 62268022, 5495963, 48047323, 64390477, 12255964, 70035553, 407882, 62297581, 55394461, 59676363, 42132269, 46051980, 18885724, 167282458, 65389288, 64506172, 167277868, 62726266, 62393365, 52576745, 52481957, 28962162, 11791224, 67548959, 18283358, 45031759, 61848181, 60291611, 29370388, 66440034, 66068248, 54345745, 32783706, 63271051, 41750634, 44453573, 48472643, 43335971, 58267232, 62491289, 62542342, 62432381, 64657538, 6789864, 56002051, 28685733, 39707614, ...]",cytoplasm
GO:0044422,-0.269,-1.709,0.0,0.005154,356,321,"[62398453, 62543065, 62698072, 62881914, 62350005, 62450054, 64272473, 62769590, 62343668, 62271880, 62182764, 22255856, 62285751, 63245372, 62964422, 166851534, 13433916, 22625707, 42831732, 62417891, 55916126, 56924487, 61530003, 62085029, 62923262, 28438217, 64855697, 63865661, 62405618, 63669394, 63452851, 62926584, 62853162, 62849941, 62510437, 21809431, 62265597, 38214195, 62582614, 62157933, 165642734, 61500275, 61468943, 63205708, 63306571, 49361447, 1886631, 39745655, 165644696, 165642706, 23658537, 62766323, 62838017, 23658535, 63093125, 165677957, 61506894, 61501151, 61499720, 64376861, 63144665, 63039819, 48844574, 61518541, 62966887, 49492210, 61501147, 61519450, 63798592, 65147516, 63489079, 65866265, 2041002, 1822783, 2058497, 2057470, 1958375, 209005, 68051075, 1818671, 166846359, 58033748, 29219123, 66047049, 65570045, 10689110, 167570053, 2456528, 28857394, 67104490, 1158404, 1023145, 67469680, 21326718, 65717267, 64021749, 22810887, 166666623, 65477380, 29213833, ...]",organelle part
