In [4]:
from helpers.gse import collection_to_R, nice_kegg, formatter_to_fix_letter_case
from gsea_api.molecular_signatures_db import MolecularSignaturesDatabase, GeneMatrixTransposed

Pathway enrichment analyses of the results of specific analysis methods are presented in the respective notebooks.

This notebook presents the methods and data sources for such analyses:

In [5]:
from jupyter_helpers.selective_import import notebooks_importer

from analyses.integration.Uniform_preprocessing import raw_protein_matrix, raw_rna_matrix

In [6]:
# Local MSigDB release (version 7.0) read from data/msigdb.
db = MolecularSignaturesDatabase('data/msigdb', version='7.0')


def _load_by_symbol(gene_sets):
    """Load one collection from `db`, always requesting the 'symbols' identifier type."""
    return db.load(gene_sets=gene_sets, id_type='symbols')


def _nice_hallmark(name):
    """Remove every 'HALLMARK_' occurrence, then replace underscores with spaces."""
    without_prefix = name.replace('HALLMARK_', '')
    return without_prefix.replace('_', ' ')


# Reactome gene sets parsed from a GMT file; handed below to
# formatter_to_fix_letter_case (presumably as the reference for the
# original pathway-name casing - TODO confirm in helpers.gse).
reactome_new_symbol = GeneMatrixTransposed.from_gmt('ReactomePathways.gmt', name='reactome_new_symbol')

kegg_pathways_symbol = _load_by_symbol('c2.cp.kegg').format_names(nice_kegg)

_reactome_msigdb = _load_by_symbol('c2.cp.reactome')
reactome_pathways_symbol = _reactome_msigdb.format_names(
    formatter_to_fix_letter_case(reactome_new_symbol)
)

gene_ontology_symbol = _load_by_symbol('c5.all')
immune_signature_symbol = _load_by_symbol('c7.all')
hallmarks_symbol = _load_by_symbol('h.all').format_names(_nice_hallmark)

# Union of the identifiers indexing the RNA and the protein matrices.
symbols = set(raw_rna_matrix.index) | set(raw_protein_matrix.index)

# Each entry is [gene set collection, genes to trim the collection to]:
# the plain name uses the combined `symbols`, while the `_for_rna` and
# `_for_protein` variants use only the index of the corresponding matrix.
collections = {}
for base_name, gene_sets in [
    ('hallmarks', hallmarks_symbol),
    ('reactome', reactome_pathways_symbol),
]:
    collections[base_name] = [gene_sets, symbols]
    collections[base_name + '_for_rna'] = [gene_sets, set(raw_rna_matrix.index)]
    collections[base_name + '_for_protein'] = [gene_sets, set(raw_protein_matrix.index)]


def export_collections_to_r(collections):
    """Hand every gene-set collection to `collection_to_R` and report its size.

    Args:
        collections: mapping of export name -> two-item sequence
            `(gene set collection, genes to trim to)`.

    Each entry is forwarded as
    `collection_to_R(collection, trim_to=genes, name=name)`; the entry name
    and `len()` of the returned value are then printed on one line.
    Nothing is returned.
    """
    for label, entry in collections.items():
        gene_sets, allowed_genes = entry
        trimmed = collection_to_R(gene_sets, trim_to=allowed_genes, name=label)
        print(label, len(trimmed))


# Run the export for every entry of `collections`; one "<name> <count>"
# line is printed per entry.
export_collections_to_r(collections)

hallmarks 50
hallmarks_for_rna 50
hallmarks_for_protein 49
reactome 1401
reactome_for_rna 1401
reactome_for_protein 570
