# Load Modules

In [6]:
import pandas as pd
import matplotlib.pyplot as plt
from idea import run_gsea, IDEA
# Switch matplotlib to its built-in "dark_background" style sheet for any
# figures created after this point.
plt.style.use("dark_background")

# Load Data

In [7]:
# Per-gene results table (tab-separated).
genes = pd.read_csv(
    "../results/results.gene_results.tsv",
    sep="\t",
)

# Every gene in the table serves as the enrichment background.
background = genes["gene"].values

# Keep only the genes whose FDR is below 0.05.
is_significant = genes["fdr"] < 0.05
sig = genes[is_significant]
sig

Unnamed: 0,gene,fold_change,log_fold_change,score_low,pvalue_low,fdr_low,score_high,pvalue_high,fdr_high,pvalue,fdr,phenotype_score
16,RBM4,0.924395,-0.113419,1.798724e-05,0.000111,0.008387,9.131367e-01,0.917649,1.000000,0.000111,0.008387,-0.448659
25,IGHMBP2,1.266980,0.341394,1.000000e+00,1.000000,1.000000,4.179358e-05,0.000142,0.011429,0.000142,0.011429,1.313879
37,GRHL2,0.803597,-0.315456,2.066461e-07,0.000004,0.000625,1.000000e+00,1.000000,1.000000,0.000004,0.000625,-1.688864
46,ZNF143,1.351960,0.435053,1.000000e+00,1.000000,1.000000,2.156983e-04,0.000700,0.039500,0.000700,0.039500,1.372622
66,GPBP1,0.758331,-0.399100,3.028670e-05,0.000115,0.008387,1.000000e+00,1.000000,1.000000,0.000115,0.008387,-1.571958
...,...,...,...,...,...,...,...,...,...,...,...,...
2095,MRGBP,1.788541,0.838783,1.000000e+00,1.000000,1.000000,3.137711e-07,0.000004,0.000833,0.000004,0.000833,4.490614
2110,GTF3A,1.135437,0.183247,1.000000e+00,1.000000,1.000000,1.783643e-05,0.000062,0.006364,0.000062,0.006364,0.771031
2128,ZBTB2,1.181355,0.240443,1.000000e+00,1.000000,1.000000,1.636613e-04,0.000545,0.033243,0.000545,0.033243,0.784762
2146,FOXH1,1.371145,0.455381,1.000000e+00,1.000000,1.000000,4.385269e-06,0.000009,0.001111,0.000009,0.001111,2.300901


# Define Function to Run GSEA, Build Networks, and Save Files

In [8]:
def build_network(
    degs: pd.DataFrame,
    library: str,
    label: str,
    background: list = None,
    max_terms: int = 20,
    threshold: float = 0.15,
    use_pvalue: bool = False,
):
    """
    Run GSEA on a set of DEGs and save an IDEA network visualization.

    Parameters
    ----------
    degs : pd.DataFrame
        Differentially expressed genes. The `gene` column is passed to
        `run_gsea`; the `log_fold_change` and `fdr` columns are named as the
        color and size columns when constructing `IDEA`.
    library : str
        Gene-set library identifier forwarded to `run_gsea`
        (this notebook uses "BP", "CC" and "MF").
    label : str
        Prefix of the output file name; the visualization is written to
        `../figures/{label}_{library}.html`.
    background : sequence of gene names, optional
        Forwarded to `run_gsea` as `background`. `None` runs the enrichment
        without an explicit background.
    max_terms : int
        Only the first `max_terms` rows of the GSEA result are handed to IDEA.
    threshold : float
        Forwarded unchanged to `run_gsea`.
    use_pvalue : bool
        Forwarded unchanged to `run_gsea`.

    Returns
    -------
    None
        The function is called for its side effects: it either prints a
        message (no terms found) or writes an HTML file.
    """
    from pathlib import Path

    # Run GSEA
    gsea = run_gsea(
        degs.gene,
        library,
        background=background,
        threshold=threshold,
        use_pvalue=use_pvalue,
    )

    # If no significant terms, report which run came up empty and return.
    # The label is included because the same library is run under several
    # labels (e.g. with and without a background), and a message naming only
    # the library cannot be matched to a specific run.
    if len(gsea) == 0:
        print("No significant terms found for {} ({})!".format(library, label))
        return

    # Run IDEA
    # NOTE(review): the keyword names suggest genes are colored by signed
    # log fold change on a "seismic" palette centered at 0 and sized by FDR;
    # confirm against the IDEA documentation.
    idea = IDEA(
        degs,
        gsea.head(max_terms),
        deg_color_name="log_fold_change",
        deg_size_name="fdr",
        neg_log_xform_degs_color=False,
        gene_palette="seismic",
        center=0,
        absolute_degs_color=False,
    )

    # Make sure the output directory exists so a fresh checkout without a
    # `figures/` directory does not fail at the very last step.
    out_path = "../figures/{}_{}.html".format(label, library)
    Path(out_path).parent.mkdir(parents=True, exist_ok=True)

    # Plot
    idea.visualize(out_path)

# Run IDEA Analysis

Here we perform the IDEA analysis using three Gene Ontology gene-set libraries:

- **BP**: Biological Process
- **CC**: Cellular Component
- **MF**: Molecular Function

The resulting figures are written to `figures/*.html` (relative to the root of this project directory) and can be opened in a browser.

**Note**:
Each library is run twice: once with a background gene set (every gene in the results table) and once without one.

This highlights the importance of supplying a background and shows how much the inferred enrichments can change when it is omitted.

## Mixed Population (Positive / Negative)

In [9]:
# Run every library first with the background, then without it.
for label, bg in (("mixed", background), ("mixed_nobg", None)):
    for library in ("BP", "CC", "MF"):
        build_network(sig, library, label, background=bg)

2023-10-06 09:42:52 - INFO - Found 80 differentially expressed genes.
2023-10-06 09:42:52 - INFO - Found 12 gene ontology terms.
2023-10-06 09:42:52 - INFO - Built bipartite graph with 12 terms, 19 genes, and 58 edges.
2023-10-06 09:42:52 - INFO - Visualization saved to ../figures/mixed_BP.html.
2023-10-06 09:42:54 - INFO - Found 80 differentially expressed genes.
2023-10-06 09:42:54 - INFO - Found 13 gene ontology terms.
2023-10-06 09:42:54 - INFO - Built bipartite graph with 13 terms, 54 genes, and 125 edges.
2023-10-06 09:42:54 - INFO - Visualization saved to ../figures/mixed_CC.html.
2023-10-06 09:42:56 - INFO - Found 80 differentially expressed genes.
2023-10-06 09:42:56 - INFO - Found 20 gene ontology terms.
2023-10-06 09:42:56 - INFO - Built bipartite graph with 20 terms, 48 genes, and 177 edges.
2023-10-06 09:42:56 - INFO - Visualization saved to ../figures/mixed_nobg_BP.html.


No significant terms found for MF!


2023-10-06 09:42:57 - INFO - Found 80 differentially expressed genes.
2023-10-06 09:42:57 - INFO - Found 15 gene ontology terms.
2023-10-06 09:42:57 - INFO - Built bipartite graph with 15 terms, 55 genes, and 124 edges.
2023-10-06 09:42:57 - INFO - Visualization saved to ../figures/mixed_nobg_CC.html.
2023-10-06 09:42:58 - INFO - Found 80 differentially expressed genes.
2023-10-06 09:42:58 - INFO - Found 20 gene ontology terms.
2023-10-06 09:42:58 - INFO - Built bipartite graph with 20 terms, 47 genes, and 146 edges.
2023-10-06 09:42:58 - INFO - Visualization saved to ../figures/mixed_nobg_MF.html.


## Negative Population

In [10]:
# Significant genes with a negative log fold change.
neg_sig = sig[sig["log_fold_change"] < 0]

# Run every library first with the background, then without it.
for label, bg in (("neg", background), ("neg_nobg", None)):
    for library in ("BP", "CC", "MF"):
        build_network(neg_sig, library, label, background=bg)

2023-10-06 09:43:04 - INFO - Found 40 differentially expressed genes.
2023-10-06 09:43:04 - INFO - Found 20 gene ontology terms.
2023-10-06 09:43:04 - INFO - Built bipartite graph with 20 terms, 26 genes, and 95 edges.
2023-10-06 09:43:04 - INFO - Visualization saved to ../figures/neg_nobg_BP.html.


No significant terms found for BP!
No significant terms found for CC!
No significant terms found for MF!


2023-10-06 09:43:05 - INFO - Found 40 differentially expressed genes.
2023-10-06 09:43:05 - INFO - Found 9 gene ontology terms.
2023-10-06 09:43:05 - INFO - Built bipartite graph with 9 terms, 28 genes, and 60 edges.
2023-10-06 09:43:05 - INFO - Visualization saved to ../figures/neg_nobg_CC.html.
2023-10-06 09:43:06 - INFO - Found 40 differentially expressed genes.
2023-10-06 09:43:06 - INFO - Found 20 gene ontology terms.
2023-10-06 09:43:06 - INFO - Built bipartite graph with 20 terms, 20 genes, and 69 edges.
2023-10-06 09:43:06 - INFO - Visualization saved to ../figures/neg_nobg_MF.html.


## Positive Population

In [11]:
# Significant genes with a positive log fold change.
pos_sig = sig[sig["log_fold_change"] > 0]

# Run every library first with the background, then without it.
for label, bg in (("pos", background), ("pos_nobg", None)):
    for library in ("BP", "CC", "MF"):
        build_network(pos_sig, library, label, background=bg)


2023-10-06 09:43:07 - INFO - Found 40 differentially expressed genes.
2023-10-06 09:43:07 - INFO - Found 20 gene ontology terms.
2023-10-06 09:43:07 - INFO - Built bipartite graph with 20 terms, 14 genes, and 61 edges.
2023-10-06 09:43:07 - INFO - Visualization saved to ../figures/pos_BP.html.
2023-10-06 09:43:09 - INFO - Found 40 differentially expressed genes.
2023-10-06 09:43:09 - INFO - Found 10 gene ontology terms.
2023-10-06 09:43:09 - INFO - Built bipartite graph with 10 terms, 11 genes, and 19 edges.
2023-10-06 09:43:09 - INFO - Visualization saved to ../figures/pos_CC.html.
2023-10-06 09:43:12 - INFO - Found 40 differentially expressed genes.
2023-10-06 09:43:12 - INFO - Found 20 gene ontology terms.
2023-10-06 09:43:12 - INFO - Built bipartite graph with 20 terms, 28 genes, and 100 edges.
2023-10-06 09:43:12 - INFO - Visualization saved to ../figures/pos_nobg_BP.html.


No significant terms found for MF!


2023-10-06 09:43:13 - INFO - Found 40 differentially expressed genes.
2023-10-06 09:43:13 - INFO - Found 20 gene ontology terms.
2023-10-06 09:43:13 - INFO - Built bipartite graph with 20 terms, 28 genes, and 76 edges.
2023-10-06 09:43:13 - INFO - Visualization saved to ../figures/pos_nobg_CC.html.
2023-10-06 09:43:14 - INFO - Found 40 differentially expressed genes.
2023-10-06 09:43:14 - INFO - Found 20 gene ontology terms.
2023-10-06 09:43:14 - INFO - Built bipartite graph with 20 terms, 26 genes, and 63 edges.
2023-10-06 09:43:14 - INFO - Visualization saved to ../figures/pos_nobg_MF.html.
