## Run blitzGSEA

In [3]:
import os
import pandas as pd
import blitzgsea as blitz

In [12]:
def load_custom_gmt(path):
    """
    Parse a GMT file into a dict: {term_name: [gene1, gene2, ...], …}

    Each non-empty line is expected to be tab-separated:
    ``term_name <TAB> description <TAB> gene1 <TAB> gene2 ...``.
    The description (field 1) is discarded.

    Lines are skipped when the term name is empty or when no non-empty
    gene remains. Gene tokens are whitespace-trimmed and empty tokens
    (from consecutive or trailing tabs) are dropped. If a term name occurs
    more than once, the last occurrence wins.

    Parameters
    ----------
    path : str
        Path to the GMT file.

    Returns
    -------
    dict[str, list[str]]
        Mapping of term name to its list of gene identifiers.
    """
    library = {}
    with open(path, 'r') as f:
        for line in f:
            # Strip only the line terminator: a full strip() would remove
            # leading tabs and shift every field one position to the left.
            fields = line.rstrip('\r\n').split('\t')
            term = fields[0].strip()
            # fields[1] is the description; genes start at index 2.
            genes = [gene.strip() for gene in fields[2:] if gene.strip()]
            if term and genes:
                library[term] = genes
    return library


def run_gsea_pandas(input_tsv, gmt_file, processes=4):
    """
    Run blitzGSEA on a scored gene list with custom pathways and save a TSV.

    Parameters
    ----------
    input_tsv : str
        Path to a tab-separated file with a header row. The gene and score
        columns are taken by name ("symbol", "globalScore") when both are
        present; otherwise the first two columns are used, in that order.
    gmt_file : str
        Path to the GMT gene-set library, parsed with ``load_custom_gmt``.
    processes : int, default 4
        Forwarded to ``blitz.gsea``.

    Raises
    ------
    ValueError
        If the input has fewer than two columns, or if no row has both a
        gene symbol and a numeric score.

    Side effects
    ------------
    Writes ``<input_tsv without extension>_gsea.tsv`` and prints its path.
    Returns None.
    """
    # Load library
    library_sets = load_custom_gmt(gmt_file)

    # Read input TSV and normalise it to exactly two columns.
    # read_csv with a header produces string labels, so renaming by the
    # integer keys 0/1 would silently do nothing; select by name or position.
    signature_cols = ["symbol", "globalScore"]
    raw_df = pd.read_csv(input_tsv, sep="\t")
    if all(col in raw_df.columns for col in signature_cols):
        signature = raw_df[signature_cols]
    else:
        if raw_df.shape[1] < 2:
            raise ValueError(
                f"{input_tsv} must contain at least two columns (gene, score)"
            )
        signature = raw_df.iloc[:, :2].copy()
        signature.columns = signature_cols

    # Coerce scores to numbers, drop unusable rows, sort by score descending.
    signature = (
        signature
        .assign(globalScore=lambda d: pd.to_numeric(d["globalScore"], errors="coerce"))
        .dropna(subset=signature_cols)
        .sort_values("globalScore", ascending=False)
    )
    if signature.empty:
        raise ValueError(f"{input_tsv} contains no rows with a gene and a numeric score")

    # Run GSEA; the result index is moved into a "Term" column.
    # NOTE(review): assumes blitz.gsea wants a two-column (gene, score) frame — confirm.
    res_df = blitz.gsea(signature, library_sets, processes=processes).reset_index(names="Term")

    # Propagated edge: the full gene list of each term from library_sets
    res_df["propagated_edge"] = res_df["Term"].map(lambda t: ",".join(library_sets.get(t, [])))

    # Extract the bracketed ID (e.g. "Name [ID]") and strip it from Term
    term_series = res_df["Term"]
    res_df["ID"] = term_series.str.extract(r"\[([^\]]+)\]", expand=False).fillna("")
    res_df["Term"] = term_series.str.replace(r"\s*\[[^\]]+\]", "", regex=True).str.strip()

    # Ensure leading_edge is a string so it serialises cleanly to TSV
    if "leading_edge" in res_df.columns:
        res_df["leading_edge"] = res_df["leading_edge"].apply(
            lambda x: ",".join(x) if isinstance(x, (list, tuple)) else str(x)
        )

    # Reorder columns: Term and ID first, everything else in original order
    first_cols = ["Term", "ID"]
    res_df = res_df[first_cols + [c for c in res_df.columns if c not in first_cols]]

    # Save output next to the input file
    output_path = f"{os.path.splitext(input_tsv)[0]}_gsea.tsv"
    res_df.to_csv(output_path, sep="\t", index=False)

    print(f"GSEA results saved to {output_path}")


In [13]:
# Input: tab-separated table of scored genes for the GSEA run.
scores = "/Users/polina/Pathwaganda/scr/gsea/1_file_gsea/OT-EFO_0003767-associated-targets-13_08_2025-v25_06.tsv"
# Gene-set library in GMT format.
library = "/Users/polina/Pathwaganda/data/gmt_pathway_files_prep/Reactome/ReactomePathways_merged.gmt"

# Writes "<scores without extension>_gsea.tsv" next to the input file.
run_gsea_pandas(
    input_tsv=scores,
    gmt_file=library,
    processes=4,
)

GSEA results saved to /Users/polina/Pathwaganda/scr/gsea/1_file_gsea/OT-EFO_0003767-associated-targets-13_08_2025-v25_06_gsea.tsv
