In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import yaml

# ======== Load config ========
# Read the run settings from the YAML config file. The encoding is pinned to
# UTF-8 so that parsing does not depend on the platform's default encoding.
with open("config/config.yaml", encoding="utf-8") as f:
    config = yaml.safe_load(f)

# Settings read from the config; each one is looked up by key.
CANCER_TYPE = config["cancer_type"]
METRIC = config["metric"]
RESULTS_DIR = config["results_dir"]

# ======== Input / Output ========
# All files live in RESULTS_DIR and carry the cancer type and metric in
# their names: the gene table is the input, the annotation table and the
# PDF plot are the outputs.
genes_path = os.path.join(RESULTS_DIR, f"top_PC1_contributor_genes__{CANCER_TYPE}__{METRIC}.tsv")
out_table = os.path.join(RESULTS_DIR, f"functional_annotation__{CANCER_TYPE}__{METRIC}.tsv")
out_plot = os.path.join(RESULTS_DIR, f"functional_annotation__{CANCER_TYPE}__{METRIC}.pdf")

# ======== Load genes ========
# Read the tab-separated gene table and collect the distinct, non-missing
# entries of its "Gene_Symbol" column as a plain Python list.
genes_df = pd.read_csv(genes_path, sep="\t")
symbol_column = genes_df["Gene_Symbol"]
genes = symbol_column.dropna().unique().tolist()

# Stop early when fewer than five genes are available.
gene_count = len(genes)
if gene_count < 5:
    raise ValueError(f"Too few genes ({len(genes)}) for enrichment analysis.")

print(f"Running functional annotation for {len(genes)} genes in {CANCER_TYPE}...")

# ======== g:Profiler API call ========
# POST the gene list to the g:Profiler endpoint below, restricted to the
# listed annotation sources, and decode the JSON response body.
URL = "https://biit.cs.ut.ee/gprofiler/api/gost/profile/"
payload = {
    "organism": "hsapiens",
    "query": genes,
    "sources": ["GO:BP", "KEGG", "REAC", "WP", "HP"],
}

# requests applies no timeout by default, so an unresponsive server would
# block the run forever; fail after this many seconds instead.
REQUEST_TIMEOUT_S = 60

r = requests.post(URL, json=payload, timeout=REQUEST_TIMEOUT_S)
# Turn HTTP error statuses (4xx/5xx) into an exception rather than parsing
# an error body as if it were a result.
r.raise_for_status()
res = r.json()

# ======== Parse results ========
# `res` is the decoded JSON response; enriched terms, if any, are listed
# under its "result" key.
if "result" not in res or not res["result"]:
    print("⚠️ No enrichment results found.")
    # Still write a header-only table so the output file exists.
    # NOTE: this placeholder has fewer columns than the populated table below.
    pd.DataFrame(columns=["term_name", "p_value", "source"]).to_csv(out_table, sep="\t", index=False)
else:
    df = pd.DataFrame(res["result"])
    df = df[["term_name", "source", "p_value", "term_size", "intersection_size"]]
    # Keep only the 15 terms with the smallest p-values.
    df = df.sort_values("p_value").head(15)
    df.to_csv(out_table, sep="\t", index=False)
    print(f"Saved annotation results → {out_table}")

    # ======== Plot top terms ========
    # Clamp p-values to the smallest positive float so that a p-value of
    # exactly 0 gives a large finite bar instead of an infinite one.
    p_values = df["p_value"].astype(float).clip(lower=np.finfo(float).tiny)
    neg_log_p = -np.log10(p_values.to_numpy())

    # Place bars at explicit numeric positions (smallest p-value at the top)
    # and label them afterwards; passing the names directly as categories
    # would stack terms that share a name onto a single bar.
    bar_positions = np.arange(len(df))[::-1]

    plt.figure(figsize=(7, 5))
    plt.barh(bar_positions, neg_log_p)
    plt.yticks(bar_positions, df["term_name"].tolist())
    plt.xlabel("-log10(p-value)")
    plt.ylabel("Enriched term")
    plt.tight_layout()
    plt.savefig(out_plot)
    print(f"Saved plot → {out_plot}")