<a href="https://colab.research.google.com/github/shravanisci/ABC-Immune-Circuit/blob/main/COVID_ABC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install GEOparse

In [None]:
import os
# Create the "results" output directory; exist_ok=True keeps re-runs from failing
# when the directory is already there.
os.makedirs("results", exist_ok=True)
print("results/ folder ready")


In [None]:
import GEOparse
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

# Directory that the expression-matrix export writes into.
# NOTE(review): the "_ibd" suffix looks carried over from a different analysis —
# confirm it is the intended location for this GSE157103 notebook.
os.makedirs("results_ibd", exist_ok=True)


In [None]:
# GEO series accession analysed in this notebook.
GSE_ID = "GSE157103"

print("Downloading GEO dataset...")
# Fetch the series through GEOparse, using the current directory as destdir.
# NOTE(review): annotate_gpl=True presumably requests the annotated platform
# table — confirm against the GEOparse documentation.
gse = GEOparse.get_GEO(GSE_ID, destdir=".", annotate_gpl=True)

# gse.gsms holds one entry per sample; its length is reported as the sample count.
print("Total samples:", len(gse.gsms))


In [None]:
# List the supplementary file entries recorded in the series metadata; the
# empty-list default keeps the loop safe when the key is missing.
print("Supplementary files available:")
for f in gse.metadata.get("supplementary_file", []):
    print(f)


In [None]:
import urllib.request
import os

# Source URL of the TPM matrix and the local file name it is cached under.
tpm_url = "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE157nnn/GSE157103/suppl/GSE157103_genes.tpm.tsv.gz"
tpm_file = "GSE157103_genes.tpm.tsv.gz"

if os.path.exists(tpm_file):
    print("TPM file already exists.")
else:
    print("Downloading TPM matrix...")
    # Download to a temporary name first: if the transfer is interrupted, no
    # truncated file is left under the final name, so the existence check above
    # cannot mistake a partial download for a complete one on the next run.
    partial_file = tpm_file + ".part"
    try:
        urllib.request.urlretrieve(tpm_url, partial_file)
        os.replace(partial_file, tpm_file)
    finally:
        # Only true after a failed transfer; a successful os.replace already
        # moved the file away.
        if os.path.exists(partial_file):
            os.remove(partial_file)
    print("Download complete.")


In [None]:
# Shell check: show the size and timestamp of the downloaded TPM archive.
!ls -lh GSE157103_genes.tpm.tsv.gz


In [None]:
# Core data loading: parse the tab-separated TPM table and use its first
# column as the row index.
tpm_table_path = "GSE157103_genes.tpm.tsv.gz"
expr = pd.read_csv(tpm_table_path, sep="\t", index_col=0)

print("Raw matrix shape (genes × samples):", expr.shape)

# Preview the top-left 5 × 5 corner only; the full table is too wide to display.
expr.head(5).iloc[:, :5]


In [None]:
# Upper-case the gene labels and flip the table so that rows = samples.
# NOTE: this reassigns `expr`, so running the cell a second time transposes the
# matrix back — re-run the loading cell before repeating this one.
expr = expr.set_axis(expr.index.str.upper(), axis=0).T

print("Expression matrix ready")
print("Shape (samples × genes):", expr.shape)


In [None]:
# Write the samples × genes expression matrix to CSV (row index included).
out_path = "results_ibd/GSE157103_expression.csv"
expr.to_csv(out_path)

print("Saved expression matrix to:", out_path)


In [None]:
def resolve_signal(expr, gene_list):
    """Return the per-row mean over the requested genes that exist in ``expr``.

    Parameters
    ----------
    expr : pandas.DataFrame
        Table whose columns are looked up by the names in ``gene_list``.
    gene_list : iterable of str
        Candidate column names; names absent from ``expr`` are ignored.

    Returns
    -------
    pandas.Series or None
        Row-wise mean of the matching columns, or ``None`` when none of the
        requested names is a column of ``expr``.
    """
    columns = expr.columns
    matched = [gene for gene in gene_list if gene in columns]
    if not matched:
        return None
    return expr[matched].mean(axis=1)


In [None]:
# Gene set averaged into each composite signal (see resolve_signal).
signal_gene_sets = {
    "TNF":  ["TNF", "TNFRSF1A", "TNFRSF1B"],
    "IL6":  ["IL6", "IL6R", "IL6ST"],
    "IFNG": ["IFNG", "IFNGR1", "IFNGR2"],
    "IL10": ["IL10", "IL10RA", "IL10RB"],
}

signals = {
    name: resolve_signal(expr, genes)
    for name, genes in signal_gene_sets.items()
}

# Keep only the signals for which at least one gene was found
# (resolve_signal returns None otherwise).
signals_clean = {
    name: values for name, values in signals.items() if values is not None
}

print("Resolved signals:", list(signals_clean))

signals_df = pd.DataFrame(signals_clean)
signals_df.head()


In [None]:
# Quantile used as the cutoff for each signal in the regulatory circuit.
reg_quantiles = {"TNF": 0.75, "IL6": 0.75, "IFNG": 0.25}

# A signal that is not a column of signals_df gets a None cutoff.
thresholds_reg = {
    name: (signals_df[name].quantile(level) if name in signals_df else None)
    for name, level in reg_quantiles.items()
}

thresholds_reg


NameError: name 'signals_df' is not defined

In [None]:
# Regulatory circuit: a sample is ON (1) when TNF and IL6 are above their
# cutoffs AND IFNG is below its cutoff; cutoffs are read from thresholds_reg.
# Each rule is (signal name, True if the value must exceed the cutoff).
reg_rules = (("TNF", True), ("IL6", True), ("IFNG", False))

reg_mask = np.ones(len(signals_df), dtype=bool)
for signal_name, must_exceed in reg_rules:
    cutoff = thresholds_reg.get(signal_name)
    if cutoff is None or signal_name not in signals_df:
        # The signal could not be resolved, so the AND gate can never be
        # satisfied: mark every sample OFF instead of comparing against None.
        reg_mask[:] = False
        break
    signal_values = signals_df[signal_name]
    passed = signal_values > cutoff if must_exceed else signal_values < cutoff
    reg_mask &= passed.to_numpy()

# 0/1 flag per sample, indexed like signals_df.
abc_on_reg = pd.Series(reg_mask.astype(int), index=signals_df.index)

print(
    "Regulatory ABC ON:", abc_on_reg.sum(), "/", len(abc_on_reg),
    f"({100*abc_on_reg.mean():.2f}%)",
)


In [None]:
# Quantile used as the cutoff for each signal in the effector circuit.
eff_quantiles = {"TNF": 0.25, "IL6": 0.25, "IFNG": 0.75}

# A signal that is not a column of signals_df gets a None cutoff.
thresholds_eff = {
    name: (signals_df[name].quantile(level) if name in signals_df else None)
    for name, level in eff_quantiles.items()
}

thresholds_eff

In [None]:
# Effector circuit: a sample is ON (1) when TNF, IL6 and IFNG are all above
# their cutoffs in thresholds_eff.
# NOTE(review): TNF and IL6 are compared with ">" against what look like
# lower-quartile cutoffs — confirm this permissive gate is intended.
eff_mask = np.ones(len(signals_df), dtype=bool)
for signal_name in ("TNF", "IL6", "IFNG"):
    cutoff = thresholds_eff.get(signal_name)
    if cutoff is None or signal_name not in signals_df:
        # The signal could not be resolved, so the AND gate can never be
        # satisfied: mark every sample OFF instead of comparing against None.
        eff_mask[:] = False
        break
    eff_mask &= (signals_df[signal_name] > cutoff).to_numpy()

# 0/1 flag per sample, indexed like signals_df.
abc_on_eff = pd.Series(eff_mask.astype(int), index=signals_df.index)

print(
    "Effector ABC ON:", abc_on_eff.sum(), "/", len(abc_on_eff),
    f"({100*abc_on_eff.mean():.2f}%)",
)


In [None]:
# Save the per-sample effector ON/OFF flags (1 = ON) as CSV.
abc_on_eff.to_csv("results/GSE157103_ABC_effector_state.csv")


In [None]:
# Save the per-sample regulatory ON/OFF flags (1 = ON) as CSV.
abc_on_reg.to_csv("results/GSE157103_ABC_regulatory_state.csv")


In [None]:
# Display the contents of the results directory to confirm the exports exist.
os.listdir("results")

In [None]:
import os
# Make sure the output directory exists before writing (safe on re-runs).
os.makedirs("results", exist_ok=True)

# Second export of the same two state Series, this time under COVID_* file names.
abc_on_reg.to_csv("results/COVID_regulatory.csv")
abc_on_eff.to_csv("results/COVID_effector.csv")

# Print the directory listing to confirm which files were written.
print(os.listdir("results"))


NameError: name 'abc_on_reg' is not defined