In [2]:
import pandas as pd
import matplotlib.pyplot as plt

In [7]:
######################################################

In [21]:
# One record per cytokine, in display order:
# (cytokine, interpretation from the model, interpretation from the literature, agreement status).
# Keeping each cytokine's fields on a single line avoids having to keep four
# separate lists index-aligned.
summary_rows = [
    # Model interpretation: treatment effect in WT
    ("MIP_1a", "Lower in Treated (WT)", "Pro-inflammatory; up in tumors", "Strong_Agree"),
    ("Fractalkine", "Lower in Treated (WT)", "Mixed: tumor suppressive & supportive", "Suggestive"),
    ("MCP_1", "Lower in Treated (WT)", "Pro-tumor macrophage recruitment", "Strong_Agree"),
    ("IL_11", "Lower in Treated (WT)", "Supports tumor survival, chemoresistance", "Suggestive"),
    ("Eotaxin", "Higher in Treated (WT)", "Increased after rapamycin (mice)", "Strong_Agree"),
    ("IL_3", "Lower in Treated (WT)", "Growth factor in hematologic tumors", "Suggestive"),
    ("TIMP_1", "Higher in Treated (WT)", "Matrix remodeling, poor prognosis", "Strong_Agree"),
    ("LIX", "Lower in Treated (WT)", "Neutrophil recruitment, tumor-promoting", "Suggestive"),
    ("IP_10", "Higher in Treated (WT)", "Anti-tumor immune attractant", "Strong_Agree"),
    ("IL_12p40", "Higher in Treated (WT)", "Pro-inflammatory cytokine, immune activator", "Suggestive"),
    # Model interpretation: treatment effect in Mut
    ("G_CSF", "Lower in Treated (Mut)", "Enhances tumor proliferation, high in cancers", "Strong_Agree"),
    ("MIP_3b", "Higher in Treated (Mut)", "Chemokine, lymphoid homing", "Suggestive"),
    ("KC", "Higher in Treated (Mut)", "Angiogenesis, tumor inflammation", "Suggestive"),
    ("MIP_3a", "Lower in Treated (Mut)", "Inflammation, chemotactic", "Suggestive"),
    ("IL_17", "Higher in Treated (Mut)", "Th17 cytokine, tumor growth", "Suggestive"),
    ("IL_6", "Higher in Treated (Mut)", "IL-6: p53-linked tumor growth and immune escape", "Strong_Agree"),
    ("IL_9", "Higher in Treated (Mut)", "Immune-modulatory, context-dependent in cancer", "Suggestive"),
    # Model interpretation: genotype effect
    ("TARC", "Higher in Mut (Genotype)", "Lymphoma-associated; T cell attractant", "Strong_Agree"),
    ("EPO", "Lower in WT (Genotype)", "Growth-promoting; p53 suppressor", "Strong_Agree"),
    ("IL_13", "Higher in Mut (Genotype)", "Th2 cytokine, tumor supportive", "Suggestive"),
    ("IL_1b", "Higher in Mut (Genotype)", "Pro-inflammatory cytokine, tumor angiogenesis", "Strong_Agree"),
    ("MDC", "Higher in Mut (Genotype)", "Chemokine; dendritic cell attractant", "Suggestive"),
    ("IL_2", "Higher in Mut (Genotype)", "T cell growth factor", "Suggestive"),
    ("IFNb_1", "Higher in Mut (Genotype)", "Type I IFN; antiviral, immune-modulatory", "Suggestive"),
    # Not covered by the model
    ("IL_5", "Unmodeled", "IL-5: pro-eosinophil, cancer-linked", "Suggestive"),
    ("IL_20", "Unmodeled", "IL-20: wound healing, tumor progression", "Suggestive"),
]

summary_table = pd.DataFrame(
    summary_rows,
    columns=[
        "Cytokine",
        "Interpretation_from_Model",
        "Interpretation_from_Literature",
        "Agreement_Status",
    ],
)

# pfold shift annotation per cytokine; only the direction differs between entries.
pfold_shift_dict = {
    cytokine: f"Rapa-specific {direction} in Mut"
    for cytokine, direction in [
        ("G_CSF", "DOWN"),
        ("IL_1b", "DOWN"),
        ("IL_5", "DOWN"),
        ("IL_13", "UP"),
        ("IFNb_1", "DOWN"),
        ("IL_11", "DOWN"),
        ("IL_20", "DOWN"),
        ("MDC", "DOWN"),
        ("MIP_3b", "DOWN"),
    ]
}

# Cytokines without an entry in the mapping are left as missing (NaN) here.
shift_column = "pfold_shifts_model_baseline_corrected"
summary_table[shift_column] = summary_table["Cytokine"].map(pfold_shift_dict)


def _join_unique(values):
    """Return the distinct entries of ``values``, sorted and joined with '; '."""
    return "; ".join(sorted(set(values)))


# Collapse any repeated cytokine into a single row holding the union of its
# annotations; the result is ordered by the "Cytokine" group key.
summary_combined = (
    summary_table
    .groupby("Cytokine")
    .agg({
        "Interpretation_from_Model": _join_unique,
        "Interpretation_from_Literature": _join_unique,
        "Agreement_Status": _join_unique,
        # Missing annotations are discarded before joining, so a cytokine
        # with no pfold shift ends up with an empty string.
        shift_column: lambda shifts: _join_unique(shifts.dropna()),
    })
    .reset_index()
)

In [22]:
# Display the per-cytokine summary with its pfold-shift column
# (this is the ungrouped table, not `summary_combined`).
summary_table

Unnamed: 0,Cytokine,Interpretation_from_Model,Interpretation_from_Literature,Agreement_Status,pfold_shifts_model_baseline_corrected
0,MIP_1a,Lower in Treated (WT),Pro-inflammatory; up in tumors,Strong_Agree,
1,Fractalkine,Lower in Treated (WT),Mixed: tumor suppressive & supportive,Suggestive,
2,MCP_1,Lower in Treated (WT),Pro-tumor macrophage recruitment,Strong_Agree,
3,IL_11,Lower in Treated (WT),"Supports tumor survival, chemoresistance",Suggestive,Rapa-specific DOWN in Mut
4,Eotaxin,Higher in Treated (WT),Increased after rapamycin (mice),Strong_Agree,
5,IL_3,Lower in Treated (WT),Growth factor in hematologic tumors,Suggestive,
6,TIMP_1,Higher in Treated (WT),"Matrix remodeling, poor prognosis",Strong_Agree,
7,LIX,Lower in Treated (WT),"Neutrophil recruitment, tumor-promoting",Suggestive,
8,IP_10,Higher in Treated (WT),Anti-tumor immune attractant,Strong_Agree,
9,IL_12p40,Higher in Treated (WT),"Pro-inflammatory cytokine, immune activator",Suggestive,


## Why were other cytokines missed, and does the correlation data tell us anything about it?

In [23]:
# Setup for the correlation analysis: the two cytokine groups to examine and
# the dataset they are looked up in. The baseline subset and its correlation
# matrix are computed in the analysis cell that follows, where they are used,
# so they are not duplicated here.

# Cytokines examined as cancer biomarkers.
cancer_biomarkers = [
    "TARC", "EPO", "IL_13", "IL_1b", "Fractalkine", "TIMP_1",
    "MIP_1a", "MDC", "IL_2", "IFNb_1"
]

# Cytokines examined as treatment-related.
treatment_related_cytokines = [
    "MIP_1a", "Fractalkine", "MCP_1", "IL_11", "Eotaxin", "IL_3", "TIMP_1",
    "LIX", "IP_10", "IL_12p40", "G_CSF", "MIP_3b", "KC", "MIP_3a", "IL_17", "IL_6", "IL_9"
]

df = pd.read_csv("cytokine_dataset_post_mice_imputation_expt46.csv")

# Metadata columns; every remaining column is treated as a cytokine measurement.
# NOTE(review): any other non-cytokine column present in the CSV (e.g. a saved
# index column) would also end up in `cytokine_cols` - confirm there is none.
exclude_cols = ["experiment_number", "timepoint", "mouse", "Genotype", "treatment", "observation"]
cytokine_cols = [col for col in df.columns if col not in exclude_cols]

In [24]:
def strong_correlations(corr_matrix, targets, label, cutoff):
    """List, for each target cytokine, the cytokines it is strongly correlated with.

    Parameters
    ----------
    corr_matrix : pd.DataFrame
        Square correlation matrix indexed and labelled by cytokine name,
        as produced by ``DataFrame.corr()``.
    targets : list of str
        Cytokine names to look up. Names missing from ``corr_matrix`` are
        skipped and produce no record.
    label : str
        Key under which the target name is stored in each record; it becomes
        the column name once the records are turned into a DataFrame.
    cutoff : float
        A partner is kept when the absolute value of its correlation with the
        target is strictly greater than this threshold.

    Returns
    -------
    list of dict
        One record per target found, holding the target name under ``label``
        and, under "Highly Correlated Cytokines", the partner names ordered
        from strongest to weakest absolute correlation.
    """
    records = []
    for name in targets:
        if name not in corr_matrix.index:
            continue
        # Drop the target itself so its self-correlation of 1.0 is never reported.
        partners = corr_matrix[name].drop(name)
        strong = partners[partners.abs() > cutoff]
        records.append({
            label: name,
            "Highly Correlated Cytokines": strong.sort_values(key=abs, ascending=False).index.tolist()
        })
    return records


# Absolute-correlation threshold shared by both analyses below.
cutoff = 0.7

# Cancer biomarkers - baseline only, i.e. t=t0 (before rapamycin)
df_before = df[df["timepoint"] == "observation before rapamycin"]
corr_matrix_baseline = df_before[cytokine_cols].corr()

baseline_results = strong_correlations(
    corr_matrix_baseline, cancer_biomarkers, "Biomarker", cutoff
)
baseline_df = pd.DataFrame(baseline_results)

# Treatment-related cytokines - before + after observations pooled
df_ba = df[df["timepoint"].isin(["observation before rapamycin", "observation after rapamycin"])]
corr_matrix_treatment = df_ba[cytokine_cols].corr()

treatment_results = strong_correlations(
    corr_matrix_treatment, treatment_related_cytokines, "Treatment Cytokine", cutoff
)
treatment_df = pd.DataFrame(treatment_results)

treatment_df

Unnamed: 0,Treatment Cytokine,Highly Correlated Cytokines
0,MIP_1a,"[IL_2, IL_13, GM_CSF, VEGF, IL_12p70, IL_4]"
1,Fractalkine,[]
2,MCP_1,[]
3,IL_11,[]
4,Eotaxin,[]
5,IL_3,[]
6,TIMP_1,[]
7,LIX,[]
8,IP_10,[]
9,IL_12p40,[]


In [14]:
# Display the baseline result: for each cancer biomarker, the cytokines whose
# absolute correlation with it exceeds `cutoff` in the
# "observation before rapamycin" rows.
baseline_df

Unnamed: 0,Biomarker,Highly Correlated Cytokines
0,TARC,"[TIMP_1, Fractalkine]"
1,EPO,[]
2,IL_13,[]
3,IL_1b,[]
4,Fractalkine,"[TIMP_1, TARC]"
5,TIMP_1,"[Fractalkine, TARC]"
6,MIP_1a,"[VEGF, IL_2, IFNg, KC, IL_12p70]"
7,MDC,[]
8,IL_2,"[VEGF, MIP_1a, IFNg, IL_4, KC, IL_17]"
9,IFNb_1,[]
