# 02_exploratory_analysis
Exploratory plots and descriptive summaries.


In [None]:
import os
import sys

# Treat the parent of the current working directory as the repository root
# and put its src/ folder at the front of the import path (only once), so
# that local modules under src/ can be imported below.
repo_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
src_path = os.path.join(repo_root, "src")
if sys.path.count(src_path) == 0:
    sys.path.insert(0, src_path)

import pandas as pd
from data_utils import load_csv
from viz_utils import plot_group_boxplot, plot_timecourse


In [None]:
# Cleaned input tables live under <repo_root>/data/processed.
PROCESSED_DIR = os.path.join(repo_root, "data", "processed")
clinical_fp, survival_fp = (
    os.path.join(PROCESSED_DIR, csv_name)
    for csv_name in ("clinical_cleaned.csv", "survival_cleaned.csv")
)

# Read both tables (clinical first, then survival) and report their shapes.
df_clin, df_surv = pd.read_csv(clinical_fp), pd.read_csv(survival_fp)
print("clinical:", df_clin.shape, "survival:", df_surv.shape)

# Last expression of the cell: shows the first clinical rows as cell output.
df_clin.head()


In [None]:
# For the demo data the columns might be animalid, group, day, weight or similar.
# If your file has different column names, adjust the arguments below.
#
# The time-course plot uses three columns (x="day", y="weight", hue="group"),
# so all three are checked up front instead of only "weight"; a missing one is
# reported with the available column names rather than failing inside the helper.
timecourse_cols = ("day", "weight", "group")
timecourse_missing = [col for col in timecourse_cols if col not in df_clin.columns]

if not timecourse_missing:
    figures_dir = os.path.join(repo_root, "results", "figures")
    # Make sure the output folder exists, in case the plotting helper does
    # not create it itself (no-op when it is already there).
    os.makedirs(figures_dir, exist_ok=True)
    plot_timecourse(df_clin, x="day", y="weight", hue="group", title="Weight time-course (mean ± SD)",
                    savepath=os.path.join(figures_dir, "weight_timecourse.png"))
elif "weight" in timecourse_missing:
    print("No 'weight' column in clinical file. Head columns:", df_clin.columns.tolist())
else:
    print(f"Clinical file is missing column(s) {timecourse_missing}. Head columns:",
          df_clin.columns.tolist())


In [None]:
# Example: plot the distribution of one measurement across groups.
#
# Prefer "weight". Otherwise fall back to the last *numeric* column that is not
# the grouping column: blindly taking the last column could pick a text column
# or "group" itself, neither of which is a meaningful value axis for a boxplot.
if "weight" in df_clin.columns:
    val_col = "weight"
else:
    numeric_cols = df_clin.select_dtypes(include="number").columns.drop("group", errors="ignore")
    val_col = numeric_cols[-1] if len(numeric_cols) > 0 else None

if val_col is None or "group" not in df_clin.columns:
    # Nothing sensible to plot; show what the file actually contains.
    print("Cannot draw group boxplot: need a 'group' column and a numeric value column. Columns:",
          df_clin.columns.tolist())
else:
    figures_dir = os.path.join(repo_root, "results", "figures")
    # Make sure the output folder exists, in case the plotting helper does
    # not create it itself (no-op when it is already there).
    os.makedirs(figures_dir, exist_ok=True)
    plot_group_boxplot(df_clin, x="group", y=val_col, title=f"{val_col} by group",
                       savepath=os.path.join(figures_dir, f"{val_col}_by_group.png"))


In [None]:
from stats_utils import build_km_by_group
from viz_utils import plot_km

# Kaplan-Meier curves per group from the survival table.
# The helper is called with time_col="time", event_col="event" and
# group_col="group"; check those columns first so a differently named file is
# reported with its actual columns, mirroring the guard on the clinical plot.
km_cols = ("time", "event", "group")
km_missing = [col for col in km_cols if col not in df_surv.columns]

if km_missing:
    print(f"Survival file is missing column(s) {km_missing}. Head columns:",
          df_surv.columns.tolist())
else:
    km_dict = build_km_by_group(df_surv, time_col="time", event_col="event", group_col="group")
    figures_dir = os.path.join(repo_root, "results", "figures")
    # Make sure the output folder exists, in case the plotting helper does
    # not create it itself (no-op when it is already there).
    os.makedirs(figures_dir, exist_ok=True)
    plot_km(km_dict, title="Kaplan-Meier: demo groups",
            savepath=os.path.join(figures_dir, "km_demo.png"))
