In [None]:
# --- Model / training hyperparameters ---------------------------------------
# NOTE(review): presumably consumed by training code outside this section — confirm.
MODEL = 't5-base'
EPOCHS = 10
BATCH_SIZE = 8
NUM_PROCS = 4
MAX_LENGTH = 1024  # Maximum context length to consider while preparing dataset.

# --- Output location ---------------------------------------------------------
# NOTE(review): MODEL says 't5-base' while OUT_DIR mentions 't5_large' — confirm
# which one this run is meant to describe.
# Previous value: OUT_DIR = 'results_t5base/2k_samples'
OUT_DIR = 'results_t5_large_regularized/10k_samples_fixed'

# --- Input data --------------------------------------------------------------
DRIVE_DATA_PATH = "/content/drive/MyDrive/processed/10k_samples"   # UPDATE PATH
CLEAN_TEXT_COLUMN = 'article'
SUMMARY_COLUMN = 'highlights'

# --- Mutable run state -------------------------------------------------------
epoch_metrics = []

In [None]:
!pip install tensorboard
!pip install tensorboard-data-server
!pip install google-cloud-storage
!pip install tbparse matplotlib seaborn pandas numpy





In [None]:
# Disabled duplicate: Colab authentication is performed in the gcsfuse setup cell.
# from google.colab import auth
# auth.authenticate_user()

In [None]:
# Disabled alternative to mounting the bucket: copy the 2k-sample logs from GCS to local disk.
# !mkdir -p /content/fixed_logs/
# !gsutil -m cp -r gs://models_checkpoint/models/results_t5_base_fixed/2k_samples/* /content/fixed_logs/

In [None]:
from google.colab import auth
auth.authenticate_user()

# Install gcsfuse
!echo "deb http://packages.cloud.google.com/apt gcsfuse-bionic main" > /etc/apt/sources.list.d/gcsfuse.list
!curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | apt-key add -
!apt -qq update
!apt -qq install gcsfuse

# Create a local directory for mounting
!mkdir results_t5_base_regularized
# models_regularized_run/models/results_t5_base/2k_samples
# Mount the GCS bucket
# Replace 'your-bucket-name' with the actual name of your GCS bucket
!gcsfuse --implicit-dirs models_regularized_run results_t5_base_regularized

In [None]:
# Mapping of run label -> directory containing that run's TensorBoard logs.
# Bucket-relative layout: models_regularized_run/models/results_t5_base/2k_samples
RUNS = dict(
    t5_2k="/content/results_t5_base_regularized/models/results_t5_base/2k_samples",
    # add more runs here
)

In [None]:
import pandas as pd
from tbparse import SummaryReader
import os

def load_run(run_path):
    """Return the TensorBoard scalars logged under ``run_path``.

    Args:
        run_path: Path handed to ``SummaryReader``.

    Returns:
        The reader's ``scalars`` attribute (a DataFrame of scalar summaries).
    """
    return SummaryReader(run_path).scalars


In [None]:
def extract_metrics(run_name, run_path):
    """Collect the headline metrics of one training run from its logs.

    Scalars are read with ``tbparse.SummaryReader``. They may arrive in long
    format (``step`` / ``tag`` / ``value`` columns) or in wide format (one
    column per tag next to ``step``); both layouts are handled.

    Args:
        run_name: Label stored under the ``"run"`` key of the result.
        run_path: Directory passed to ``SummaryReader``. If it contains a
            ``hallucination.csv`` file, its ``hallucination_rate`` column is
            averaged as well.

    Returns:
        dict with the keys

        * ``run``: ``run_name``.
        * ``train_loss`` / ``val_loss``: DataFrame with ``step`` and ``loss``
          columns, ordered by step, built from the ``train/loss`` and
          ``eval/loss`` tags; ``None`` when the tag has no values.
        * ``rouge1`` / ``rouge2`` / ``rougeL``: value of ``eval/rouge*`` at
          the highest logged step, or ``None``.
        * ``gpu_avg``: mean of ``gpu_memory_gb``, or ``None``.
        * ``train_time_sec``: first logged ``total_training_time_seconds``
          value, or ``None``.
        * ``hallucination``: mean ``hallucination_rate`` from
          ``hallucination.csv``, or ``None`` when the file does not exist.
    """
    # Imported locally so the function stays usable on its own.
    import os
    import pandas as pd
    from tbparse import SummaryReader

    scalars = SummaryReader(run_path).scalars

    # Long format carries the tag name in a dedicated column.
    is_long_format = "tag" in scalars.columns

    def get_values(tag):
        """Return a ['step', 'value'] frame for ``tag`` (log order), or None."""
        if is_long_format:
            sub = scalars[scalars["tag"] == tag][["step", "value"]]
        elif tag in scalars.columns:
            # Wide format: rows where this tag was not logged hold NaN.
            sub = scalars[["step", tag]].dropna().rename(columns={tag: "value"})
        else:
            return None
        return None if sub.empty else sub

    def get_by_step(tag):
        """Like ``get_values`` but ordered by step.

        Rows are not guaranteed to come back in step order (e.g. when several
        event files are combined), so anything that depends on order sorts
        first. The sort is stable, so already-ordered logs are unchanged.
        """
        values = get_values(tag)
        return None if values is None else values.sort_values("step", kind="mergesort")

    def get_curve(tag):
        """Return the tag as a normalized step/loss DataFrame, or None."""
        ordered = get_by_step(tag)
        if ordered is None:
            return None
        return pd.DataFrame({
            "step": ordered["step"].values,
            "loss": ordered["value"].values,
        })

    def get_last_metric(tag):
        """Return the value logged at the highest step for ``tag``, or None."""
        ordered = get_by_step(tag)
        return ordered["value"].iloc[-1] if ordered is not None else None

    # --- 1./2. LOSS CURVES ---
    train_loss = get_curve("train/loss")
    val_loss = get_curve("eval/loss")

    # --- 3. ROUGE METRICS ---
    rouge1 = get_last_metric("eval/rouge1")
    rouge2 = get_last_metric("eval/rouge2")
    rougeL = get_last_metric("eval/rougeL")

    # --- 4. GPU USAGE ---
    gpu_raw = get_values("gpu_memory_gb")
    gpu_avg = gpu_raw["value"].mean() if gpu_raw is not None else None

    # --- 5. TRAINING TIME ---
    tt_raw = get_values("total_training_time_seconds")
    train_time = tt_raw["value"].iloc[0] if tt_raw is not None else None

    # --- 6. HALLUCINATION ---
    hall_path = os.path.join(run_path, "hallucination.csv")
    if os.path.exists(hall_path):
        hallucination = pd.read_csv(hall_path)["hallucination_rate"].mean()
    else:
        hallucination = None

    # --- FINAL OUTPUT ---
    return {
        "run": run_name,
        "train_loss": train_loss,  # normalized step/loss DataFrame or None
        "val_loss": val_loss,      # normalized step/loss DataFrame or None
        "rouge1": rouge1,
        "rouge2": rouge2,
        "rougeL": rougeL,
        "gpu_avg": gpu_avg,
        "train_time_sec": train_time,
        "hallucination": hallucination
    }


In [None]:
# Extract the metrics of every configured run.
all_metrics = [extract_metrics(run_name, run_path) for run_name, run_path in RUNS.items()]

# One summary row per run (the loss curves stay in all_metrics).
summary_df = pd.DataFrame([{
    "run": m["run"],
    "rouge1": m["rouge1"],
    "rouge2": m["rouge2"],
    "rougeL": m["rougeL"],
    "gpu_avg": m["gpu_avg"],
    # Compare against None explicitly: a truthiness test would also turn a
    # logged training time of 0 seconds into "missing".
    "train_time_min": m["train_time_sec"] / 60 if m["train_time_sec"] is not None else None,
    "hallucination": m["hallucination"]
} for m in all_metrics])

summary_df


In [None]:
import matplotlib.pyplot as plt

# Draw one train-vs-validation loss figure per run; a run that lacks either
# curve is skipped.
for m in all_metrics:
    train_curve, val_curve = m["train_loss"], m["val_loss"]
    if train_curve is None or val_curve is None:
        continue

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(train_curve["step"], train_curve["loss"], label="Train Loss")
    ax.plot(val_curve["step"], val_curve["loss"], label="Validation Loss")

    ax.set_title(f"Loss Curve for {m['run']}")
    ax.set_xlabel("Step")
    ax.set_ylabel("Loss")
    ax.legend()
    ax.grid(True)
    plt.show()


In [None]:
import matplotlib.pyplot as plt

# Derive the training-set size from the run name (e.g. "t5_4k" -> 4000) right
# here, so this cell does not depend on a later cell having added the
# "data_size" column first. `assign` leaves summary_df itself untouched.
df_sorted = (
    summary_df
    .assign(data_size=summary_df["run"].apply(
        lambda name: int(name.split("_")[1].replace("k", "000"))
    ))
    .sort_values("data_size")
)

plt.figure(figsize=(10,6))
plt.plot(df_sorted["data_size"], df_sorted["rouge1"], marker="o", label="ROUGE-1")
plt.plot(df_sorted["data_size"], df_sorted["rouge2"], marker="o", label="ROUGE-2")
plt.plot(df_sorted["data_size"], df_sorted["rougeL"], marker="o", label="ROUGE-L")

plt.xscale("log")
plt.xlabel("Training Data Size (log scale)")
plt.ylabel("ROUGE Score")
plt.title("ROUGE Score vs Training Data Size")
plt.legend()
plt.grid(True)
plt.show()


In [None]:
import seaborn as sns

# Extract data size from run name (assumes names like "t5_4k" -> 4000)
summary_df["data_size"] = summary_df["run"].apply(lambda x: int(x.split("_")[1].replace("k","000")))

# Plot in ascending data-size order so the lines never double back when the
# runs were registered in a different order; summary_df keeps its row order.
by_size = summary_df.sort_values("data_size")

fig, ax = plt.subplots(figsize=(10, 6))
for column, label in (("rouge1", "ROUGE-1"), ("rouge2", "ROUGE-2"), ("rougeL", "ROUGE-L")):
    ax.plot(by_size["data_size"], by_size[column], marker="o", label=label)
ax.set_xscale("log")
ax.set_xlabel("Training Data Size (log-scale)")
ax.set_ylabel("ROUGE Score")
ax.legend()
ax.set_title("ROUGE vs Training Data Size")
plt.show()


In [None]:
# Bar chart of the final ROUGE-1 score, one bar per run.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=summary_df, x="run", y="rouge1", ax=ax)
ax.set_title("ROUGE-1 Across Models")
plt.show()


In [None]:
# Bar chart of the mean hallucination rate, one bar per run.
fig, ax = plt.subplots(figsize=(10, 5))
sns.barplot(data=summary_df, x="run", y="hallucination", ax=ax)
ax.set_title("Hallucination Rate by Model")
ax.set_ylabel("Hallucination Rate")
plt.show()


In [None]:
# Training time (bars, left axis) and average GPU memory (line, right axis)
# for every run, drawn on a shared x axis.
fig, time_ax = plt.subplots(figsize=(10, 6))
time_ax.bar(
    summary_df["run"],
    summary_df["train_time_min"],
    color="blue",
    label="Training Time (min)",
)
time_ax.set_ylabel("Training Time (min)", color="blue")

# Second y axis sharing the same x axis.
memory_ax = time_ax.twinx()
memory_ax.plot(
    summary_df["run"],
    summary_df["gpu_avg"],
    color="red",
    marker="o",
    label="GPU Memory (GB)",
)
memory_ax.set_ylabel("GPU Memory (GB)", color="red")

memory_ax.set_title("Training Time and GPU Usage per Run")
plt.show()


In [None]:
# Load the TensorBoard notebook extension and open TensorBoard inline on
# /content/tb_logs.
# NOTE(review): no visible cell writes to or mounts /content/tb_logs (the GCS
# bucket is mounted under results_t5_base_regularized) — confirm this is the
# intended log directory.
%load_ext tensorboard
%tensorboard --logdir /content/tb_logs


In [None]:
# Read the train / validation splits from the Drive folder configured in
# DRIVE_DATA_PATH, keeping only the leading rows of each file.
import pandas as pd

print("Loading data...")
train_df = pd.read_csv(f"{DRIVE_DATA_PATH}/train.csv").iloc[:2000]  # first 2000 rows
val_df = pd.read_csv(f"{DRIVE_DATA_PATH}/val.csv").iloc[:200]       # first 200 rows
# test_df = pd.read_csv(f"{DRIVE_DATA_PATH}/test.csv")

# Disabled alternatives kept for reference:
# train_df, val_df = train_test_split(train_df, test_size=0.2, shuffle=True)

# train_df = train_df.dropna(subset=['Summary', 'clean_text'])
# val_df = val_df.dropna(subset=['Summary', 'clean_text'])

# Report the split sizes actually in memory.
print("Train:", train_df.shape[0])
print("Val:", val_df.shape[0])
# print("Test:", len(test_df))

In [None]:
# Open TensorBoard inline on the directory named by LOG_DIR.
# NOTE(review): LOG_DIR is not defined in any visible cell — confirm it is set
# before this cell runs. The tensorboard extension is also loaded by an
# earlier cell already.
%load_ext tensorboard
%tensorboard --logdir $LOG_DIR