In [1]:
# Cell 0: Environment & installs (RunPod/Colab-safe pins)

!pip -q uninstall -y peft transformers accelerate datasets rouge-score sacrebleu sentencepiece pandas matplotlib || true

!pip -q install "transformers==4.42.3" "datasets==2.19.1" \
                "peft==0.11.1" "accelerate==0.33.0" \
                "sacrebleu==2.4.2" "rouge-score==0.1.2" \
                "pandas==2.2.2" "matplotlib==3.8.4" \
                "sentencepiece==0.2.0"

import os, torch, transformers, datasets, peft, accelerate
os.environ["TOKENIZERS_PARALLELISM"] = "false"
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:128"

print("✅ Installed versions:",
      "\n - transformers:", transformers.__version__,
      "\n - datasets    :", datasets.__version__,
      "\n - peft        :", peft.__version__,
      "\n - accelerate  :", accelerate.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))


[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m
✅ Installed versions: 
 - transformers: 4.42.3 
 - datasets    : 2.19.1 
 - peft        : 0.11.1 
 - accelerate  : 0.33.0
CUDA available: True
GPU: NVIDIA A40


In [2]:
# Cell 1: Imports & constants

import os, json, time, random, gc, re
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch

from datasets import load_dataset
from transformers import (
    AutoTokenizer, AutoModelForSeq2SeqLM,
    DataCollatorForSeq2Seq,
    Seq2SeqTrainingArguments, Seq2SeqTrainer,
    EarlyStoppingCallback, TrainerCallback, set_seed
)
from peft import LoraConfig, get_peft_model, TaskType, PeftModel
import sacrebleu
from rouge_score import rouge_scorer
from transformers.trainer_utils import EvalPrediction

SEED = 42
set_seed(SEED)

# ---- Model & data ----
BASE_MODEL_ID = "google/flan-t5-small"
DATASET_ID    = "cnn_dailymail"
DATASET_CONF  = "3.0.0"

# ---- Output dirs ----
RUN_DIR  = "outputs/summarizer_lora/cnndm_run"
BEST_DIR = os.path.join(RUN_DIR, "best_checkpoint")
ART_DIR  = os.path.join(RUN_DIR, "artifacts")
os.makedirs(BEST_DIR, exist_ok=True)
os.makedirs(ART_DIR,  exist_ok=True)

# ---- Subset sizes ----
TRAIN_N, VAL_N, TEST_N = 6000, 600, 600   # increase/decrease if needed

# ---- Sequence lengths ----
MAX_SRC_LEN = 512
MAX_TGT_LEN = 96   # try 128 later if you want longer summaries

# ---- Training schedule ----
EPOCHS = 1
BATCH  = 1
ACCUM  = 8
EVAL_STEPS = 75    # ~10 evals across ~750 steps

print("✅ Setup complete.")


✅ Setup complete.


In [3]:
# Cell 2: Load CNN/DailyMail (v3.0.0) and peek

print(f"Loading dataset '{DATASET_ID}' config '{DATASET_CONF}' …")
raw = load_dataset(DATASET_ID, DATASET_CONF)  # fields: article, highlights, id
print(raw)

ex = raw["train"][0]
print("\nKeys:", list(ex.keys()))
print("[article][:240]:", ex["article"][:240].replace("\n"," "), "…")
print("[highlights]:", ex["highlights"])


Loading dataset 'cnn_dailymail' config '3.0.0' …


Downloading readme: 0.00B [00:00, ?B/s]

Downloading data:   0%|          | 0.00/257M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/257M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/259M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/34.7M [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/30.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/287113 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/13368 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/11490 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['article', 'highlights', 'id'],
        num_rows: 287113
    })
    validation: Dataset({
        features: ['article', 'highlights', 'id'],
        num_rows: 13368
    })
    test: Dataset({
        features: ['article', 'highlights', 'id'],
        num_rows: 11490
    })
})

Keys: ['article', 'highlights', 'id']
[article][:240]: LONDON, England (Reuters) -- Harry Potter star Daniel Radcliffe gains access to a reported £20 million ($41.1 million) fortune as he turns 18 on Monday, but he insists the money won't cast a spell on him. Daniel Radcliffe as Harry Potter in …
[highlights]: Harry Potter star Daniel Radcliffe gets £20M fortune as he turns 18 Monday .
Young actor says he has no plans to fritter his cash away .
Radcliffe's earnings from first five Potter films have been held in trust fund .


In [4]:
# Cell 3: Build balanced small splits (train/val/test), no overlap

def text_len(ds, i):
    return len(ds[i].get("article") or "")

def balanced_sample(ds, n, seed=SEED):
    n = min(n, len(ds))
    if n <= 0:
        return ds.select([])
    lengths = np.array([text_len(ds, i) for i in range(len(ds))])
    q1, q2 = np.quantile(lengths, [0.33, 0.66])
    idx_s = np.where(lengths <= q1)[0].tolist()
    idx_m = np.where((lengths > q1) & (lengths <= q2))[0].tolist()
    idx_l = np.where(lengths > q2)[0].tolist()
    rng = random.Random(seed)
    take = []
    for bucket, k in zip([idx_s, idx_m, idx_l], [n//3, n//3, n - 2*(n//3)]):
        take.extend(rng.sample(bucket, min(k, len(bucket))))
    rng.shuffle(take)
    return ds.select(take[:n])

train_small = balanced_sample(raw["train"],      TRAIN_N, seed=SEED)
val_small   = balanced_sample(raw["validation"], VAL_N,   seed=SEED+1)
test_small  = balanced_sample(raw["test"],       TEST_N,  seed=SEED+2)

print(f"Small splits -> train:{len(train_small)} val:{len(val_small)} test:{len(test_small)}")


Small splits -> train:6000 val:600 test:600


In [5]:
# Cell 4: Formatting (prefix 'summarize:' + label)

def fmt(ex):
    article = (ex.get("article") or "").strip()
    summary = (ex.get("highlights") or "").strip()
    return {
        "input_text": f"summarize: {article}",
        "label_text": summary
    }

fmt_train = train_small.map(fmt, remove_columns=train_small.column_names)
fmt_val   = val_small.map(fmt,   remove_columns=val_small.column_names)
fmt_test  = test_small.map(fmt,  remove_columns=test_small.column_names)

# Filter empty
def ok(e): return bool(e["input_text"].strip()) and bool(e["label_text"].strip())
fmt_train = fmt_train.filter(ok)
fmt_val   = fmt_val.filter(ok)
fmt_test  = fmt_test.filter(ok)

print("After format/filter ->",
      f"train:{len(fmt_train)} val:{len(fmt_val)} test:{len(fmt_test)}")


Map:   0%|          | 0/6000 [00:00<?, ? examples/s]

Map:   0%|          | 0/600 [00:00<?, ? examples/s]

Map:   0%|          | 0/600 [00:00<?, ? examples/s]

Filter:   0%|          | 0/6000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/600 [00:00<?, ? examples/s]

Filter:   0%|          | 0/600 [00:00<?, ? examples/s]

After format/filter -> train:6000 val:600 test:600


In [6]:
# Cell 5: Tokenization (pad fixed; mask label pads with -100)

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_ID)

def tok(batch):
    model_inputs = tokenizer(
        batch["input_text"],
        max_length=MAX_SRC_LEN,
        truncation=True,
        padding="max_length",
    )
    tgt = tokenizer(
        text_target=batch["label_text"],
        max_length=MAX_TGT_LEN,
        truncation=True,
        padding="max_length",
    )
    pad_id = tokenizer.pad_token_id
    labels = [[t if t != pad_id else -100 for t in seq] for seq in tgt["input_ids"]]
    model_inputs["labels"] = labels
    return model_inputs

tok_train = fmt_train.map(tok, batched=True, remove_columns=fmt_train.column_names)
tok_val   = fmt_val.map(  tok, batched=True, remove_columns=fmt_val.column_names)
tok_test  = fmt_test.map( tok, batched=True, remove_columns=fmt_test.column_names)

# Safety: drop rows with all -100 labels
def has_label_tokens(e): return any(t != -100 for t in e["labels"])
tok_train = tok_train.filter(has_label_tokens)
tok_val   = tok_val.filter(has_label_tokens)
tok_test  = tok_test.filter(has_label_tokens)

print("✅ Tokenized:", len(tok_train), len(tok_val), len(tok_test))


tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

Map:   0%|          | 0/6000 [00:00<?, ? examples/s]

Map:   0%|          | 0/600 [00:00<?, ? examples/s]

Map:   0%|          | 0/600 [00:00<?, ? examples/s]

Filter:   0%|          | 0/6000 [00:00<?, ? examples/s]

Filter:   0%|          | 0/600 [00:00<?, ? examples/s]

Filter:   0%|          | 0/600 [00:00<?, ? examples/s]

✅ Tokenized: 6000 600 600


In [7]:
# Cell 6: Model + LoRA (stable FP32)

base = AutoModelForSeq2SeqLM.from_pretrained(BASE_MODEL_ID)  # FP32
base.config.use_cache = False
base.gradient_checkpointing_enable()

lora_cfg = LoraConfig(
    r=16, lora_alpha=32, lora_dropout=0.05,
    target_modules=["q","k","v","o"],
    bias="none",
    task_type=TaskType.SEQ_2_SEQ_LM,
)
model = get_peft_model(base, lora_cfg)
model.print_trainable_parameters()


config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/308M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

trainable params: 1,376,256 || all params: 78,337,408 || trainable%: 1.7568


In [8]:
# Cell 7: Collator, callbacks, robust metrics

collator = DataCollatorForSeq2Seq(tokenizer, model=model)

class MetricsRecorder(TrainerCallback):
    def __init__(self): self.rows = []
    def on_log(self, args, state, control, logs=None, **kw):
        if not logs: return
        row = {"step": int(state.global_step)}
        for k, v in logs.items():
            if k.startswith(("loss","eval_","learning_rate")):
                row[k] = float(v) if isinstance(v, (int,float)) else v
        self.rows.append(row)

class EmptyCacheCallback(TrainerCallback):
    def on_evaluate(self, args, state, control, **kwargs):
        torch.cuda.empty_cache(); gc.collect()

recorder = MetricsRecorder()

# Robust metrics (handles logits vs ids)
def compute_metrics(eval_pred: EvalPrediction):
    preds = getattr(eval_pred, "predictions", None)
    labels = getattr(eval_pred, "label_ids", None)
    if preds is None or labels is None:
        preds, labels = eval_pred

    if isinstance(preds, torch.Tensor): preds = preds.detach().cpu().numpy()
    if isinstance(labels, torch.Tensor): labels = labels.detach().cpu().numpy()

    arr = np.asarray(preds, dtype=object)
    if arr.dtype not in (np.int64, np.int32):
        try:
            preds = np.argmax(arr, axis=-1) if getattr(arr, "ndim", 0) == 3 else \
                    np.array([np.asarray(p).argmax(-1) for p in arr], dtype=np.int64)
        except Exception:
            preds = np.asarray(preds).astype(np.int64, copy=False)

    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_preds  = tokenizer.batch_decode(preds,  skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    decoded_preds  = [p.strip() for p in decoded_preds]
    decoded_labels = [l.strip() for l in decoded_labels]

    bleu = sacrebleu.corpus_bleu(decoded_preds, [decoded_labels]).score

    scorer = rouge_scorer.RougeScorer(["rouge1","rouge2","rougeLsum"], use_stemmer=True)
    r1=r2=rl=0.0
    n = len(decoded_labels)
    for ref, pred in zip(decoded_labels, decoded_preds):
        s = scorer.score(ref, pred)
        r1 += s["rouge1"].fmeasure
        r2 += s["rouge2"].fmeasure
        rl += s["rougeLsum"].fmeasure
    r1, r2, rl = 100*r1/n, 100*r2/n, 100*rl/n

    return {"bleu": bleu, "rouge1_f": r1, "rouge2_f": r2, "rougeL_f": rl}


In [9]:
# Cell 8: TrainingArguments (stable; frequent evals; early-stop on eval_loss)

args = Seq2SeqTrainingArguments(
    output_dir=RUN_DIR,
    num_train_epochs=EPOCHS,
    per_device_train_batch_size=BATCH,
    per_device_eval_batch_size=BATCH,
    gradient_accumulation_steps=ACCUM,

    # Stability knobs for T5
    optim="adafactor",
    learning_rate=1e-4,
    warmup_ratio=0.06,
    weight_decay=0.0,
    max_grad_norm=0.3,
    label_smoothing_factor=0.1,

    logging_steps=25,

    # Use alias you prefer:
    eval_strategy="steps",
    eval_steps=EVAL_STEPS,

    save_strategy="steps",
    save_steps=EVAL_STEPS,
    save_total_limit=2,
    load_best_model_at_end=True,

    # ✅ Use eval_loss for best checkpoint (more stable than early ROUGE)
    metric_for_best_model="eval_loss",
    greater_is_better=False,

    # memory-safe evaluation during training (greedy by default)
    predict_with_generate=True,
    generation_max_length=MAX_TGT_LEN,
    generation_num_beams=1,
    eval_accumulation_steps=64,
    group_by_length=True,

    include_inputs_for_metrics=False,
    report_to="none",

    # FP32 training
    fp16=False, bf16=False,

    dataloader_num_workers=0,
)


In [10]:
# Cell 9: Trainer & Train

def preprocess_logits_for_metrics(logits, labels):
    if isinstance(logits, tuple): logits = logits[0]
    return logits.argmax(dim=-1)

trainer_kwargs = dict(
    model=model,
    args=args,
    train_dataset=tok_train,
    eval_dataset=tok_val,
    data_collator=collator,
    compute_metrics=compute_metrics,
    callbacks=[recorder, EarlyStoppingCallback(early_stopping_patience=8), EmptyCacheCallback()],
    preprocess_logits_for_metrics=preprocess_logits_for_metrics,
)

# Prefer processing_class (new) -> fallback to tokenizer (older stacks)
try:
    trainer = Seq2SeqTrainer(processing_class=tokenizer, **trainer_kwargs)
except TypeError:
    trainer = Seq2SeqTrainer(tokenizer=tokenizer, **trainer_kwargs)

print("🚀 Starting training…")
t0 = time.time()
trainer.train()
print(f"⏱️ Training done in {(time.time()-t0)/60:.1f} min")
print("Best checkpoint:", trainer.state.best_model_checkpoint)


🚀 Starting training…


Step,Training Loss,Validation Loss,Bleu,Rouge1 F,Rouge2 F,Rougel F
75,4.0908,3.837052,0.0,0.0,0.0,0.0
150,3.9556,3.629176,0.0,0.0,0.0,0.0
225,3.8096,3.567265,0.0,0.0,0.0,0.0
300,3.8544,3.525495,0.0,0.0,0.0,0.0
375,3.799,3.502305,0.0,0.0,0.0,0.0
450,3.7484,3.487947,0.0,0.0,0.0,0.0
525,3.8056,3.479552,0.0,0.0,0.0,0.0
600,3.7617,3.475936,0.0,0.0,0.0,0.0
675,3.7344,3.475637,0.0,0.0,0.0,0.0
750,3.8668,3.474071,0.0,0.0,0.0,0.0


⏱️ Training done in 92.2 min
Best checkpoint: outputs/summarizer_lora/cnndm_run/checkpoint-750


In [11]:
# Cell 10: Save best LoRA adapters + tokenizer

print("💾 Saving best LoRA adapters…")
trainer.save_model(BEST_DIR)
tokenizer.save_pretrained(BEST_DIR)
print("✅ Saved to:", BEST_DIR)


💾 Saving best LoRA adapters…
✅ Saved to: outputs/summarizer_lora/cnndm_run/best_checkpoint


In [12]:
# Cell 11: Clean loss plots (train + eval), saved to ART_DIR

metrics_df = pd.DataFrame(recorder.rows).drop_duplicates("step").sort_values("step")
csv_path = os.path.join(ART_DIR, "training_metrics.csv")
metrics_df.to_csv(csv_path, index=False)
print("📈 Metrics CSV:", csv_path)

import numpy as np
def series_xy(frame, ycol):
    if ycol not in frame.columns: return None, None
    d = frame[["step", ycol]].dropna()
    if d.empty: return None, None
    return d["step"].astype(int).values, d[ycol].astype(float).values

# Plot: Loss
fig, ax = plt.subplots(figsize=(8,4.5))

x_tr, y_tr = series_xy(metrics_df, "loss")
if x_tr is not None:
    ax.plot(x_tr, y_tr, alpha=0.35, label="train_loss")
    tr_df = pd.DataFrame({"step": x_tr, "loss": y_tr})
    tr_df["ema"] = tr_df["loss"].ewm(span=10, adjust=False).mean()
    ax.plot(tr_df["step"], tr_df["ema"], label="train_loss (EMA)")

x_ev, y_ev = series_xy(metrics_df, "eval_loss")
if x_ev is not None:
    ax.plot(x_ev, y_ev, marker="o", linewidth=1.5, label="eval_loss")

ax.set_title("Loss vs Steps")
ax.set_xlabel("Step"); ax.set_ylabel("Loss")
ax.grid(True, alpha=0.2); ax.legend()
loss_png = os.path.join(ART_DIR, "loss_curve_clean.png")
plt.tight_layout(); plt.savefig(loss_png, dpi=150); plt.close()
print("🖼️ Saved:", loss_png)


📈 Metrics CSV: outputs/summarizer_lora/cnndm_run/artifacts/training_metrics.csv
🖼️ Saved: outputs/summarizer_lora/cnndm_run/artifacts/loss_curve_clean.png


In [13]:
# Cell 12: Final eval (greedy) + Beam eval (better summaries) + save results

print("Running final GREEDY evaluation on Validation and Test sets...")
final_val  = trainer.evaluate(eval_dataset=tok_val)   # keys: eval_*
final_test = trainer.evaluate(eval_dataset=tok_test)
print("📊 FINAL VAL (greedy):", final_val)
print("📊 FINAL TEST (greedy):", final_test)

print("\nRunning BEAM evaluation (num_beams=4, min_new_tokens=16)…")
beam_val = trainer.evaluate(
    eval_dataset=tok_val,
    metric_key_prefix="beam4",
    num_beams=4,
    max_length=MAX_TGT_LEN,
    min_new_tokens=16,
    no_repeat_ngram_size=3,
    length_penalty=1.0,
    early_stopping=True,
)
beam_test = trainer.evaluate(
    eval_dataset=tok_test,
    metric_key_prefix="beam4_test",
    num_beams=4,
    max_length=MAX_TGT_LEN,
    min_new_tokens=16,
    no_repeat_ngram_size=3,
    length_penalty=1.0,
    early_stopping=True,
)
print("🔁 VAL (beam=4):", beam_val)
print("🔁 TEST (beam=4):", beam_test)

# Save JSON summary for report
summary = {
    "dataset": {"id": DATASET_ID, "config": DATASET_CONF,
                "train_n": len(tok_train), "val_n": len(tok_val), "test_n": len(tok_test)},
    "model": BASE_MODEL_ID,
    "peft_lora": {"r": 16, "alpha": 32, "dropout": 0.05, "targets": ["q","k","v","o"]},
    "seq_lens": {"src": MAX_SRC_LEN, "tgt": MAX_TGT_LEN},
    "training": {
        "epochs": EPOCHS, "batch_per_device": BATCH, "grad_accum": ACCUM,
        "optim": "adafactor", "lr": 1e-4, "warmup_ratio": 0.06,
        "label_smoothing": 0.1, "max_grad_norm": 0.3,
        "best_checkpoint": trainer.state.best_model_checkpoint
    },
    "final_val_greedy": final_val,
    "final_test_greedy": final_test,
    "final_val_beam4": beam_val,
    "final_test_beam4": beam_test,
    "artifacts": {
        "metrics_csv": os.path.join(ART_DIR, "training_metrics.csv"),
        "loss_curve_png": os.path.join(ART_DIR, "loss_curve_clean.png"),
        "best_adapters_dir": BEST_DIR
    }
}
with open(os.path.join(ART_DIR, "run_summary.json"), "w") as f: json.dump(summary, f, indent=2)
with open(os.path.join(ART_DIR, "run_summary.txt"), "w") as f: f.write(json.dumps(summary, indent=2))
print("💾 Saved run summary to ART_DIR.")

# Bar chart for final BEAM metrics (VAL)
vals = {
    "BLEU":      beam_val.get("beam4_bleu", 0.0),
    "ROUGE-1 F": beam_val.get("beam4_rouge1_f", 0.0),
    "ROUGE-2 F": beam_val.get("beam4_rouge2_f", 0.0),
    "ROUGE-L F": beam_val.get("beam4_rougeL_f", 0.0),
}
plt.figure(figsize=(6.5,4))
plt.bar(list(vals.keys()), list(vals.values()))
plt.title("Final Validation Metrics (beam=4, min_new_tokens=16)")
plt.ylabel("Score")
plt.tight_layout()
bar_png = os.path.join(ART_DIR, "final_val_metrics_beam4.png")
plt.savefig(bar_png, dpi=150)
plt.close()
print("🖼️ Saved:", bar_png)


Running final GREEDY evaluation on Validation and Test sets...


📊 FINAL VAL (greedy): {'eval_loss': 3.4740705490112305, 'eval_bleu': 0.0, 'eval_rouge1_f': 0.0, 'eval_rouge2_f': 0.0, 'eval_rougeL_f': 0.0, 'eval_runtime': 537.5496, 'eval_samples_per_second': 1.116, 'eval_steps_per_second': 1.116, 'epoch': 1.0}
📊 FINAL TEST (greedy): {'eval_loss': 3.4630119800567627, 'eval_bleu': 0.0, 'eval_rouge1_f': 0.0, 'eval_rouge2_f': 0.0, 'eval_rougeL_f': 0.0, 'eval_runtime': 488.2553, 'eval_samples_per_second': 1.229, 'eval_steps_per_second': 1.229, 'epoch': 1.0}

Running BEAM evaluation (num_beams=4, min_new_tokens=16)…


early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled
early stopping required metric_for_best_model, but did not find eval_loss so early stopping is disabled


🔁 VAL (beam=4): {'beam4_loss': 3.4740705490112305, 'beam4_bleu': 0.0, 'beam4_rouge1_f': 0.0, 'beam4_rouge2_f': 0.0, 'beam4_rougeL_f': 0.0, 'beam4_runtime': 640.6021, 'beam4_samples_per_second': 0.937, 'beam4_steps_per_second': 0.937, 'epoch': 1.0}
🔁 TEST (beam=4): {'beam4_test_loss': 3.4630119800567627, 'beam4_test_bleu': 0.0, 'beam4_test_rouge1_f': 0.0, 'beam4_test_rouge2_f': 0.0, 'beam4_test_rougeL_f': 0.0, 'beam4_test_runtime': 655.0492, 'beam4_test_samples_per_second': 0.916, 'beam4_test_steps_per_second': 0.916, 'epoch': 1.0}
💾 Saved run summary to ART_DIR.
🖼️ Saved: outputs/summarizer_lora/cnndm_run/artifacts/final_val_metrics_beam4.png


In [14]:
# Cell 13: Sample predictions (beam=4) for qualitative examples

def gen_batch(texts, max_len=MAX_TGT_LEN, num_beams=4, min_new_tokens=16):
    mdl = trainer.model.eval()
    enc = tokenizer(texts, return_tensors="pt", padding=True, truncation=True, max_length=MAX_SRC_LEN)
    if torch.cuda.is_available():
        mdl.cuda(); enc = {k: v.cuda() for k, v in enc.items()}
    with torch.no_grad():
        out = mdl.generate(
            **enc, num_beams=num_beams, max_length=max_len,
            min_new_tokens=min_new_tokens, no_repeat_ngram_size=3,
            length_penalty=1.0, early_stopping=True
        )
    return tokenizer.batch_decode(out, skip_special_tokens=True)

n = min(20, len(fmt_val))
sample_inputs  = [fmt_val[i]["input_text"] for i in range(n)]
sample_targets = [fmt_val[i]["label_text"] for i in range(n)]
preds = gen_batch(sample_inputs)

pred_df = pd.DataFrame({
    "input_preview": [s[:280].replace("\n"," ") + ("..." if len(s)>280 else "") for s in sample_inputs],
    "target_summary": sample_targets,
    "predicted_summary": [p.strip() for p in preds]
})
pred_csv = os.path.join(ART_DIR, "sample_predictions.csv")
pred_df.to_csv(pred_csv, index=False)
print("🧪 Sample predictions saved:", pred_csv)


🧪 Sample predictions saved: outputs/summarizer_lora/cnndm_run/artifacts/sample_predictions.csv


In [15]:
# Cell 14: Merge LoRA into base (single folder model)

merge_dir = os.path.join(RUN_DIR, "merged_full_model")
os.makedirs(merge_dir, exist_ok=True)

base_m = AutoModelForSeq2SeqLM.from_pretrained(BASE_MODEL_ID)
peft_m = PeftModel.from_pretrained(base_m, BEST_DIR)
peft_m = peft_m.merge_and_unload()
peft_m.config.use_cache = True
peft_m.save_pretrained(merge_dir, safe_serialization=True)
tokenizer.save_pretrained(merge_dir)
print("✅ Merged model saved to:", merge_dir)


✅ Merged model saved to: outputs/summarizer_lora/cnndm_run/merged_full_model


In [16]:
# Cell 15: Zip ALL artifacts (models + plots + logs) for download

import shutil, pathlib

# Create a manifest for convenience
manifest = {
    "run_dir": RUN_DIR,
    "adapters_dir": BEST_DIR,
    "merged_model_dir": os.path.join(RUN_DIR, "merged_full_model"),
    "artifacts_dir": ART_DIR,
    "files": {
        "metrics_csv": os.path.join(ART_DIR, "training_metrics.csv"),
        "loss_curve_png": os.path.join(ART_DIR, "loss_curve_clean.png"),
        "final_val_metrics_beam4_png": os.path.join(ART_DIR, "final_val_metrics_beam4.png"),
        "sample_predictions_csv": os.path.join(ART_DIR, "sample_predictions.csv"),
        "run_summary_json": os.path.join(ART_DIR, "run_summary.json"),
        "run_summary_txt": os.path.join(ART_DIR, "run_summary.txt"),
    },
}
with open(os.path.join(ART_DIR, "manifest.json"), "w") as f:
    json.dump(manifest, f, indent=2)
print("🧾 Wrote manifest:", os.path.join(ART_DIR, "manifest.json"))

zip_path = shutil.make_archive("summarizer_all_artifacts", "zip", root_dir=RUN_DIR)
print("📦 ZIP created at:", zip_path)


🧾 Wrote manifest: outputs/summarizer_lora/cnndm_run/artifacts/manifest.json
📦 ZIP created at: /workspace/summarizer_all_artifacts.zip
