In [None]:
# Pro-level Notebook D (model-aware templates & GPU checks)
# More robust: model-aware prompts, GPU memory check, BitsAndBytes import, target module heuristics.

!pip install -q pandas transformers bitsandbytes peft

import os, json, datetime, torch
from pathlib import Path
import pandas as pd
from textwrap import dedent

# -------------------------
# Config / Paths
# -------------------------
# Input: the merged ranking table (the error below says Notebook C produces it).
FINAL_CSV = "/content/llmed_certification_FineTuneFlow/outputs/benchmarks/notebook_C/final_ranking.csv"
# Output folder for every artifact this notebook writes (plan, scripts, JSON).
OUT_DIR = "/content/llmed_certification_FineTuneFlow/outputs/benchmarks/notebook_D_pro"
os.makedirs(OUT_DIR, exist_ok=True)

# Fail fast with an actionable message when the upstream artifact is missing.
if not os.path.exists(FINAL_CSV):
    raise FileNotFoundError(f"Cannot find merged ranking CSV at {FINAL_CSV}. Run Notebook C first.")

# The first CSV column becomes the index; later code treats the index labels
# as the model names.
df = pd.read_csv(FINAL_CSV, index_col=0)
print("Loaded final ranking:")
# NOTE: `display` is not imported here; it is expected to come from the
# IPython/Jupyter runtime.
display(df.head())

# -------------------------
# Improved size map & infer
# -------------------------
# Name fragment -> approximate parameter count. Lookup is a first-match
# substring scan in insertion order, so each specific fragment is listed
# before the generic family fallback that would also match it.
size_hints = {
    # encoder-decoder families
    "bart-large": "0.4B",
    "bart": "0.4B",
    "t5-large": "0.8B",
    "t5": "0.8B",
    # decoder-only families
    "llama-1b": "1B",
    "llama-3b": "3B",
    "llama": "3B",
    "phi-3-mini": "4B",
    "phi": "4B",
}


def infer_model_size(model_name):
    """Return the size hint of the first ``size_hints`` fragment found in the name.

    The name is lower-cased and every "/" is replaced by "-" before matching.
    Returns the string "unknown" when no fragment matches.

    NOTE: fragments such as "llama-1b" match short labels like "LLaMA-1B";
    a full hub id like "meta-llama/Llama-3.2-1B-Instruct" only matches the
    generic "llama" entry and therefore yields "3B".
    """
    normalized = model_name.replace("/", "-").lower()
    matches = (hint for fragment, hint in size_hints.items() if fragment in normalized)
    return next(matches, "unknown")

# Annotate every ranked model with a size hint inferred from its index label.
df["size_hint"] = df.index.map(infer_model_size)

# -------------------------
# Detect GPU mem (if available)
# -------------------------
def get_gpu_mem_gb():
    """Return the total memory of CUDA device 0 in decimal gigabytes.

    Best-effort probe: returns None when CUDA is unavailable or when querying
    the device raises any exception.
    """
    try:
        if torch.cuda.is_available():
            total_bytes = torch.cuda.get_device_properties(0).total_memory
            # Decimal GB (1e9 bytes), not GiB.
            return total_bytes / 1e9
        return None
    except Exception:
        # Deliberately swallow errors: a failed probe is treated as "no GPU".
        return None

# Probe once; `gpu_mem` is None when no CUDA device could be queried and is
# passed to recommend_method() and written into the plan further down.
gpu_mem = get_gpu_mem_gb()
print("Detected GPU mem (GB):", gpu_mem)

# -------------------------
# Model-aware recommendation logic
# -------------------------
def recommend_method(model_name, size_hint, gpu_mem_gb=None):
    """Recommend a fine-tuning approach for a model.

    Args:
        model_name: Model label or id; matched case-insensitively.
        size_hint: Size string such as "0.4B" or "3B". Anything that cannot
            be parsed as a number (e.g. "unknown" or None) yields the
            "manual check" recommendation.
        gpu_mem_gb: Total GPU memory in GB, or None when unknown.

    Returns:
        A human-readable recommendation string. The encoder-decoder
        recommendation contains the word "encoder", which the script-emission
        step of this notebook uses to choose the seq2seq LoRA template.
    """
    ln = model_name.lower()
    if "bart" in ln or "t5" in ln:
        return "LoRA (PEFT) — encoder–decoder friendly"

    try:
        gb = float(size_hint.replace("b", "").replace("B", "").strip())
    except (AttributeError, ValueError):
        # AttributeError: size_hint is not a string; ValueError: not numeric.
        # Narrow on purpose so KeyboardInterrupt/SystemExit still propagate.
        return "QLoRA (recommended) / manual check"

    # Refine by GPU memory when it is known; fall through when no rule fires.
    if gpu_mem_gb is not None:
        if gb <= 3 and gpu_mem_gb >= 24:
            return "LoRA or QLoRA (both possible on >=24GB)"
        if gb <= 4.5 and gpu_mem_gb >= 40:
            return "QLoRA (4-bit) on local GPU"
        if gb > 6 and gpu_mem_gb < 40:
            return "Hosted fine-tuning / QLoRA on A100/H100"

    # Size-only fallback rules.
    if gb <= 1.5:
        return "LoRA or full fine-tune"
    if gb <= 4.5:
        return "QLoRA (4-bit) — use local or cloud with >=40GB"
    if gb <= 8:
        return "QLoRA (4-bit) — GPU with >=40GB recommended"
    return "Hosted fine-tuning / QLoRA on A100/H100"

def hyperparams_suggestion(model_name, size_hint):
    """Suggest training hyperparameters from the model family and size.

    Args:
        model_name: Model label or id; matched case-insensitively.
        size_hint: Size string such as "0.4B". When it cannot be parsed
            (e.g. "unknown" or None) a size of 3.0 is assumed.

    Returns:
        dict with keys "epochs", "micro_batch_size" and "lr".
    """
    ln = model_name.lower()
    try:
        gb = float(size_hint.replace("b", "").replace("B", "").strip())
    except (AttributeError, ValueError):
        # Unparseable hint: fall back to a mid-sized assumption. The except
        # is narrow so unrelated errors and interrupts are not hidden.
        gb = 3.0

    # Model-aware tweak: T5 starts from a higher base learning rate.
    base_lr = 2e-4 if "t5" in ln else 1e-4

    if gb <= 1.5:
        return {"epochs": 3, "micro_batch_size": 8, "lr": base_lr}
    if gb <= 4.5:
        return {"epochs": 3, "micro_batch_size": 4, "lr": base_lr / 2}
    if gb <= 8:
        return {"epochs": 3, "micro_batch_size": 1, "lr": base_lr / 2}
    return {"epochs": 2, "micro_batch_size": 1, "lr": base_lr / 5}

# -------------------------
# Top-K and recommendations
# -------------------------
# Number of best-ranked models to generate plans and scripts for.
TOP_K = 2
# Highest composite_score first; keep the first TOP_K rows.
top_models = df.sort_values("composite_score", ascending=False).head(TOP_K)
print("\nTop selected models:")
display(top_models)

# Maps each selected index label to its size hint, recommended method and
# suggested hyperparameters; reused below for the plan, the scripts and the
# JSON metadata file.
recommendations = {}
for model in top_models.index:
    size_hint = infer_model_size(model)
    method = recommend_method(model, size_hint, gpu_mem)
    hps = hyperparams_suggestion(model, size_hint)
    recommendations[model] = {
        "size_hint": size_hint,
        "method": method,
        "recommended_hyperparams": hps,
    }

# -------------------------
# Model-aware prompt templates and target module heuristics
# -------------------------
def prompt_template_for_model(model_name):
    """Return a callable that wraps raw text in a family-specific prompt.

    Args:
        model_name: Model label or id; matched case-insensitively.

    Returns:
        A one-argument function ``text -> prompt``. Families are checked in
        the order llama, phi/microsoft, t5/flan; any other name gets the
        identity template.
    """
    ln = model_name.lower()
    # "meta-llama" always contains "llama", so one check covers both.
    if "llama" in ln:
        return lambda text: f"[INST] Summarize the conversation:\n{text} [/INST]"
    if "phi" in ln or "microsoft" in ln:
        # Real newlines here: the previous "\\n" emitted a literal backslash
        # followed by "n" into the prompt.
        return lambda text: f"<|system|>Summarize the conversation.<|end|>\n{text}\n<|assistant|>"
    if "t5" in ln or "flan" in ln:
        return lambda text: f"summarize: {text}"
    return lambda text: text

def guess_target_modules(model_name):
    """Guess the attention projection module names to adapt with LoRA.

    The names must match the module names inside the model, otherwise PEFT
    cannot attach adapters. Families are matched case-insensitively:

    - T5 variants name their attention projections "q"/"k"/"v"/"o".
    - Phi-3 uses a fused "qkv_proj" plus "o_proj".
    - BART, LLaMA and anything unrecognised fall back to "q_proj"/"v_proj".

    Args:
        model_name: Model label or id.

    Returns:
        A new list of module-name strings (safe for the caller to mutate).
    """
    ln = model_name.lower()
    if "t5" in ln:
        return ["q", "v"]
    if "phi-3" in ln or "phi3" in ln:
        return ["qkv_proj", "o_proj"]
    # Conservative default shared by BART, LLaMA-style and earlier Phi models.
    return ["q_proj", "v_proj"]

# -------------------------
# Write plan + scripts with improved safety
# -------------------------
# Timestamp used in the plan header and in the plan file name.
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
plan_lines = ["# Fine-tuning Plan (Auto-Generated)", "", f"Generated: {timestamp}", ""]

# One markdown section per selected model, numbered in ranking order.
for i, (name, row) in enumerate(top_models.iterrows(), start=1):
    rec = recommendations[name]
    plan_lines.append(f"### {i}. {name}")
    # Missing metric columns are reported as 0 rather than raising.
    plan_lines.append(f"- Composite score: {row.get('composite_score', 0):.4f}")
    plan_lines.append(f"- ROUGE-L: {row.get('rougeL', 0):.2f}%")
    plan_lines.append(f"- Inferred size: {rec['size_hint']}")
    plan_lines.append(f"- Detected GPU mem (GB): {gpu_mem}")
    plan_lines.append(f"- Recommended method: **{rec['method']}**")
    plan_lines.append(f"- Hyperparameters: `{rec['recommended_hyperparams']}`")
    plan_lines.append("")

plan_path = Path(OUT_DIR) / f"finetune_plan_pro_{timestamp}.md"
# Explicit UTF-8: the recommendation strings contain non-ASCII punctuation,
# so relying on the platform default encoding could fail or corrupt the file.
with plan_path.open("w", encoding="utf-8") as f:
    f.write("\n".join(plan_lines))

print(f"\n✔ Fine-tuning plan written to {plan_path}")

# templates for scripts (similar to Option 1 but with BitsAndBytes import and prompt usage)
from textwrap import indent

def make_lora_script(model_id, data_path, out_dir, target_modules, hps):
    """Render a standalone LoRA training script for an encoder-decoder model.

    Args:
        model_id: Hugging Face model id or local path to load.
        data_path: Path to a JSON-lines training file; the generated script
            reads the "dialogue" and "summary" fields of each record.
        out_dir: Directory the generated script saves the adapter to.
        target_modules: Iterable of module names passed to ``LoraConfig``.
        hps: Mapping with keys "micro_batch_size", "epochs" and "lr".

    Returns:
        The script source as a string.
    """
    # json.dumps produces a double-quoted, escaped literal that is also valid
    # Python, so quotes or backslashes in a path cannot break the script.
    model_lit = json.dumps(str(model_id), ensure_ascii=False)
    data_lit = json.dumps(str(data_path), ensure_ascii=False)
    out_lit = json.dumps(str(out_dir), ensure_ascii=False)
    modules_lit = repr(list(target_modules))
    return dedent(f"""\
    # Auto-generated LoRA script (encoder-decoder)
    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, TrainingArguments, Trainer
    from peft import LoraConfig, get_peft_model
    from datasets import load_dataset

    MODEL = {model_lit}
    DATASET_PATH = {data_lit}
    OUTPUT_DIR = {out_lit}

    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    model = AutoModelForSeq2SeqLM.from_pretrained(MODEL)

    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    lora_cfg = LoraConfig(r=8, lora_alpha=32, target_modules={modules_lit}, lora_dropout=0.05, task_type="SEQ_2_SEQ_LM")
    model = get_peft_model(model, lora_cfg)

    ds = load_dataset("json", data_files={{"train": DATASET_PATH}})["train"]

    def tokenize_fn(example):
        out = tokenizer(example["dialogue"], truncation=True, max_length=768)
        labels = tokenizer(example["summary"], truncation=True, max_length=192).input_ids
        out["labels"] = labels
        return out

    train_ds = ds.map(tokenize_fn, remove_columns=ds.column_names)

    # Examples have different lengths: pad inputs and labels per batch
    # (labels are padded with -100 so padding is ignored by the loss).
    data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

    training_args = TrainingArguments(
        output_dir=OUTPUT_DIR,
        per_device_train_batch_size={hps['micro_batch_size']},
        num_train_epochs={hps['epochs']},
        learning_rate={hps['lr']},
        fp16=True,
        save_strategy="no",
    )

    trainer = Trainer(model=model, args=training_args, train_dataset=train_ds, data_collator=data_collator)
    trainer.train()
    model.save_pretrained(OUTPUT_DIR)
    """)

def make_q_lora_script(model_id, data_path, out_dir, target_modules, hps):
    """Render a standalone QLoRA (4-bit) training script for a decoder-only model.

    The generated script trains on ``prompt + summary`` sequences: for a
    causal LM, ``labels`` must have the same length as ``input_ids``, so the
    prompt positions are masked with -100 and only the summary tokens
    contribute to the loss.

    Args:
        model_id: Hugging Face model id or local path to load.
        data_path: Path to a JSON-lines training file; the generated script
            reads "summary" and either "prompt" or "dialogue" per record.
        out_dir: Directory the generated script saves the adapter to.
        target_modules: Iterable of module names passed to ``LoraConfig``.
        hps: Mapping with keys "micro_batch_size", "epochs" and "lr".

    Returns:
        The script source as a string.
    """
    # json.dumps produces a double-quoted, escaped literal that is also valid
    # Python, so quotes or backslashes in a path cannot break the script.
    model_lit = json.dumps(str(model_id), ensure_ascii=False)
    data_lit = json.dumps(str(data_path), ensure_ascii=False)
    out_lit = json.dumps(str(out_dir), ensure_ascii=False)
    modules_lit = repr(list(target_modules))
    return dedent(f"""\
    # Auto-generated QLoRA script (decoder-only)
    from transformers import AutoTokenizer, AutoModelForCausalLM, DataCollatorForSeq2Seq, TrainingArguments, Trainer, BitsAndBytesConfig
    from peft import LoraConfig, prepare_model_for_kbit_training, get_peft_model
    from datasets import load_dataset

    MODEL = {model_lit}
    DATASET_PATH = {data_lit}
    OUTPUT_DIR = {out_lit}

    tokenizer = AutoTokenizer.from_pretrained(MODEL)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    bnb = BitsAndBytesConfig(load_in_4bit=True)
    model = AutoModelForCausalLM.from_pretrained(MODEL, device_map="auto", quantization_config=bnb)
    model = prepare_model_for_kbit_training(model)

    lora_cfg = LoraConfig(r=8, lora_alpha=32, target_modules={modules_lit}, lora_dropout=0.05, task_type="CAUSAL_LM")
    model = get_peft_model(model, lora_cfg)

    ds = load_dataset("json", data_files={{"train": DATASET_PATH}})["train"]

    def tokenize_fn(example):
        prompt = example.get("prompt", example["dialogue"])
        prompt_ids = tokenizer(prompt, truncation=True, max_length=768).input_ids
        summary_ids = tokenizer(example["summary"], truncation=True, max_length=192, add_special_tokens=False).input_ids
        if tokenizer.eos_token_id is not None:
            # Teach the model where the summary ends.
            summary_ids = summary_ids + [tokenizer.eos_token_id]
        input_ids = prompt_ids + summary_ids
        # -100 masks the prompt so the loss only covers the summary tokens.
        labels = [-100] * len(prompt_ids) + summary_ids
        return dict(input_ids=input_ids, attention_mask=[1] * len(input_ids), labels=labels)

    train_ds = ds.map(tokenize_fn, remove_columns=ds.column_names)

    # Pads input_ids/attention_mask with the tokenizer and labels with -100.
    data_collator = DataCollatorForSeq2Seq(tokenizer, label_pad_token_id=-100)

    training_args = TrainingArguments(
        output_dir=OUTPUT_DIR,
        per_device_train_batch_size={hps['micro_batch_size']},
        num_train_epochs={hps['epochs']},
        learning_rate={hps['lr']},
        fp16=True,
        save_strategy="no",
    )

    trainer = Trainer(model=model, args=training_args, train_dataset=train_ds, data_collator=data_collator)
    trainer.train()
    model.save_pretrained(OUTPUT_DIR)
    """)

# Emit scripts
# `timestamp` from the plan step is reused (not recomputed) so the plan, the
# scripts and the JSON metadata of one run always share the same suffix.
DATASET_PATH = "./highlightsum_train.jsonl"
# The ranking index holds display labels (e.g. "BART-large"); the loadable
# Hugging Face id lives in the "model_id" column when that column exists.
has_model_id = "model_id" in top_models.columns
for model in top_models.index:
    rec = recommendations[model]
    target_modules = guess_target_modules(model)
    hps = rec["recommended_hyperparams"]
    safe_name = model.replace("/", "_")
    hub_id = str(top_models.at[model, "model_id"]) if has_model_id else model
    outdir = f"./ft_outputs/{safe_name}_{timestamp}"
    # Only the encoder-decoder recommendation mentions "encoder"; every other
    # recommendation gets the decoder-only QLoRA template.
    if "encoder" in rec["method"].lower():
        content = make_lora_script(hub_id, DATASET_PATH, outdir, target_modules, hps)
        fname = Path(OUT_DIR) / f"train_lora_{safe_name}_{timestamp}.py"
    else:
        content = make_q_lora_script(hub_id, DATASET_PATH, outdir, target_modules, hps)
        fname = Path(OUT_DIR) / f"train_q_lora_{safe_name}_{timestamp}.py"
    with open(fname, "w", encoding="utf-8") as f:
        f.write(content)

# Save metadata
with open(Path(OUT_DIR) / f"recommendations_pro_{timestamp}.json", "w", encoding="utf-8") as f:
    json.dump(recommendations, f, indent=2)

print("\n📁 Outputs written to:", OUT_DIR)
print("Files:\n ", "\n ".join(sorted(os.listdir(OUT_DIR))))


[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m59.4/59.4 MB[0m [31m14.7 MB/s[0m eta [36m0:00:00[0m
[?25hLoaded final ranking:


Unnamed: 0,model_id,rouge1,rouge2,rougeL,time,throughput,efficiency,composite_score
BART-large,facebook/bart-large-cnn,28.105383,9.183429,21.062636,101.631836,1.967887,0.207244,1.230694
LLaMA-1B,meta-llama/Llama-3.2-1B-Instruct,28.635874,9.618125,21.205387,393.929495,0.507705,0.05383,0.46323
LLaMA-3B,meta-llama/Llama-3.2-3B-Instruct,23.771793,8.222793,17.306203,748.223488,0.2673,0.02313,-0.162342
Phi-3-Mini,microsoft/Phi-3-mini-4k-instruct,20.550442,7.028457,14.306677,987.636199,0.202504,0.014486,-0.571619
T5-large,t5-large,10.977282,1.944009,9.636944,263.027842,0.760376,0.036638,-0.959962


Detected GPU mem (GB): 15.828320256

Top selected models:


Unnamed: 0,model_id,rouge1,rouge2,rougeL,time,throughput,efficiency,composite_score,size_hint
BART-large,facebook/bart-large-cnn,28.105383,9.183429,21.062636,101.631836,1.967887,0.207244,1.230694,0.4B
LLaMA-1B,meta-llama/Llama-3.2-1B-Instruct,28.635874,9.618125,21.205387,393.929495,0.507705,0.05383,0.46323,1B



‚úî Fine-tuning plan written to /content/llmed_certification_FineTuneFlow/outputs/benchmarks/notebook_D_pro/finetune_plan_pro_20251202_123700.md

üìÅ Outputs written to: /content/llmed_certification_FineTuneFlow/outputs/benchmarks/notebook_D_pro
Files:
  finetune_plan_pro_20251202_123700.md
 recommendations_pro_20251202_123700.json
 train_lora_BART-large_20251202_123700.py
 train_q_lora_LLaMA-1B_20251202_123700.py
