Environment setup (disable TensorFlow / Flax backends)

In [1]:
import os
# Backend switches, set in the first cell so they are already in place when
# `transformers` is imported further down.
# NOTE(review): presumably these keep transformers on the PyTorch-only path and
# pin JAX to CPU — confirm each variable is still honored by the installed versions.
os.environ.update(
    {
        "TRANSFORMERS_NO_TF": "1",
        "TRANSFORMERS_NO_FLAX": "1",
        "USE_TF": "0",
        "USE_FLAX": "0",
        "JAX_PLATFORM_NAME": "cpu",
    }
)


Imports

In [2]:
!pip -q install peft accelerate datasets

import os
import torch
from datasets import load_dataset
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer, DataCollatorForLanguageModeling
from peft import LoraConfig, get_peft_model


Output directory and pruned models to recover

In [3]:
# Root directory under which training results and recovered models are written.
BASE_OUT = "/kaggle/working/semantic-llm-pruning"

# Directory holding the pruned checkpoints that serve as input here.
_PRUNED_MODELS_ROOT = (
    "/kaggle/input/03-structured-mlp-pruning-activation-based"
    "/semantic-llm-pruning/models"
)

# (tag, checkpoint path) pairs; the tag names the output folders for each run.
RECOVERY_MODELS = [
    (variant, f"{_PRUNED_MODELS_ROOT}/pruned_{variant}")
    for variant in ("heads20_mlp10", "heads20_mlp20")
]


In [4]:
# Token budget per training example: used as `max_length` (with truncation)
# by the tokenizer call inside run_lora_recovery.
MODEL_MAX_LEN = 512

def format_example(ex):
    """Render one Alpaca-style record as a single training string.

    Reads the "instruction", "input" and "output" keys of `ex` (each defaults
    to an empty string when missing). The "### Input:" section is emitted only
    when the input is a string containing non-whitespace characters.

    Returns:
        dict with a single "text" key holding the assembled prompt + response.
    """
    instruction = ex.get("instruction", "")
    context = ex.get("input", "")
    answer = ex.get("output", "")

    sections = [f"### Instruction:\n{instruction}\n"]
    has_context = isinstance(context, str) and bool(context.strip())
    if has_context:
        sections.append(f"### Input:\n{context}\n")
    sections.append(f"### Response:\n{answer}\n")

    return {"text": "".join(sections)}

# Deterministic 5000-example slice of Alpaca (small subset sized for Kaggle).
ds = (
    load_dataset("tatsu-lab/alpaca", split="train")
    .shuffle(seed=42)
    .select(range(5000))
)
# Replace the original columns with a single formatted "text" column.
ds = ds.map(format_example, remove_columns=ds.column_names)


README.md: 0.00B [00:00, ?B/s]

data/train-00000-of-00001-a09b74b3ef9c3b(…):   0%|          | 0.00/24.2M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/52002 [00:00<?, ? examples/s]

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

In [5]:
def run_lora_recovery(tag, model_path):
    """Fine-tune one pruned causal LM with LoRA and save adapter + merged model.

    Uses the module-level `ds` (formatted text dataset), `MODEL_MAX_LEN`
    (truncation length) and `BASE_OUT` (output root). The model is moved to
    "cuda", so a CUDA device is required.

    Args:
        tag: Short name of the pruned variant; used to build output folders.
        model_path: Directory of the pruned checkpoint (model + tokenizer).

    Returns:
        (out_adapter, out_merged): directories of the saved LoRA adapter and
        of the model with the adapter merged into the base weights.
    """
    import gc  # local import: only needed for the end-of-run GPU cleanup

    tok = AutoTokenizer.from_pretrained(model_path, use_fast=True)
    if tok.pad_token is None:
        # The collator needs a pad token; fall back to EOS when none is defined.
        tok.pad_token = tok.eos_token

    def tokenize_fn(batch):
        # Truncate only. Padding and label creation are left to the collator,
        # which pads each batch to its own longest sequence instead of always
        # MODEL_MAX_LEN, and rebuilds `labels` from `input_ids` anyway.
        return tok(
            batch["text"],
            truncation=True,
            max_length=MODEL_MAX_LEN,
        )

    tokenized = ds.map(tokenize_fn, batched=True, remove_columns=["text"])

    model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16).to("cuda")
    model.train()

    # LoRA on attention + MLP projections (classic and effective)
    lora_cfg = LoraConfig(
        r=8,
        lora_alpha=16,
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM",
        target_modules=[
            "q_proj","k_proj","v_proj","o_proj",
            "gate_proj","up_proj","down_proj"
        ],
    )
    model = get_peft_model(model, lora_cfg)
    model.print_trainable_parameters()

    args = TrainingArguments(
        output_dir=f"{BASE_OUT}/results/lora_recovery/{tag}",
        num_train_epochs=1,
        per_device_train_batch_size=1,
        gradient_accumulation_steps=16,
        learning_rate=2e-4,
        fp16=True,
        logging_steps=20,
        save_steps=200,
        save_total_limit=2,
        report_to="none",
    )

    # mlm=False -> causal-LM collation: dynamic padding, labels copied from
    # input_ids with pad positions set to -100 (ignored by the loss).
    # NOTE(review): when pad_token aliases eos_token, EOS positions are masked
    # out of the loss as well — confirm this is acceptable for the recovery run.
    collator = DataCollatorForLanguageModeling(tok, mlm=False)

    trainer = Trainer(
        model=model,
        args=args,
        train_dataset=tokenized,
        data_collator=collator,
    )

    trainer.train()

    # Save the LoRA adapter (lightweight)
    out_adapter = f"{BASE_OUT}/models/{tag}_lora_adapter"
    os.makedirs(out_adapter, exist_ok=True)
    trainer.model.save_pretrained(out_adapter)
    tok.save_pretrained(out_adapter)
    print("Saved LoRA adapter ->", out_adapter)

    # Optional: save the merged model (heavier, but convenient for evaluation)
    merged = trainer.model.merge_and_unload()
    out_merged = f"{BASE_OUT}/models/{tag}_lora_merged"
    os.makedirs(out_merged, exist_ok=True)
    merged.save_pretrained(out_merged)
    tok.save_pretrained(out_merged)
    print("Saved merged model ->", out_merged)

    # This function is called once per checkpoint in the same kernel: drop the
    # references to this run's model/trainer and hand the cached CUDA blocks
    # back before the next checkpoint is loaded.
    del trainer, model, merged
    gc.collect()
    torch.cuda.empty_cache()

    return out_adapter, out_merged


In [None]:
# Run the LoRA recovery for every pruned checkpoint. Results are appended one
# at a time so completed runs stay in `saved` if a later run fails.
saved = []
for tag, path in RECOVERY_MODELS:
    adapter_dir, merged_dir = run_lora_recovery(tag, path)
    saved.append((tag, adapter_dir, merged_dir))

saved


Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

`torch_dtype` is deprecated! Use `dtype` instead!


trainable params: 5,807,296 || all params: 977,716,416 || trainable%: 0.5940


Step,Training Loss
20,1.7964
40,1.5008
60,1.4804
80,1.4519
100,1.4224
120,1.459
140,1.4035


Saved LoRA adapter -> /kaggle/working/semantic-llm-pruning/models/heads20_mlp10_lora_adapter
Saved merged model -> /kaggle/working/semantic-llm-pruning/models/heads20_mlp10_lora_merged


Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

trainable params: 5,510,032 || all params: 901,319,568 || trainable%: 0.6113


Step,Training Loss
20,1.9677
40,1.6263
60,1.597
80,1.561
100,1.5274
120,1.5577


In [None]:
def quick_generate(model_path, prompt="Give 2 bullet points about pruning.", max_new_tokens=80):
    """Load a saved model and greedily generate a continuation of `prompt`.

    The model is loaded in float16 on "cuda" (a CUDA device is required) and
    decoding is deterministic (`do_sample=False`).

    Args:
        model_path: Directory containing the model and tokenizer to load.
        prompt: Text passed to the model as-is.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The decoded sequence (prompt + continuation, special tokens removed).
        The same text is also printed.
    """
    # NOTE(review): the prompt is sent raw, while format_example builds
    # "### Instruction: / ### Response:" training texts — consider passing a
    # prompt in that template when checking recovered models.
    tok = AutoTokenizer.from_pretrained(model_path, use_fast=True)
    if tok.pad_token is None:
        tok.pad_token = tok.eos_token
    m = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float16).to("cuda").eval()
    inputs = tok(prompt, return_tensors="pt").to("cuda")
    out = m.generate(
        **inputs,
        max_new_tokens=max_new_tokens,
        do_sample=False,
        # Explicit pad id: do not depend on the checkpoint's generation config.
        pad_token_id=tok.pad_token_id,
    )
    text = tok.decode(out[0], skip_special_tokens=True)
    print(text)
    return text

# Sanity check: generate from each merged model recorded in `saved`
# (the adapter path is unpacked but not used here).
for tag, adapter, merged in saved:
    print("\n====", tag, "merged ====")
    quick_generate(merged)
