# 🦙 Instruction Fine-Tuning LLaMA-3.2-1B-Instruct with Chomsky Dataset

This notebook helps you fine-tune LLaMA-3.2-1B-Instruct (4-bit) using enriched question-answer data from Chomsky, using instruction-style tuning and LoRA (PEFT).

In [1]:
# ✅ Install required libraries
!pip install -q transformers datasets peft accelerate bitsandbytes

In [2]:
# 📥 Load and format dataset
import json
from datasets import Dataset

# Load your enriched dataset (place it in the same folder as this notebook)
with open("enriched_data_merged.json", "r", encoding="utf-8") as f:
    data = json.load(f)

def format_example(example):
    context = f"Year: {example['year']} | Setting: {example['setting']} | Persona: {example['persona']} | Topic: {example['topic']}"
    return {
        "text": f"""### Context:
{context}

### Instruction:
{example['instruction']}

### Input:
{example['input']}

### Response:
{example['output']}"""
    }

formatted_data = [format_example(d) for d in data]
dataset = Dataset.from_list(formatted_data)

In [3]:
# 🧠 Load model in 4-bit
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import prepare_model_for_kbit_training
import torch

model_id = "unsloth/Llama-3.2-1B-Instruct-bnb-4bit"  # Change to 1B variant if you have local access

tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    load_in_4bit=True,
    device_map="auto",
    torch_dtype=torch.float16
)

model = prepare_model_for_kbit_training(model)

tokenizer_config.json:   0%|          | 0.00/54.7k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.52k [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors:   0%|          | 0.00/1.03G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/234 [00:00<?, ?B/s]

In [4]:
# 🪝 LoRA config
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"]
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

trainable params: 851,968 || all params: 1,236,666,368 || trainable%: 0.0689


In [5]:
# ✂️ Tokenize dataset
def tokenize(example):
    return tokenizer(example["text"], padding="max_length", truncation=True, max_length=512)

tokenized_dataset = dataset.map(tokenize, batched=True)

Map:   0%|          | 0/8519 [00:00<?, ? examples/s]

In [6]:
# 🏃 Train the model
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="./llama3_chomsky_lora",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    num_train_epochs=3,
    logging_steps=10,
    learning_rate=2e-4,
    bf16=True,
    save_strategy="epoch",
    report_to="none"
)

trainer = Trainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=tokenized_dataset
)

trainer.train()

  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.
`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
  return fn(*args, **kwargs)


ValueError: The model did not return a loss from the inputs, only the following keys: logits. For reference, the inputs it received are input_ids,attention_mask.

In [None]:
# 💾 Save model
model.save_pretrained("./llama3_chomsky_lora")
tokenizer.save_pretrained("./llama3_chomsky_lora")

In [None]:
# ✅ Inference
from transformers import pipeline

pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

prompt = """### Context:
Year: 2020 | Setting: Oxford Debate | Persona: Historian | Topic: Humanities

### Instruction:
Respond as a historian in a Oxford Debate discussing Humanities

### Input:
How do you view the significance of this moment in human history?

### Response:
"""

output = pipe(prompt, max_new_tokens=150, do_sample=True, temperature=0.7)[0]["generated_text"]
print(output)