In [1]:
# ----------------------------------------
# STEP 1: Install required libraries
# ----------------------------------------
!pip install -q transformers datasets peft accelerate bitsandbytes einops sentencepiece

# ----------------------------------------
# STEP 2: Mount Google Drive & copy SFT data
# ----------------------------------------
from google.colab import drive
drive.mount('/content/drive')

# Copy the SFT-formatted data file from Google Drive to local
!cp /content/drive/MyDrive/user1_sft.json /content/user1_sft.json

# ----------------------------------------
# STEP 3: Load TinyLlama model + tokenizer
# ----------------------------------------
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

base_model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(base_model_id)
tokenizer.pad_token = tokenizer.eos_token

model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    device_map="auto",
    load_in_8bit=True  # use quantization to save memory
)

# ----------------------------------------
# STEP 4: Load dataset & convert to tokenized format
# ----------------------------------------
import json
from datasets import Dataset

with open("/content/user1_sft.json", "r", encoding="utf-8") as f:
    raw_data = [json.loads(line) for line in f]

dataset = Dataset.from_list(raw_data)

# Tokenization
def tokenize(example):
    prompt = f"### Instruction:\n{example['instruction']}\n\n### Response:\n{example['output']}"
    tokens = tokenizer(prompt, truncation=True, padding="max_length", max_length=512)
    tokens["labels"] = tokens["input_ids"].copy()
    return tokens

tokenized_dataset = dataset.map(tokenize, remove_columns=dataset.column_names)

# ----------------------------------------
# STEP 5: Apply LoRA with PEFT
# ----------------------------------------
from peft import LoraConfig, get_peft_model, TaskType

lora_config = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=["q_proj", "v_proj"],  # modules to inject LoRA into
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM
)

model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# ----------------------------------------
# STEP 6: Define training arguments and train
# ----------------------------------------
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir="./lora-tinyllama-user1",
    per_device_train_batch_size=4,
    gradient_accumulation_steps=2,
    num_train_epochs=3,
    logging_steps=10,
    save_steps=50,
    save_total_limit=1,
    bf16=torch.cuda.is_bf16_supported(),  # use BF16 if available
    report_to="none"
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    tokenizer=tokenizer
)

trainer.train()

# ----------------------------------------
# STEP 7: Save final LoRA model adapter + tokenizer
# ----------------------------------------

output_path = "/content/drive/MyDrive/lora-tinyllama-user1"
model.save_pretrained(output_path)
tokenizer.save_pretrained(output_path)

print("✅ Training complete and model saved.")


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 MB[0m [31m10.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m67.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m59.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m42.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m7.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


model.safetensors:   0%|          | 0.00/2.20G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

Map:   0%|          | 0/344 [00:00<?, ? examples/s]

trainable params: 1,126,400 || all params: 1,101,174,784 || trainable%: 0.1023


  trainer = Trainer(
No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


Step,Training Loss
10,15.3754
20,12.517
30,8.1639
40,5.5878
50,3.5976
60,2.0541
70,1.0092
80,0.4898
90,0.345
100,0.2945




✅ Training complete and model saved.
