<a href="https://colab.research.google.com/github/rajan-bhateja/Tolkienizer/blob/master/fine_tuning_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Install dependencies

In [2]:
!pip install -q --no-deps unsloth bitsandbytes accelerate xformers==0.0.29.post3 peft trl triton cut_cross_entropy unsloth_zoo
!pip install -q sentencepiece protobuf "datasets>=3.4.1,<4.0.0" "huggingface_hub>=0.34.0" hf_transfer

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m47.6/47.6 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.4/43.4 MB[0m [31m22.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m307.6/307.6 kB[0m [31m27.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m511.9/511.9 kB[0m [31m40.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m181.9/181.9 kB[0m [31m18.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the foll

### Unsloth

In [3]:
from unsloth import FastLanguageModel
import torch
from google.colab import userdata

max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally!
dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
load_in_4bit = True # Use QLoRA; False for LoRA

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-3B",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    token = userdata.get('HF_ACCESS_TOKEN')
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.8.5: Fast Llama patching. Transformers: 4.55.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.35G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/230 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/459 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

### LoRA Configurations

In [None]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 8,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

In [None]:
from unsloth.chat_templates import get_chat_template
import os

INSTRUCTION_PREFIX = "Convert to Tolkien style:"
RESPONSE_PREFIX    = "Tolkien style:"

def formatting_prompts_func(examples):
    prompts = examples["prompt"]
    completions = examples["completion"]
    texts = [
        f"{INSTRUCTION_PREFIX} {p}\n{RESPONSE_PREFIX} {c}\n"
        for p, c in zip(prompts, completions)
    ]
    return {"text": texts}

from datasets import load_dataset

file_path = "/content/qa_format_quotes.jsonl"
if os.path.exists(file_path):
    print(f"Attempting to load file from: {file_path}")
    dataset = load_dataset("json", data_files=file_path, split = "train")
    print("Dataset loaded successfully")
else:
    print(f"File not found at: {file_path}")

Attempting to load file from: /content/qa_format_quotes.jsonl
Dataset loaded successfully.


In [None]:
dataset["prompt"][0]

"Instruct: You are a helpful English assistant proficient in converting simple English sentences into Tolkien quotes. The sentence: This quote teaches us that true value and strength often aren't obvious. Something precious might not look flashy, and someone who seems to be aimlessly wandering might actually be on a meaningful path. It also emphasizes that things with deep, strong foundations will endure difficult times, just as deep roots are protected from the cold. Furthermore, it offers a hopeful message: even after destruction or despair, new life and light will emerge, what was broken will be restored, and those who lost their power or rightful place will eventually reclaim it."

In [None]:
dataset["completion"][0]

'All that is gold does not glitter,\nNot all those who wander are lost;\nThe old that is strong does not wither,\nDeep roots are not reached by the frost.\n\nFrom the ashes a fire shall be woken,\nA light from the shadows shall spring;\nRenewed shall be blade that was broken,\nThe crownless again shall be king.'

In [None]:
dataset = dataset.map(formatting_prompts_func, batched = True,)

if tokenizer.eos_token is None:
    tokenizer.add_special_tokens({"eos_token": "</s>"})
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model.resize_token_embeddings(len(tokenizer))

Embedding(128256, 3072, padding_idx=128004)

In [None]:
from trl import SFTConfig, SFTTrainer
from transformers import DataCollatorForSeq2Seq

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    packing = False, # Can make training 5x faster for short sequences.
    args = SFTConfig(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        num_train_epochs = 3,
        # max_steps = 60,
        learning_rate = 5e-4,
        logging_steps = 10,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,  # random state, could be any integer
        output_dir = "outputs",
        report_to = "none", # Use this for WandB etc
    ),
)

In [None]:
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 2,994 | Num Epochs = 3 | Total steps = 1,125
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 12,156,928 of 3,224,906,752 (0.38% trained)


Step,Training Loss
10,2.6523
20,1.9979
30,1.8791
40,1.8566
50,1.8098
60,1.8383
70,1.8081
80,1.8017
90,1.7905
100,1.7619


TrainOutput(global_step=1125, training_loss=1.5348957460191515, metrics={'train_runtime': 3237.4033, 'train_samples_per_second': 2.774, 'train_steps_per_second': 0.348, 'total_flos': 3.41886050002944e+16, 'train_loss': 1.5348957460191515})

In [None]:
torch.cuda.empty_cache()

### Run Inference

In [None]:
from unsloth import FastLanguageModel
from transformers import AutoTokenizer

model_path = "/content/outputs/checkpoint-1125/"  # where the trained model is in memory or temp folder
# tokenizer = AutoTokenizer.from_pretrained(model_path)
# model = AutoModelForCausalLM.from_pretrained(model_path)

# Load the model using FastLanguageModel
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = model_path, # Path to the saved model
    max_seq_length = max_seq_length,
    dtype = None, # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
    load_in_4bit = True, # Use QLoRA; False for LoRA
)


# Example prompt (match training style exactly)
prompt = (
    "Instruct: You are a helpful English assistant proficient in converting simple English into Tolkien quotes. "
    "The quote: The sun is setting"
)

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

outputs = model.generate(
    **inputs,
    max_new_tokens=200,
    temperature=0.9,
    top_p=0.9,
    do_sample=True
)

print(tokenizer.decode(outputs[0], skip_special_tokens=True))

==((====))==  Unsloth 2025.8.5: Fast Llama patching. Transformers: 4.55.0.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB. GPU 0 has a total capacity of 14.74 GiB of which 16.12 MiB is free. Process 2742 has 14.72 GiB memory in use. Of the allocated memory 14.55 GiB is allocated by PyTorch, and 17.08 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

### Save the Model

#### Do this again after training again

In [5]:
# Save the merged LoRA weights as a full model
model.save_pretrained_merged("tolkienizer_model", tokenizer=tokenizer, save_method = "merged_4bit_forced",)
# tokenizer.save_pretrained("tolkienizer_model")



### Download Locally

In [None]:
import os
from google.colab import files

directory_path = "/content/tolkienizer_model"
zip_file_name = "tolkienizer_model.zip"
zip_file_path = os.path.join("/content", zip_file_name)

if os.path.isdir(directory_path):
    # Zip the directory
    !zip -r "$zip_file_path" "$directory_path"

    # Download the zipped file
    files.download(zip_file_path)
else:
    print(f"Directory not found: {directory_path}")

  adding: content/tolkienizer_model/ (stored 0%)
  adding: content/tolkienizer_model/adapter_model.safetensors (deflated 7%)
  adding: content/tolkienizer_model/tokenizer.json (deflated 85%)
  adding: content/tolkienizer_model/adapter_config.json (deflated 57%)
  adding: content/tolkienizer_model/special_tokens_map.json (deflated 71%)
  adding: content/tolkienizer_model/tokenizer_config.json (deflated 96%)
  adding: content/tolkienizer_model/README.md (deflated 65%)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>