<a href="https://colab.research.google.com/github/rajan-bhateja/Tolkienizer/blob/master/llama_3_2_1b_instruct_fine_tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Install Libraries & Frameworks

In [60]:
!pip install -q --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl triton cut_cross_entropy unsloth_zoo
!pip install -q sentencepiece protobuf "datasets>=3.4.1,<4.0.0" huggingface_hub hf_transfer
!pip install -q --no-deps unsloth

### Initialise the Model

In [61]:
from unsloth import FastLanguageModel
import torch
from google.colab import userdata

device = "cuda" if torch.cuda.is_available() else "cpu"
dtype = None              # None for auto detection
max_seq_length = 1024

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-1B-Instruct",    # using the Llama 3.2 1B Instruct Model
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = False,  # Use QLoRA; False for LoRA
    token = userdata.get('HF_ACCESS_TOKEN')
)

==((====))==  Unsloth 2025.8.9: Fast Llama patching. Transformers: 4.55.2.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


### Unrained Inference

In [62]:
from transformers import TextStreamer

def do_model_inference(question, max_new_tokens = 100):
  _ = model.generate(
      **tokenizer(
          question,
          return_tensors = "pt",
      ).to(device),
      max_new_tokens = max_new_tokens,
      temperature = 1.0,
      top_p = 0.95,
      top_k = 64,
      streamer = TextStreamer(tokenizer, skip_prompt = True),
  )

question = "Translate the following sentence into a J.R.R. Tolkien Quote: The sun is setting."
do_model_inference(question)

 I was once told, if one is very brave, that one might see the shadows grow dark. But how can a shadow be seen when it is just a darkness? Ah, but if one sees, one sees, for shadows are only shadows. The sun is setting. But let us not be blind to its light.

Note: In this translation, I've used Tolkien's characteristic style, with a more formal and poetic tone, which is reminiscent of Elvish and Old English languages. I


### LoRA Config

In [63]:
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj"],
    lora_alpha = 16,
    lora_dropout = 0, # Supports any, but = 0 is optimized
    bias = "none",    # Supports any, but = "none" is optimized
    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

### Converting the dataset into Llama-supported format

In [64]:
import json

input_path = "/content/qa_format_quotes.jsonl"       # old dataset
output_path = "/content/quotes_chat.jsonl"           # new instruct-friendly dataset

with open(input_path, "r", encoding="utf-8") as infile, \
     open(output_path, "w", encoding="utf-8") as outfile:
    for line in infile:
        ex = json.loads(line)
        chat_example = {
            "messages": [
                {"role": "system", "content": "You are Tolkienizer, a model that rewrites text into J.R.R. Tolkien’s style."},
                {"role": "user", "content": ex["prompt"]},
                {"role": "assistant", "content": ex["completion"]}
            ]
        }
        outfile.write(json.dumps(chat_example, ensure_ascii=False) + "\n")

### Load Dataset

In [65]:
from datasets import load_dataset

dataset = load_dataset("json", data_files="quotes_chat.jsonl", split="train")

def format_example(example):
    return {
        "text": tokenizer.apply_chat_template(
            example["messages"],
            tokenize=False,
            add_generation_prompt=False
        )
    }

dataset = dataset.map(format_example, batched=True)
dataset

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/2556 [00:00<?, ? examples/s]

Dataset({
    features: ['messages', 'text'],
    num_rows: 2556
})

### Initialise the trainer

In [66]:
from trl import SFTConfig, SFTTrainer
from transformers import DataCollatorForSeq2Seq

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = 512,   # should match your dataset average length
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer, padding=True),
    packing = True,         # keep for efficiency if avg length is short
    args = SFTConfig(
        per_device_train_batch_size = 2,      # safe for Colab
        gradient_accumulation_steps = 4,      # effective batch size = 8
        warmup_ratio = 0.1,                   # better than fixed warmup steps
        num_train_epochs = 3,                 # let model see dataset fully
        # max_steps = -1,                     # use epochs instead of fixed steps
        learning_rate = 2e-5,                 # safer than 5e-4
        logging_steps = 10,
        optim = "adamw_8bit",                 # memory efficient
        weight_decay = 0.05,                  # slight regularization
        lr_scheduler_type = "cosine",         # smoother decay
        seed = 3407,
        report_to = "none",
        save_strategy = "epoch",              # save per epoch
    ),
)


Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/2556 [00:00<?, ? examples/s]

### Start Training

In [67]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 2,556 | Num Epochs = 3 | Total steps = 960
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 11,272,192 of 1,247,086,592 (0.90% trained)


Step,Training Loss
10,3.8933
20,3.8899
30,3.8263
40,3.6141
50,3.3872
60,3.1551
70,2.9873
80,2.7528
90,2.5114
100,2.1986


### Save the Model & Tokenizer

In [68]:
model.save_pretrained("Tolkienizer-3.2-1B-Instruct", tokenizer)

### Run Inference

#### Testing Prompts

1.  Rewrite this in Tolkien’s style: "The car drove quickly down the road."
2.  Rewrite this in Tolkien’s style: "I saw a bird sitting on a tree branch."
3.  Rewrite this in Tolkien’s style: "The computer is a powerful tool for work."
4.  Rewrite this in Tolkien’s style: "It was a cold and rainy day."
5.  Rewrite this in Tolkien’s style: "The future holds many possibilities."

In [98]:
from unsloth import FastLanguageModel
from transformers import AutoTokenizer, TextStreamer
import torch

model_path = "Tolkienizer-3.2-1B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model, _ = FastLanguageModel.from_pretrained(
    model_name = model_path,
    max_seq_length = 1024,
    dtype = None,
    load_in_4bit = True,
)

messages = [
    {"role": "system", "content": "You are Tolkienizer, a model that rewrites text into J.R.R. Tolkien’s style."},
    {"role": "user", "content": "Rewrite this in Tolkien’s style: The future holds many possibilites."},
]

inputs = tokenizer.apply_chat_template(messages, return_tensors="pt").to(model.device)

# Initialize the TextStreamer
streamer = TextStreamer(tokenizer, skip_prompt=True)

outputs = model.generate(inputs, max_new_tokens=200, do_sample=True, temperature=0.7, top_p=0.9, streamer=streamer)
# No need to print the output here, the streamer handles it
# print(tokenizer.decode(outputs[0], skip_special_tokens=True))

==((====))==  Unsloth 2025.8.9: Fast Llama patching. Transformers: 4.55.2.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
<|start_header_id|>assistant<|end_header_id|>

But the road of the future is fraught with peril.<|eot_id|>


In [84]:
import os
from google.colab import files

directory_path = "/content/Tolkienizer-3.2-1B-Instruct"

if os.path.isdir(directory_path):
    for filename in os.listdir(directory_path):
        file_path = os.path.join(directory_path, filename)
        if os.path.isfile(file_path):
            print(f"Downloading {filename}...")
            files.download(file_path)
else:
    print(f"Directory not found: {directory_path}")

Downloading tokenizer.json...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloading adapter_model.safetensors...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloading README.md...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloading chat_template.jinja...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloading special_tokens_map.json...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloading adapter_config.json...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Downloading tokenizer_config.json...


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>