<a href="https://colab.research.google.com/github/sammyzane2/images2/blob/main/kenyan_stories_finetunning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Installation

In [9]:
# ✅ One-shot install for Unsloth on Google Colab (T4 GPU compatible)
# Step 1: install Unsloth's companion packages with --no-deps, so pip installs
# only the listed packages and none of their dependencies. Only xformers is
# version-pinned here; every other package resolves to whatever is latest.
# NOTE(review): --no-deps is presumably used to keep Colab's preinstalled
# torch/CUDA stack untouched — confirm before changing.
!pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl triton cut_cross_entropy unsloth_zoo
# Step 2: tokenizer / dataset / Hugging Face Hub helpers, installed with normal
# dependency resolution.
!pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
# Step 3: Unsloth itself, again without dependency resolution.
!pip install --no-deps unsloth



Accelerator initialization

In [10]:
from accelerate import Accelerator

# Initialize Accelerator explicitly
# NOTE(review): `accelerator` is not referenced by any later cell visible in
# this notebook; presumably it is created only for its initialization side
# effects — confirm whether this cell is still needed.
accelerator = Accelerator()


Dataset loading and fallback handling

In [11]:
import json
from datasets import Dataset

# Maximum sequence length; reused by the model-loading and trainer cells.
max_seq_length = 2048

# Load the training records from kenyan_stories.json into a `datasets.Dataset`.
# If the file is missing or is not valid JSON, report the problem and fall back
# to a one-row placeholder dataset so the remaining cells can still execute.
try:
    # Read explicitly as UTF-8 so the text decodes the same way regardless of
    # the platform's locale default encoding.
    with open("kenyan_stories.json", "r", encoding="utf-8") as f:
        data = json.load(f)
    dataset = Dataset.from_list(data)
except (FileNotFoundError, json.JSONDecodeError) as load_error:
    if isinstance(load_error, FileNotFoundError):
        print("Error: kenyan_stories.json not found.")
    else:
        print("Error: Could not decode kenyan_stories.json. Make sure it is valid JSON.")
    # Single shared fallback (previously duplicated in each except branch).
    print("Warning: using a one-row placeholder dataset; training on it will not produce a useful model.")
    dataset = Dataset.from_dict({
        'instruction': ["This is a dummy instruction."],
        'response': ["This is a dummy response."]
    })


Model selection and device checking

In [12]:
import torch

# Reference list of Unsloth checkpoint names that can be assigned to
# `model_name_to_load` below. The list itself is not read by any other
# statement in this cell.
fourbit_models = [
    # Llama 3.1
    "unsloth/Meta-Llama-3.1-8B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    "unsloth/Meta-Llama-3.1-70B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-405B-bnb-4bit",
    # Mistral
    "unsloth/Mistral-Small-Instruct-2409",
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    # Phi
    "unsloth/Phi-3.5-mini-instruct",
    "unsloth/Phi-3-medium-4k-instruct",
    # Gemma 2
    "unsloth/gemma-2-9b-bnb-4bit",
    "unsloth/gemma-2-27b-bnb-4bit",
    # Llama 3.2 / 3.3
    "unsloth/Llama-3.2-1B-bnb-4bit",
    "unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
    "unsloth/Llama-3.2-3B-bnb-4bit",
    "unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
    "unsloth/Llama-3.3-70B-Instruct-bnb-4bit",
]

# Checkpoint that the model-loading cell will fetch.
model_name_to_load = "unsloth/Llama-3.2-1B-bnb-4bit"

# Report GPU availability. The device index is only queried when CUDA is
# present, because the conditional expression evaluates just one branch.
print(
    f"CUDA is available. Using device: {torch.cuda.current_device()}"
    if torch.cuda.is_available()
    else "CUDA is not available. Training will not be possible on GPU."
)


CUDA is available. Using device: 0


Model loading and LoRA integration

In [13]:
# %%
from unsloth import FastLanguageModel, FastModel

# Load the checkpoint named by `model_name_to_load` together with its tokenizer.
# The flags request 4-bit loading (load_in_4bit=True, load_in_8bit=False) and
# no full fine-tuning (full_finetuning=False).
# NOTE(review): the base model is loaded through FastModel, while the adapter
# step below goes through FastLanguageModel — presumably the two are
# interchangeable for this checkpoint; confirm against the Unsloth docs.
model, tokenizer = FastModel.from_pretrained(
    model_name=model_name_to_load,
    max_seq_length=max_seq_length,
    load_in_4bit=True,
    load_in_8bit=False,
    full_finetuning=False,
)

# Wrap the loaded base model with PEFT (LoRA) adapters; `model` is rebound to
# the wrapped model, so this cell should be re-run from the top rather than
# executing only this statement twice.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    # Names of the modules that receive adapters.
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,  # same value as the trainer's `seed`
    max_seq_length=max_seq_length,
    use_rslora=False,
    loftq_config=None,
)

==((====))==  Unsloth 2025.6.2: Fast Llama patching. Transformers: 4.52.4.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.5.1+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.1.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/1.03G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/230 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/50.6k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/459 [00:00<?, ?B/s]

Unsloth: Making `model.base_model.model.model` require gradients


Dataset introspection and formatting function

In [14]:
# Show the dataset's column schema; formatting_func below reads the
# 'instruction' and 'response' columns.
print(dataset.features)

def formatting_func(examples):
    """Render instruction/response pairs into training prompts.

    Args:
        examples: Mapping with 'instruction' and 'response' entries. Normally a
            batch, where each entry is a list of strings of equal length. A
            single row, where each entry is a plain string, is also accepted.

    Returns:
        list[str]: One prompt per row, formatted as
        "### Instruction:\\n<instruction>\\n### Response:\\n<response>".
        A single row yields a one-element list.
    """
    instructions = examples['instruction']
    responses = examples['response']
    # A single (unbatched) row arrives as plain strings. Without this wrap the
    # loop below would walk the strings character by character.
    if isinstance(instructions, str):
        instructions, responses = [instructions], [responses]
    return [
        f"### Instruction:\n{instructions[i]}\n### Response:\n{responses[i]}"
        for i in range(len(instructions))
    ]


{'instruction': Value(dtype='string', id=None), 'response': Value(dtype='string', id=None)}


Trainer configuration and training execution

In [None]:
from trl import SFTTrainer, SFTConfig

# Training hyperparameters, kept in a named config so they can be inspected
# separately from the trainer.
training_args = SFTConfig(
    output_dir="outputs",
    max_seq_length=max_seq_length,
    # Batch size 1 with 8 accumulation steps per device.
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
    warmup_steps=10,
    max_steps=80,
    optim="adamw_8bit",
    logging_steps=1,
    seed=3407,
    report_to="none",
)

# Supervised fine-tuning over `dataset`; each batch is turned into prompt text
# by `formatting_func`.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    formatting_func=formatting_func,
    args=training_args,
)

trainer.train()


In [None]:
from transformers import TextStreamer

# Run after training: sample a continuation for one prompt and stream the
# generated text as it is produced.
# NOTE(review): the prompt is passed as-is, without the
# "### Instruction:/### Response:" template used by formatting_func during
# training — confirm this is intended.
prompt = "The Origin of Cattle [Maasai]"

# Streams decoded text while omitting the prompt and special tokens.
streamer = TextStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

# Tokenize and move the tensors to the model's device.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Sampling settings for generation.
sampling_options = dict(
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.1,
    max_new_tokens=200,
)

# The returned token ids are discarded; the streamer handles the output.
_ = model.generate(**inputs, streamer=streamer, **sampling_options)
