In [1]:
%%capture
import torch
major_version, minor_version = torch.cuda.get_device_capability()
# Must install separately since Colab has torch 2.2.1, which breaks packages
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
if major_version >= 8:
    # Use this for new GPUs like Ampere, Hopper GPUs (RTX 30xx, RTX 40xx, A100, H100, L40)
    !pip install --no-deps packaging ninja einops flash-attn xformers trl peft accelerate bitsandbytes
else:
    # Use this for older GPUs (V100, Tesla T4, RTX 20xx)
    !pip install --no-deps xformers trl peft accelerate bitsandbytes
pass

In [2]:
!pip install transformers  # Hugging Face library for NLP model handling
!pip install datasets  # Library for loading and managing datasets



In [3]:
# Import required modules for model management, dataset handling, and fine-tuning
import torch
from unsloth import FastLanguageModel  # High-performance language model utilities
from unsloth.chat_templates import get_chat_template  # Chat template utility
from datasets import load_dataset  # For loading datasets
from trl import SFTTrainer  # Supervised fine-tuning trainer
from transformers import TrainingArguments  # Configuration for training process
from unsloth.chat_templates import standardize_sharegpt  # Dataset standardization

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


    PyTorch 2.7.0+cu126 with CUDA 1206 (you have 2.6.0+cu124)
    Python  3.11.12 (you have 3.11.12)
  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)
  Memory-efficient attention, SwiGLU, sparse and more won't be available.
  Set XFORMERS_MORE_DETAILS=1 for more details


🦥 Unsloth Zoo will now patch everything to make training faster!


In [4]:
max_seq_length = 2048
dtype = None
load_in_4bit = True

In [5]:
!pip install huggingface_hub
#!pip install kaggle_secrets



In [6]:
# Load the pre-trained Llama-3.2 model with 3 billion parameters, optimized for instruction following
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Llama-3.2-3B-Instruct",
    dtype=dtype,
    max_seq_length=2048,
    load_in_4bit=True,  # Use 4-bit precision for efficient memory usage
    token="hf_xxx"
)

==((====))==  Unsloth 2025.4.8: Fast Llama patching. Transformers: 4.51.3.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.0. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = True]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [7]:
# Apply PEFT to reduce training resource requirements
model = FastLanguageModel.get_peft_model(
    model,
    r=16, # Bottleneck dimension for fine-tuning
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj", # Attention projection layers
        "gate_proj", "up_proj", "down_proj",  # Feedforward layers
    ],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",  # True or "unsloth" for very long context
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

Unsloth 2025.4.8 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [8]:
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token
def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs       = examples["input"]
    outputs      = examples["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }
pass

from datasets import load_dataset
#dataset = load_dataset("yahma/alpaca-cleaned", split = "train")
dataset = load_dataset("json", data_files="geosignal.json", split="train")
dataset = dataset.map(formatting_prompts_func, batched = True,)

In [9]:
len(dataset)

39749

In [10]:
dataset[0]

{'instruction': 'Why is California the best state?',
 'input': '',
 'output': 'California has the best landscape in the country. We have some of the best mountains to ski on, most iconic beach spots, and richest soil for agriculture. What else could you ask for?',
 'type': 'dolly',
 'category': None,
 'text': 'Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nWhy is California the best state?\n\n### Input:\n\n\n### Response:\nCalifornia has the best landscape in the country. We have some of the best mountains to ski on, most iconic beach spots, and richest soil for agriculture. What else could you ask for?<|eot_id|>'}

In [41]:
dataset[10000]

{'instruction': "What details can you provide on the Mtotosaurus's detail?",
 'input': '',
 'output': 'The detail of the Mtotosaurus is the year of it is 2003, the autor of it is Maier, the references of it is Maier, G (2003) African Dinosaurs Unearthed: The Tendaguru Expeditions (Life of the Past).',
 'type': 'geo',
 'category': 'metaearth.dinosaur.qa',
 'text': "Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nWhat details can you provide on the Mtotosaurus's detail?\n\n### Input:\n\n\n### Response:\nThe detail of the Mtotosaurus is the year of it is 2003, the autor of it is Maier, the references of it is Maier, G (2003) African Dinosaurs Unearthed: The Tendaguru Expeditions (Life of the Past).<|eot_id|>"}

In [11]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    args=TrainingArguments(
        per_device_train_batch_size=16,
        gradient_accumulation_steps=4,
        # Use num_train_epochs = 1, warmup_ratio for full training runs!
        #warmup_steps=5,
        #max_steps=60,
        num_train_epochs=1,
        warmup_ratio=0.1,
        learning_rate=2e-4,
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=10,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
    ),
)

In [12]:
# Perform fine-tuning on the model
trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 39,749 | Num Epochs = 1 | Total steps = 621
O^O/ \_/ \    Batch size per device = 16 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (16 x 4 x 1) = 64
 "-____-"     Trainable parameters = 24,313,856/3,000,000,000 (0.81% trained)
[34m[1mwandb[0m: Currently logged in as: [33mcharson-hu[0m ([33mfwi[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
10,2.603
20,2.3826
30,1.9724
40,1.8484
50,1.7216
60,1.7069
70,1.6699
80,1.6391
90,1.5714
100,1.7017


TrainOutput(global_step=621, training_loss=1.5855082048886064, metrics={'train_runtime': 2916.6239, 'train_samples_per_second': 13.628, 'train_steps_per_second': 0.213, 'total_flos': 4.029901487430697e+17, 'train_loss': 1.5855082048886064})

In [13]:
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

In [14]:
# Enable optimized inference
FastLanguageModel.for_inference(model)

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): LlamaForCausalLM(
      (model): LlamaModel(
        (embed_tokens): Embedding(128256, 3072, padding_idx=128004)
        (layers): ModuleList(
          (0): LlamaDecoderLayer(
            (self_attn): LlamaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=3072, out_features=3072, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Identity()
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=3072, out_features=16, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=16, out_features=3072, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
                (lora_magnitude_vector): ModuleDict()
              )
              (k_proj): lora.Linear

In [15]:
inputs = tokenizer(
[
    alpaca_prompt.format(
        "Why does china have more earthquakes?", # instruction
        "", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
tokenizer.batch_decode(outputs)

['<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nWhy does china have more earthquakes?\n\n### Input:\n\n\n### Response:\nChina has more earthquakes than the United States because it is a long, thin country, and the fault lines run along the edge of the country.<|eot_id|>']

In [16]:
inputs = tokenizer(
[
    alpaca_prompt.format(
        "Why is california the best state?", # instruction
        "", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
tokenizer.batch_decode(outputs)

['<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nWhy is california the best state?\n\n### Input:\n\n\n### Response:\nCalifornia is the best state for many reasons. First, it is the most populated state, with over 39 million people. It is also the second largest state in terms of area, with 163,696 square miles. California is also home to the largest economy in the United States, with a GDP of over $']

In [17]:
inputs = tokenizer(
[
    alpaca_prompt.format(
        "can you introduce Gulf of Mexico?", # instruction
        "", # input
        "", # output - leave this blank for generation!
    )
], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 64, use_cache = True)
tokenizer.batch_decode(outputs)

['<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\ncan you introduce Gulf of Mexico?\n\n### Input:\n\n\n### Response:\nGulf of Mexico is a large body of water that is located on the southeastern coast of the United States. It is bounded by the states of Texas, Louisiana, Mississippi, Alabama, and Florida, as well as the Mexican states of Tamaulipas and Veracruz. The Gulf of Mexico is connected to']