### Mount Google Drive

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Install Dependencies

In [2]:
!pip install --no-deps git+https://github.com/unslothai/unsloth-zoo.git
!pip install --no-deps git+https://github.com/unslothai/unsloth.git
!pip install --no-deps xformers "trl<0.9.0" peft accelerate bitsandbytes
!pip install transformers datasets

Collecting git+https://github.com/unslothai/unsloth-zoo.git
  Cloning https://github.com/unslothai/unsloth-zoo.git to /tmp/pip-req-build-jwvod3s5
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth-zoo.git /tmp/pip-req-build-jwvod3s5
  Resolved https://github.com/unslothai/unsloth-zoo.git to commit acadb7ed8f523611e7bb095a6d13f37f4344d986
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: unsloth_zoo
  Building wheel for unsloth_zoo (pyproject.toml) ... [?25l[?25hdone
  Created wheel for unsloth_zoo: filename=unsloth_zoo-2025.9.14-py3-none-any.whl size=254562 sha256=14aa81c54f4ac9cfd03f0149eb279cb43e0b24af0cea5a28819ec0268d661f03
  Stored in directory: /tmp/pip-ephem-wheel-cache-2eukrqsy/wheels/9d/0c/54/ad276b0d92eb003228484651d6eb874d44608b51b1bf8438bb
Successfully built unsloth_zoo
Ins

### Setup and Imports

In [3]:
from unsloth import FastLanguageModel, is_bfloat16_supported
import torch
import json
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments


max_seq_length = 2048
dtype = None
load_in_4bit = True
fourbit_models = [
    "unsloth/mistral-7b-v0.3-bnb-4bit",
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    "unsloth/llama-3-8b-bnb-4bit",
    "unsloth/llama-3-8b-Instruct-bnb-4bit",
    "unsloth/llama-3-70b-bnb-4bit",
    "unsloth/Phi-3-mini-4k-instruct",
    "unsloth/Phi-3-medium-4k-instruct",
    "unsloth/mistral-7b-bnb-4bit",
    "unsloth/gemma-7b-bnb-4bit",
]

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


### Load Model and Tokenizer

In [4]:
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

==((====))==  Unsloth 2025.9.11: Fast Llama patching. Transformers: 4.56.2.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/198 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

### Load Base Model

In [5]:
# Load the base model
model_base, tokenizer_base = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    device_map = "cuda", # Explicitly set device_map to cuda
)

# Define the alpaca_prompt
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

from transformers import TextStreamer

# Configure the base model for inference
FastLanguageModel.for_inference(model_base)

==((====))==  Unsloth 2025.9.11: Fast Llama patching. Transformers: 4.56.2.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.32.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096, padding_idx=128255)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSN

#Run Inference with Base Model

In [9]:
# This cell can be run repeatedly with different inputs for the base model

item_to_describe_base = "The Book of Revelation" # Change this input for different inferences with the base model

# Prepare input for inference
inputs_base = tokenizer_base(
[
    alpaca_prompt.format(
        "Generate a description for the following item",
        item_to_describe_base, # input
        "",
    )
], return_tensors = "pt").to("cuda")

# Run inference with the base model
text_streamer_base = TextStreamer(tokenizer_base)
_ = model_base.generate(**inputs_base, streamer = text_streamer_base, max_new_tokens = 128)

<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Generate a description for the following item

### Input:
The Book of Revelation

### Response:
The Book of Revelation is a book of the Bible. It is the last book in the New Testament. It is a book of prophecy. It describes the end of the world and the second coming of Jesus Christ.

### Response 2:
The Book of Revelation is a book of the Bible. It is the last book in the New Testament. It is a book of prophecy. It describes the end of the world and the second coming of Jesus Christ.

### Response 3:
The Book of Revelation is a book of the Bible. It is the last book in the New Testament. It is a book of prophecy. It describes the end
