In [1]:
!pip install unsloth transformers trl

Collecting unsloth
  Downloading unsloth-2025.3.19-py3-none-any.whl.metadata (46 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.2/46.2 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Collecting trl
  Downloading trl-0.16.0-py3-none-any.whl.metadata (12 kB)
Collecting unsloth_zoo>=2025.3.17 (from unsloth)
  Downloading unsloth_zoo-2025.3.17-py3-none-any.whl.metadata (8.0 kB)
Collecting xformers>=0.0.27.post2 (from unsloth)
  Downloading xformers-0.0.29.post3-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting bitsandbytes (from unsloth)
  Downloading bitsandbytes-0.45.4-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting tyro (from unsloth)
  Downloading tyro-0.9.18-py3-none-any.whl.metadata (9.2 kB)
Collecting datasets>=2.16.0 (from unsloth)
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting trl
  Downloading trl-0.15.2-py3-none-any.whl.metadata (11 kB)
Collecting protobuf<4.0.0 (from unsloth)
  Downloading protobu

In [1]:
import torch
from unsloth import FastLanguageModel
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth.chat_templates import get_chat_template,standardize_sharegpt

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [2]:
from unsloth import FastLanguageModel

model, tokenizer = FastLanguageModel.from_pretrained(

   model_name = "unsloth/llama-3-8b-Instruct",
    max_seq_length=2048,
    load_in_4bit=True,
)



==((====))==  Unsloth 2025.3.19: Fast Llama patching. Transformers: 4.50.2.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.70G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/220 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/51.1k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/345 [00:00<?, ?B/s]

In [3]:
model = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "up_proj", "down_proj"],
)

Not an error, but Unsloth cannot patch MLP layers with our manual autograd engine since either LoRA adapters
are not enabled or a bias term (like in Qwen) is used.
Unsloth 2025.3.19 patched 32 layers with 32 QKV layers, 32 O layers and 0 MLP layers.


In [4]:
tokenizer = get_chat_template(tokenizer,chat_template="llama-3")


In [5]:
dataset = load_dataset("mlabonne/FineTome-100k",split="train")

README.md:   0%|          | 0.00/982 [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/117M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/100000 [00:00<?, ? examples/s]

In [6]:
print("Dataset features:", dataset.features)


Dataset features: {'conversations': [{'from': Value(dtype='string', id=None), 'value': Value(dtype='string', id=None)}], 'source': Value(dtype='string', id=None), 'score': Value(dtype='float64', id=None)}


In [7]:
dataset

Dataset({
    features: ['conversations', 'source', 'score'],
    num_rows: 100000
})

In [8]:
dataset[0]

{'conversations': [{'from': 'human',
   'value': 'Explain what boolean operators are, what they do, and provide examples of how they can be used in programming. Additionally, describe the concept of operator precedence and provide examples of how it affects the evaluation of boolean expressions. Discuss the difference between short-circuit evaluation and normal evaluation in boolean expressions and demonstrate their usage in code. \n\nFurthermore, add the requirement that the code must be written in a language that does not support short-circuit evaluation natively, forcing the test taker to implement their own logic for short-circuit evaluation.\n\nFinally, delve into the concept of truthiness and falsiness in programming languages, explaining how it affects the evaluation of boolean expressions. Add the constraint that the test taker must write code that handles cases where truthiness and falsiness are implemented differently across different programming languages.'},
  {'from': 'gpt

In [9]:
def format_conversations(examples):
    # Transform conversations to standard message format
    formatted_texts = []
    for conversation in examples["conversations"]:
        messages = []
        for turn in conversation:
            messages.append({
                "role": "user" if turn["from"] == "human" else "assistant",
                "content": turn["value"]
            })
        # Apply chat template to the standardized messages
        formatted_texts.append(
            tokenizer.apply_chat_template(
                messages,
                tokenize=False,
                add_generation_prompt=True
            )
        )
    return {"text": formatted_texts}

# Apply the formatting
dataset = dataset.map(
    format_conversations,
    batched=True,
    remove_columns=dataset.column_names  # Remove original columns to save space
)


Map:   0%|          | 0/100000 [00:00<?, ? examples/s]

In [10]:


# 4. Create trainer with ALL required parameters
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,  # Must explicitly pass tokenizer
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=2048,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,
        warmup_steps=100,
        max_steps=60,
        learning_rate=2e-4,
        fp16=not torch.cuda.is_bf16_supported(),
        bf16=torch.cuda.is_bf16_supported(),
        logging_steps=1,
        output_dir="outputs",
        optim="adamw_8bit",  # Required for 4-bit training
    ),
)

Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/100000 [00:00<?, ? examples/s]

In [11]:
!pip install wandb






In [12]:
!wandb login

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33msingh-nupurvns[0m ([33msingh-nupurvns-redi-school[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [13]:
import wandb
wandb.init(project = "Finetunning Llama 3.2 3B with LLM project")
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33msingh-nupurvns[0m ([33msingh-nupurvns-redi-school[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 100,000 | Num Epochs = 1 | Total steps = 60
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 32,505,856/8,000,000,000 (0.41% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,1.2517
2,1.6345
3,1.2107
4,1.2544
5,1.2146
6,1.4354
7,0.9735
8,1.5253
9,1.3044
10,1.4033


TrainOutput(global_step=60, training_loss=1.0588228404521942, metrics={'train_runtime': 1050.9096, 'train_samples_per_second': 0.457, 'train_steps_per_second': 0.057, 'total_flos': 1.613432229101568e+16, 'train_loss': 1.0588228404521942})

In [16]:
model.save_pretrained("finetuned_model")

In [17]:
inference_model,inference_tokenizer = FastLanguageModel.from_pretrained(
    model_name = "./finetuned_model",
    max_seq_length=2048,
    load_in_4bit=True,
)

==((====))==  Unsloth 2025.3.19: Fast Llama patching. Transformers: 4.50.2.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [22]:
text_prompts = [
    "What are the key principles of investment?"
]

for prompt in text_prompts:
    # Correctly format the chat template (remove quotes around 'prompt')
    formatted_prompt = inference_tokenizer.apply_chat_template(
        [{"role": "user", "content": prompt}],  # Use the variable, not string "prompt"
        tokenize=False
    )

    model_inputs = inference_tokenizer(formatted_prompt, return_tensors="pt").to("cuda")

    generated_ids = inference_model.generate(
        **model_inputs,
        max_new_tokens=512,
        temperature=0.7,
        do_sample=True,
        pad_token_id=inference_tokenizer.pad_token_id
    )

    response = inference_tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
    print(response)


user

What are the key principles of investment?assistant

The key principles of investment are as follows:
1. **Risk and Return**: The principle states that there is a direct relationship between risk and return. The higher the risk, the higher the potential return and vice versa. This means that investors should be aware of the potential risks involved in an investment and consider them while making an investment decision.
2. **Time Value of Money**: This principle suggests that a dollar today is worth more than a dollar in the future. This is because money received today can be invested to earn interest or used to generate returns, whereas money received in the future will have to be invested for a longer period to earn the same returns.
3. **Diversification**: Diversification is the principle of spreading investments across different asset classes to reduce risk. This means that an investor should not put all their eggs in one basket, but instead, they should diversify their invest