In [1]:
!pip install unsloth transformers trl datasets


Collecting unsloth
  Downloading unsloth-2025.3.19-py3-none-any.whl.metadata (46 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/46.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.2/46.2 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
Collecting trl
  Downloading trl-0.16.0-py3-none-any.whl.metadata (12 kB)
Collecting datasets
  Downloading datasets-3.5.0-py3-none-any.whl.metadata (19 kB)
Collecting unsloth_zoo>=2025.3.17 (from unsloth)
  Downloading unsloth_zoo-2025.3.17-py3-none-any.whl.metadata (8.0 kB)
Collecting xformers>=0.0.27.post2 (from unsloth)
  Downloading xformers-0.0.29.post3-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (1.0 kB)
Collecting bitsandbytes (from unsloth)
  Downloading bitsandbytes-0.45.4-py3-none-manylinux_2_24_x86_64.whl.metadata (5.0 kB)
Collecting tyro (from unsloth)
  Downloading tyro-0.9.18-py3-none-any.whl.metadata (9.2 kB)
Collecting trl
  Downloading trl-0.15.2-py3-n

In [2]:
import torch
from unsloth import FastLanguageModel
from datasets import load_dataset
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth.chat_templates import get_chat_template, standardize_sharegpt


🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [3]:
base_model = "unsloth/Llama-3.2-3B-Instruct"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=base_model,
    max_seq_length=2048,
    load_in_4bit=True
)


==((====))==  Unsloth 2025.3.19: Fast Llama patching. Transformers: 4.50.2.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.35G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/234 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/54.7k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

In [4]:
peft_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]

model = FastLanguageModel.get_peft_model(
    model, r=16, target_modules=peft_modules
)


Unsloth 2025.3.19 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [5]:
data = load_dataset("mlabonne/FineTome-100k", split="train")

data = standardize_sharegpt(data)

data = data.map(lambda batch: {
    "text": [tokenizer.apply_chat_template(conv, tokenize=False) for conv in batch["conversations"]]
}, batched=True)


README.md:   0%|          | 0.00/982 [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/117M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/100000 [00:00<?, ? examples/s]

Unsloth: Standardizing formats (num_proc=2):   0%|          | 0/100000 [00:00<?, ? examples/s]

Map:   0%|          | 0/100000 [00:00<?, ? examples/s]

In [6]:
training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_steps=5,
    max_steps=60,
    learning_rate=2e-4,
    fp16=not torch.cuda.is_bf16_supported(),
    bf16=torch.cuda.is_bf16_supported(),
    logging_steps=1,
    output_dir="fine_tuned_outputs"
)


In [7]:
trainer = SFTTrainer(
    model=model,
    train_dataset=data,
    dataset_text_field="text",
    max_seq_length=2048,
    args=training_args,
)

trainer.train()


tokenizer_config.json:   0%|          | 0.00/54.7k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/100000 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 100,000 | Num Epochs = 1 | Total steps = 60
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 24,313,856/3,000,000,000 (0.81% trained)
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mabhinav02shukla[0m ([33mabhinav02shukla-chhattisgarh-swami-vivekanand-technical-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,2.5069
2,2.6423
3,1.6377
4,1.9615
5,1.9156
6,1.9862
7,1.1664
8,1.8687
9,1.5218
10,1.4618


TrainOutput(global_step=60, training_loss=1.1852877567211786, metrics={'train_runtime': 787.3947, 'train_samples_per_second': 0.61, 'train_steps_per_second': 0.076, 'total_flos': 6239882593640448.0, 'train_loss': 1.1852877567211786})

In [8]:
model.save_pretrained("fine_tuned_model")


In [9]:
finetuned_model, inference_tokenizer = FastLanguageModel.from_pretrained(
    model_name="./fine_tuned_model",
    max_seq_length=2048,
    load_in_4bit=True
)


==((====))==  Unsloth 2025.3.19: Fast Llama patching. Transformers: 4.50.2.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [10]:
prompts = ["Explain the principles of investment."]

for query in prompts:
    formatted_query = inference_tokenizer.apply_chat_template([
        {"role": "user", "content": query}
    ], tokenize=False)

    inputs = inference_tokenizer(formatted_query, return_tensors="pt").to("cuda")
    output_ids = finetuned_model.generate(
        **inputs, max_new_tokens=512, temperature=0.7,
        do_sample=True, pad_token_id=inference_tokenizer.pad_token_id
    )
    result = inference_tokenizer.batch_decode(output_ids, skip_special_tokens=True)[0]
    print(result)


system

Cutting Knowledge Date: December 2023
Today Date: 03 Apr 2025

user

Explain the principles of investment.assistant

Investment is the act of allocating resources to assets that are expected to generate returns over time. The principles of investment are based on several key concepts, including:

1. Risk and Return: Investments involve taking on risk in order to achieve returns. The risk of an investment is the likelihood that the investment will result in a loss, while the return is the amount of money that can be earned from the investment.

2. Time Value of Money: The time value of money is the concept that a dollar received today is worth more than a dollar received in the future. This is because money received today can be invested to earn interest and grow in value over time.

3. Diversification: Diversification is the act of spreading investments across different asset classes, such as stocks, bonds, and real estate, in order to reduce risk and increase potential returns