In [1]:
%%capture
# Install dependencies. %%capture silences the installer output of this cell.
import os
# Colab is detected by searching for "COLAB_" in the concatenation of all
# environment-variable names.
if "COLAB_" not in "".join(os.environ.keys()):
     # Not on Colab: a single plain install of unsloth.
     !pip install unsloth
else:
     # On Colab: the first and last installs use --no-deps (xformers is pinned
     # to 0.0.29.post3); the middle install resolves dependencies normally.
     # NOTE(review): presumably --no-deps protects Colab's preinstalled
     # torch stack from being replaced — confirm.
     !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl triton cut_cross_entropy unsloth_zoo
     !pip install sentencepiece protobuf "datasets>=3.4.1" huggingface_hub hf_transfer
     !pip install --no-deps unsloth

In [2]:
from unsloth import FastLanguageModel
import torch
from datasets import load_dataset
import pandas as pd

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [3]:
# Model configuration
# These module-level names are reused by later cells (max_seq_length is also
# passed to SFTTrainer), so keep them in sync if you change them.
max_seq_length = 2048
dtype = None  # None for auto detection
load_in_4bit = True  # Use 4bit quantization to reduce memory usage

# Load the model
# Returns both the model and its tokenizer; both are rebound by later cells
# (get_peft_model wraps the model, get_chat_template replaces the tokenizer).
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-3B-Instruct",  # or "unsloth/Llama-3.2-1B-Instruct"
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

==((====))==  Unsloth 2025.6.2: Fast Llama patching. Transformers: 4.52.4.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.35G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/234 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/54.7k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/454 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

chat_template.jinja:   0%|          | 0.00/3.83k [00:00<?, ?B/s]

In [4]:
# Add LoRA adapters
# Rebinds `model` to the adapter-wrapped model. The target list covers the
# attention projections (q/k/v/o) and the MLP projections (gate/up/down).
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,  # LoRA rank
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,  # same value as the trainer seed used later
    use_rslora = False,
    loftq_config = None,
)

Unsloth 2025.6.2 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [5]:
# Load and prepare the NCERT Physics dataset
print("Loading NCERT Physics 12th dataset...")
dataset = load_dataset("KadamParth/NCERT_Physics_12th", split="train")

# Let's examine the dataset structure first
# (the conversion function defined later reads the 'Question' and 'Answer' columns)
print("Dataset columns:", dataset.column_names)
print("First example:", dataset[0])
print("Dataset size:", len(dataset))

# Set up chat template
# Rebinds `tokenizer`; formatting_prompts_func later renders conversations
# through this tokenizer's apply_chat_template.
from unsloth.chat_templates import get_chat_template

tokenizer = get_chat_template(
    tokenizer,
    chat_template = "llama-3.1",
)

Loading NCERT Physics 12th dataset...


README.md:   0%|          | 0.00/209 [00:00<?, ?B/s]

Physics_12th_Cleaned.csv:   0%|          | 0.00/4.63M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/5239 [00:00<?, ? examples/s]

Dataset columns: ['Topic', 'Explanation', 'Question', 'Answer', 'Difficulty', 'StudentLevel', 'QuestionType', 'QuestionComplexity', 'Prerequisites', 'EstimatedTime', 'subject', 'grade']
First example: {'Topic': 'Electric Charges and Fields', 'Explanation': 'Electric charges and fields are fundamental concepts in electromagnetism. Electric charges create electric fields, which exert forces on other charges. The spark or shock experienced when handling certain materials is due to the buildup and sudden discharge of static electricity.', 'Question': 'What is the phenomenon that causes a spark when you take off synthetic clothes in dry weather?', 'Answer': 'The phenomenon is the buildup and sudden discharge of static electricity.', 'Difficulty': 'Easy', 'StudentLevel': 'Beginner', 'QuestionType': 'General', 'QuestionComplexity': 1.7, 'Prerequisites': "Coulomb's Law, Electric Field", 'EstimatedTime': 2.0, 'subject': 'Physics', 'grade': 12}
Dataset size: 5239


In [6]:
def create_physics_conversations(examples):
    """
    Turn a batch of NCERT Physics rows into chat conversations.

    Args:
        examples: Batch mapping with parallel 'Question' and 'Answer'
            sequences (the form handed over by a batched dataset map).

    Returns:
        A dict with one key, 'conversations', holding one
        [system, user, assistant] message list per question.
    """
    # Every conversation opens with the same tutor instructions that restrict
    # the model to the NCERT Class 12 Physics curriculum.
    tutor_instructions = "You are an expert physics tutor specialized in NCERT Class 12 Physics. Answer questions clearly and accurately based only on NCERT Class 12 Physics curriculum. If a question is outside the scope of NCERT Class 12 Physics, politely mention that you can only help with NCERT Class 12 Physics topics."

    return {
        "conversations": [
            [
                # Fresh dicts per row so conversations never share message objects.
                {"role": "system", "content": tutor_instructions},
                {"role": "user", "content": question},
                # The answer is looked up by row position, pairing it with its question.
                {"role": "assistant", "content": examples['Answer'][row]},
            ]
            for row, question in enumerate(examples['Question'])
        ]
    }

def formatting_prompts_func(examples):
    """Render every conversation in the batch to one training string.

    Reads examples["conversations"] and returns {"text": [...]}, where each
    entry is the chat-template rendering of one conversation (untokenized,
    without a trailing generation prompt). Uses the module-level `tokenizer`.
    """
    rendered = []
    for convo in examples["conversations"]:
        rendered.append(
            tokenizer.apply_chat_template(convo, tokenize=False, add_generation_prompt=False)
        )
    return {"text": rendered}

In [7]:
# Process the dataset
print("Processing dataset...")

# Inspect the raw structure first so a field-name mismatch is easy to spot.
sample = dataset[0]
print("Sample data structure:")
for key, value in sample.items():
    print(f"  {key}: {type(value)} - {str(value)[:100]}...")

# Step 1 replaces all raw columns with a "conversations" column;
# step 2 adds the chat-template rendering as a "text" column.
# Because the raw columns are removed, re-running this cell requires
# reloading the dataset first.
try:
    dataset = dataset.map(create_physics_conversations, batched=True, remove_columns=dataset.column_names)
    dataset = dataset.map(formatting_prompts_func, batched=True)

    print("Successfully processed dataset!")
    print("Sample processed text:")
    print(dataset[0]["text"][:500] + "...")

except Exception as e:
    print(f"Error processing dataset: {e}")
    print("Please check the field names in your dataset and adjust the create_physics_conversations function accordingly.")
    # Re-raise after printing the hint: the trainer cell needs the "text"
    # column, so continuing with an unprocessed dataset would only fail later
    # with a less helpful error.
    raise

Processing dataset...
Sample data structure:
  Topic: <class 'str'> - Electric Charges and Fields...
  Explanation: <class 'str'> - Electric charges and fields are fundamental concepts in electromagnetism. Electric charges create el...
  Question: <class 'str'> - What is the phenomenon that causes a spark when you take off synthetic clothes in dry weather?...
  Answer: <class 'str'> - The phenomenon is the buildup and sudden discharge of static electricity....
  Difficulty: <class 'str'> - Easy...
  StudentLevel: <class 'str'> - Beginner...
  QuestionType: <class 'str'> - General...
  QuestionComplexity: <class 'float'> - 1.7...
  Prerequisites: <class 'str'> - Coulomb's Law, Electric Field...
  EstimatedTime: <class 'float'> - 2.0...
  subject: <class 'str'> - Physics...
  grade: <class 'int'> - 12...


Map:   0%|          | 0/5239 [00:00<?, ? examples/s]

Map:   0%|          | 0/5239 [00:00<?, ? examples/s]

Successfully processed dataset!
Sample processed text:
<|begin_of_text|><|start_header_id|>system<|end_header_id|>

Cutting Knowledge Date: December 2023
Today Date: 26 July 2024

You are an expert physics tutor specialized in NCERT Class 12 Physics. Answer questions clearly and accurately based only on NCERT Class 12 Physics curriculum. If a question is outside the scope of NCERT Class 12 Physics, politely mention that you can only help with NCERT Class 12 Physics topics.<|eot_id|><|start_header_id|>user<|end_header_id|>

What is the phenomenon tha...


In [8]:
# Training configuration
from trl import SFTTrainer
from transformers import TrainingArguments, DataCollatorForSeq2Seq
from unsloth import is_bfloat16_supported

# Build the supervised fine-tuning trainer over the rendered "text" column.
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",  # column produced by formatting_prompts_func
    max_seq_length = max_seq_length,
    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),
    dataset_num_proc = 2,
    packing = False,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,  # 2 x 4 = total batch size 8 on one GPU
        warmup_steps = 5,
        # NOTE(review): the run log reports "Total steps = 100", so max_steps is
        # the effective limit and num_train_epochs does not determine run length.
        # Per the Hugging Face docs a positive max_steps overrides
        # num_train_epochs — confirm, and set max_steps = -1 for full epochs.
        num_train_epochs = 1,  # Adjust based on your dataset size
        max_steps = 100,  # 100 steps x batch 8 = ~800 of 5,239 examples; increase for better training
        learning_rate = 2e-4,
        # Exactly one of fp16 / bf16 is enabled, depending on hardware support.
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,  # log the training loss at every step
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none",
    ),
)

Unsloth: Tokenizing ["text"]:   0%|          | 0/5239 [00:00<?, ? examples/s]

In [9]:
# Train only on assistant responses
# The two marker strings are the Llama-3 role headers exactly as they appear in
# the rendered "text" column (header tokens followed by a blank line).
# NOTE(review): presumably tokens outside the assistant turns are excluded from
# the loss — confirm against Unsloth's train_on_responses_only documentation.
from unsloth.chat_templates import train_on_responses_only
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<|start_header_id|>user<|end_header_id|>\n\n",
    response_part = "<|start_header_id|>assistant<|end_header_id|>\n\n",
)


Map (num_proc=2):   0%|          | 0/5239 [00:00<?, ? examples/s]

In [10]:
# Report the GPU in use and the memory already reserved before training.
# Byte counts are converted by dividing by 1024**3 and rounded to 3 decimals.
# start_gpu_memory and max_memory are read again by the post-training stats cell.
gpu_stats = torch.cuda.get_device_properties(0)
max_memory = round(gpu_stats.total_memory / 1024 ** 3, 3)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 ** 3, 3)
print(
    f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.",
    f"{start_gpu_memory} GB of memory reserved.",
    sep="\n",
)

GPU = Tesla T4. Max memory = 14.741 GB.
3.441 GB of memory reserved.


In [None]:
# Start training
print("Starting training...")
# Keep the returned stats: the final stats cell reads
# trainer_stats.metrics['train_runtime'].
trainer_stats = trainer.train()

Starting training...


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 5,239 | Num Epochs = 1 | Total steps = 100
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 24,313,856/3,000,000,000 (0.81% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,0.9839
2,1.1056
3,1.224
4,0.8339
5,0.9955
6,1.0143
7,0.654
8,0.8567
9,0.7313
10,0.8336


Step,Training Loss
1,0.9839
2,1.1056
3,1.224
4,0.8339
5,0.9955
6,1.0143
7,0.654
8,0.8567
9,0.7313
10,0.8336


In [None]:
# Summarise training time and peak reserved GPU memory.
# Relies on start_gpu_memory / max_memory from the pre-training stats cell and
# on trainer_stats from the training cell.
used_memory = round(torch.cuda.max_memory_reserved() / 1024 ** 3, 3)
# Memory attributable to training = peak minus what was reserved beforehand.
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
lora_percentage = round(used_memory_for_lora / max_memory * 100, 3)
used_percentage = round(used_memory / max_memory * 100, 3)
print("\n".join([
    f"{trainer_stats.metrics['train_runtime']} seconds used for training.",
    f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.",
    f"Peak reserved memory = {used_memory} GB.",
    f"Peak reserved memory for training = {used_memory_for_lora} GB.",
    f"Peak reserved memory % of max memory = {used_percentage} %.",
    f"Peak reserved memory for training % of max memory = {lora_percentage} %.",
]))

In [None]:
# Test the model
print("\n" + "="*50)
print("TESTING THE FINE-TUNED MODEL")
print("="*50)

# Switch the model to Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

# Test with a physics question.
# The system message is the same text that create_physics_conversations puts
# at the start of every training conversation, so the model is prompted in the
# format it was fine-tuned on. Keep the two strings in sync.
test_messages = [
    {"role": "system", "content": "You are an expert physics tutor specialized in NCERT Class 12 Physics. Answer questions clearly and accurately based only on NCERT Class 12 Physics curriculum. If a question is outside the scope of NCERT Class 12 Physics, politely mention that you can only help with NCERT Class 12 Physics topics."},
    {"role": "user", "content": "Explain the photoelectric effect and Einstein's equation for it."},
]

# add_generation_prompt=True appends the assistant header so the model
# continues with its answer rather than another user turn.
inputs = tokenizer.apply_chat_template(
    test_messages,
    tokenize = True,
    add_generation_prompt = True,
    return_tensors = "pt",
).to("cuda")

from transformers import TextStreamer
# Stream tokens to stdout as they are generated, without echoing the prompt.
text_streamer = TextStreamer(tokenizer, skip_prompt = True)

print("Model response:")
_ = model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = 256,
                   use_cache = True, temperature = 0.7, min_p = 0.1)

In [None]:
# Save the model
print("\n" + "="*30)
print("SAVING THE MODEL")
print("="*30)

# Model and tokenizer go into the same directory so they can be reloaded together.
# NOTE(review): `model` is the adapter-wrapped model from get_peft_model, so
# this presumably writes only the LoRA adapter weights rather than a merged
# model — confirm; the merged/GGUF exports below are the alternatives.
model.save_pretrained("ncert_physics_model")
tokenizer.save_pretrained("ncert_physics_model")

print("Model saved successfully!")
print("You can load it later using:")
print('model, tokenizer = FastLanguageModel.from_pretrained("ncert_physics_model")')

# Optional: Save to different formats
# Uncomment the lines below if you want to save in different formats

# Save to 16bit merged model
# model.save_pretrained_merged("ncert_physics_16bit", tokenizer, save_method = "merged_16bit")

# Save to GGUF format (for llama.cpp)
# model.save_pretrained_gguf("ncert_physics_gguf", tokenizer, quantization_method = "q4_k_m")

print("\nFine-tuning completed! Your model is now specialized for NCERT Class 12 Physics questions.")