In [None]:
import torch
print(torch.__version__)
print(torch.cuda.is_available())
print(torch.version.cuda)
!pip show bitsandbytes
import bitsandbytes
print(bitsandbytes.__version__)
import bitsandbytes as bnb
import torch
x = torch.randn(10, device="cuda")
y = bnb.functional.quantize_4bit(x)
print("Quantization worked!")
import bitsandbytes.nn
import bitsandbytes.functional
print("Submodules imported successfully!")

In [None]:

pip install transformers==4.45.0 accelerate==0.26.0 bitsandbytes==0.43.3

In [None]:
import transformers
# Monkeypatch: replace transformers.utils.is_bitsandbytes_available with a stub that
# always returns True.
# NOTE(review): presumably a workaround for the availability check failing in this
# environment - confirm it is still needed, because it can hide a genuinely broken
# bitsandbytes install until much later in the notebook.
transformers.utils.is_bitsandbytes_available = lambda: True
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import os
import gc

In [None]:
# Free memory left over from previous runs of this notebook.
# Collect Python garbage first so tensors that are only kept alive by reference
# cycles are destroyed, then ask the CUDA caching allocator to release the
# now-unused blocks back to the device.
gc.collect()
torch.cuda.empty_cache()

In [None]:
# 4-bit NF4 quantization settings (QLoRA style) used when loading the base model.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    # Compute in float16 so the quantized matmuls agree with the fp16 mixed-precision
    # training configured in TrainingArguments (fp16=True) for the T4 GPUs this
    # notebook targets; T4 has no native bfloat16 support.
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,  # also quantize the quantization constants
)

In [None]:
# Hub identifier of the checkpoint to fine-tune, and the tokenizer that ships with it.
model_name = "deepseek-ai/deepseek-math-7b-instruct"
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path=model_name,
)

In [None]:
# Padding needs a pad token; when the tokenizer defines none, reuse the
# end-of-sequence token for that purpose.
_pad_token_missing = tokenizer.pad_token is None
if _pad_token_missing:
    tokenizer.pad_token = tokenizer.eos_token


In [None]:
# Load the base causal LM with the 4-bit quantization settings from `bnb_config`.
# device_map="auto" leaves weight placement to accelerate.
# NOTE(review): trust_remote_code=True allows code from the model repository to be
# executed - confirm this checkpoint actually needs it.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    device_map="auto",
    quantization_config=bnb_config,
)

In [None]:
from peft import LoraConfig, get_peft_model

In [None]:
# LoRA adapter hyper-parameters:
#   task_type      - causal language modeling
#   target_modules - attention projections that receive adapters
#                    (adjust based on model architecture)
#   r / lora_alpha - adapter rank and scaling factor
#   lora_dropout   - dropout applied for regularization
#   bias           - "none": no bias terms in the LoRA layers
lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj"],
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
)

In [None]:
# Apply LoRA to the model
from peft import prepare_model_for_kbit_training

# A k-bit quantized base model should be pre-processed for training before the
# adapters are attached; this is the step PEFT documents for QLoRA-style training.
model = prepare_model_for_kbit_training(model)

# Wrap the prepared base model so that only the LoRA adapter weights are trainable.
# NOTE: re-running this cell wraps the model again - reload the base model first.
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # Verify trainable parameters

In [None]:
# Hand-written supervised examples. Each record has:
#   "input"  - a prompt of the form "Is this equation correct? <claim>"
#   "output" - the target answer: "Yes. Explanation: ..." for a true claim, or
#              "No. Correct answer: ... Explanation: ..." for a false one.
dataset = [
    {
        "input": "Is this equation correct? 7 * 3 = 21",
        "output": "Yes. Explanation: 7 multiplied by 3 equals 21. This is standard multiplication."
    },
    {
        "input": "Is this equation correct? 5² = 10",
        "output": "No. Correct answer: 25. Explanation: 5 squared means 5 × 5, not 5 × 2."
    },
    {
        "input": "Is this equation correct? 8 ÷ 2(2+2) = 1",
        "output": "No. Correct answer: 16. Explanation: PEMDAS rules: 8 ÷ 2 × 4 = 16 (left-to-right for division/multiplication)."
    },
    {
        "input": "Is this equation correct? 10 - 2 × 3 = 4",
        "output": "Yes. Explanation: Follow PEMDAS: 2×3=6 first, then 10-6=4."
    },
    {
        "input": "Is this equation correct? 3 + 4 × 2 = 14",
        "output": "No. Correct answer: 11. Explanation: PEMDAS requires multiplication first: 4×2=8, then 3+8=11."
    },
    {
        "input": "Is this equation correct? (-3)² = -9",
        "output": "No. Correct answer: 9. Explanation: Squaring negatives yields positives: (-3)×(-3)=9."
    },
    {
        "input": "Is this equation correct? 5! = 120",
        "output": "Yes. Explanation: 5 factorial = 5×4×3×2×1=120."
    },
    {
        "input": "Is this equation correct? 1/2 + 1/3 = 2/5",
        "output": "No. Correct answer: 5/6. Explanation: Common denominator needed: 3/6 + 2/6 = 5/6."
    },
    {
        "input": "Is this equation correct? 2³ + 2² = 2⁵",
        "output": "No. Correct answer: 12. Explanation: Calculate exponents first: 8 + 4 = 12 ≠32."
    },
    {
        "input": "Is this equation correct? 1/4 × 4 = 1",
        "output": "Yes. Explanation: Multiplying by reciprocal: (¼)×4=1."
    },
    {
        "input": "Is this equation correct? 0.2 × 0.3 = 0.6",
        "output": "No. Correct answer: 0.06. Explanation: Tenths × tenths = hundredths: 0.2×0.3=0.06."
    },
    {
        "input": "Is this equation correct? (x + y)² = x² + y²",
        "output": "No. Correct answer: x² + 2xy + y². Explanation: FOIL expansion shows missing middle term."
    },
    {
        "input": "Is this equation correct? √16 = 4",
        "output": "Yes. Explanation: Principal square root of 16 is indeed 4."
    },
    {
        "input": "Is this equation correct? √9 = ±3",
        "output": "No. Correct answer: 3. Explanation: Principal square root is always non-negative."
    },
    {
        "input": "Is this equation correct? Probability of at least one head in two flips = 1/3",
        "output": "No. Correct answer: 3/4. Explanation: Possible outcomes: HH, HT, TH, TT (3/4 favorable)."
    },
    {
        "input": "Is this equation correct? 4 × 5 = 20",
        "output": "Yes. Explanation: Basic multiplication fact: 4 groups of 5 equals 20."
    },
    {
        "input": "Is this equation correct? 3! + 4! = 7!",
        "output": "No. Correct answer: 30. Explanation: 3!=6 + 4!=24 =30 ≠5040."
    },
    {
        "input": "Is this equation correct? log(10) + log(10) = log(20)",
        "output": "No. Correct answer: 2. Explanation: log(a)+log(b)=log(ab). log(10×10)=log(100)=2."
    },
    {
        "input": "Is this equation correct? 3² = 9",
        "output": "Yes. Explanation: 3 squared means 3×3=9. Correct exponentiation."
    },
    {
        "input": "Is this equation correct? Area of circle (radius 3) = 6π",
        "output": "No. Correct answer: 9π. Explanation: Area=πr²=π×3²=9π."
    },
    {
        "input": "Is this equation correct? 1/2 + 1/2 = 1",
        "output": "Yes. Explanation: Adding two identical fractions: ½ + ½ = 1 whole."
    },
    {
        "input": "Is this equation correct? Two 50% increases = 100% total",
        "output": "No. Correct answer: 125% increase. Explanation: Compounded growth: 1.5×1.5=2.25 (125% increase)."
    },
    {
        "input": "Is this equation correct? 1 ÷ (1/2) = 0.5",
        "output": "No. Correct answer: 2. Explanation: Dividing by a fraction = multiply by reciprocal: 1×2=2."
    },
    {
        "input": "Is this equation correct? log(100) = 2",
        "output": "Yes. Explanation: Base 10 logarithm: 10²=100 → log(100)=2."
    },
    {
        "input": "Is this equation correct? Slope of (2,3)→(4,7) = -2",
        "output": "No. Correct answer: 2. Explanation: (7-3)/(4-2)=4/2=2. Slope formula requires (y₂-y₁)/(x₂-x₁)."
    },
    {
        "input": "Is this equation correct? Angle in semicircle = 180°",
        "output": "No. Correct answer: 90°. Explanation: Thales' theorem: Angle opposite diameter is right angle."
    },
    {
        "input": "Is this equation correct? 0⁰ = 0",
        "output": "No. Correct answer: Undefined. Explanation: 0⁰ is indeterminate form in mathematics."
    },
    {
        "input": "Is this equation correct? Area of rectangle 3×4 =12",
        "output": "Yes. Explanation: Area = length × width: 3×4=12."
    },
    {
        "input": "Is this equation correct? Volume of sphere (diameter 4) = (4/3)π(4)³",
        "output": "No. Correct answer: (4/3)π(2)³. Explanation: Volume formula uses radius (diameter/2)."
    },
    {
        "input": "Is this equation correct? 1 - 0.999... = 0.001",
        "output": "No. Correct answer: 0. Explanation: 0.999... equals 1 exactly. Infinite decimals represent limits."
    },
    {
        "input": "Is this equation correct? (2x)³ = 2x³",
        "output": "No. Correct answer: 8x³. Explanation: Both coefficient and variable are cubed: (2)³=8, (x)³=x³."
    },
    {
        "input": "Is this equation correct? 1 mile = 5280 feet",
        "output": "Yes. Explanation: Standard imperial conversion: 1 mile = 5280 feet."
    },
    {
        "input": "Is this equation correct? √(3² +4²) =7",
        "output": "No. Correct answer:5. Explanation: Pythagorean theorem: √(9+16)=√25=5."
    },
    {
        "input": "Is this equation correct? Slope of (0,0)→(2,2) =1",
        "output": "Yes. Explanation: (2-0)/(2-0)=2/2=1. Correct slope calculation."
    },
    {
        "input": "Is this equation correct? 10 ÷ 2(3 + 2) = 1",
        "output": "No. Correct answer: 25. Explanation: PEMDAS: 10 ÷ 2 × 5 = 25 (parentheses first, then division/multiplication left-to-right)."
    },
    {
        "input": "Is this equation correct? 0.5 × 0.5 = 0.5",
        "output": "No. Correct answer: 0.25. Explanation: Multiplying tenths by tenths gives hundredths: 0.5 × 0.5 = 0.25."
    },
    {
        "input": "Is this equation correct? (x³)(x⁴) = x⁷",
        "output": "Yes. Explanation: Exponent rule: xᵃ × xᵇ = xᵃ⁺ᵇ → 3+4=7."
    },
    {
        "input": "Is this equation correct? 1/3 + 1/4 = 2/7",
        "output": "No. Correct answer: 7/12. Explanation: Common denominator 12: 4/12 + 3/12 = 7/12."
    },
    {
        "input": "Is this equation correct? 3² × 3³ = 9⁵",
        "output": "No. Correct answer: 3⁵ = 243. Explanation: Same base exponents add: 3²⁺³ = 3⁵ ≠ 9⁵."
    },
    {
        "input": "Is this equation correct? Volume of cube (side 2) = 8",
        "output": "Yes. Explanation: Volume = side³: 2×2×2=8."
    },
    {
        "input": "Is this equation correct? 0.999... = 1",
        "output": "Yes. Explanation: Infinite decimal 0.999... equals 1 by limit definition."
    },
    {
        "input": "Is this equation correct? 1 meter = 1000 centimeters",
        "output": "No. Correct answer: 100 cm. Explanation: 1 m = 100 cm (centi- = 1/100)."
    },
    {
        "input": "Is this equation correct? sin(90°) = 0",
        "output": "No. Correct answer: 1. Explanation: Sine of 90 degrees (right angle) equals 1."
    },
    {
        "input": "Is this equation correct? 2⁰ = 0",
        "output": "No. Correct answer: 1. Explanation: Any non-zero number to 0th power equals 1."
    },
    {
        "input": "Is this equation correct? 1 km² = 1,000 m²",
        "output": "No. Correct answer: 1,000,000 m². Explanation: Square units: (1000m)² = 1,000,000m²."
    },
    {
        "input": "Is this equation correct? 25% of 200 = 50",
        "output": "Yes. Explanation: 25% = ¼ → 200 ÷ 4 = 50."
    },
    {
        "input": "Is this equation correct? (a + b)³ = a³ + b³",
        "output": "No. Correct answer: a³ + 3a²b + 3ab² + b³. Explanation: Requires binomial expansion."
    },
    {
        "input": "Is this equation correct? 0.25 = 1/5",
        "output": "No. Correct answer: 1/4. Explanation: 0.25 = 25/100 = 1/4 ≠ 0.2 (1/5)."
    },
    {
        "input": "Is this equation correct? 1 hour = 360 seconds",
        "output": "No. Correct answer: 3600 seconds. Explanation: 60 minutes × 60 seconds = 3600."
    },
    {
        "input": "Is this equation correct? √(4 + 9) = √4 + √9",
        "output": "No. Correct answer: √13 ≈ 3.605. Explanation: √a + √b ≠ √(a+b) in general."
    },
    {
        "input": "Is this equation correct? 10 - 5 ÷ 5 = 9",
        "output": "Yes. Explanation: PEMDAS: 5 ÷ 5 =1 first, then 10 -1 =9."
    },
    {
        # Fixed prompt typo ("correct|" -> "correct?") so the prompt matches every other example.
        "input": "Is this equation correct? 1/0 = ∞",
        "output": "No. Correct answer: Undefined. Explanation: Division by zero is undefined in mathematics."
    },
    {
        "input": "Is this equation correct? 3x + 2x = 5x²",
        "output": "No. Correct answer: 5x. Explanation: Like terms add coefficients: 3+2=5, not multiplied."
    },
    {
        "input": "Is this equation correct? 1 ton = 2000 pounds",
        "output": "Yes. Explanation: Standard US ton definition: 1 short ton = 2000 lbs."
    },
    {
        "input": "Is this equation correct? 2² × 3² = 6²",
        "output": "Yes. Explanation: (2×3)² = 6² =36. Both sides equal 36."
    },
    {
        "input": "Is this equation correct? 1 liter = 1000 milliliters",
        "output": "Yes. Explanation: Metric system: milli- = 1/1000 → 1L =1000mL."
    },
    {
        "input": "Is this equation correct? 5% = 0.005",
        "output": "No. Correct answer: 0.05. Explanation: Percent means per hundred: 5% =5/100=0.05."
    },
    {
        "input": "Is this equation correct? Average of 10,20,30 =25",
        "output": "No. Correct answer:20. Explanation: (10+20+30)/3=60/3=20."
    },
    {
        "input": "Is this equation correct? 1/2 ÷ 1/4 = 2",
        "output": "Yes. Explanation: Dividing by fraction: ½ × 4/1 =2."
    },
    {
        "input": "Is this equation correct? 3 weeks = 504 hours",
        "output": "Yes. Explanation: 3×7 days=21 days ×24 hrs=504."
    },
    {
        "input": "Is this equation correct? (x²)³ = x⁵",
        "output": "No. Correct answer: x⁶. Explanation: Exponent rule: (xᵃ)ᵇ =xᵃᵇ →2×3=6."
    },
    {
        # Fixed label: the equation is true, but the target previously started with
        # "No." and then contradicted itself mid-sentence.
        "input": "Is this equation correct? 1+2×3 =7",
        "output": "Yes. Explanation: PEMDAS: 2×3=6 first, then 1+6=7."
    },
    {
        "input": "Is this equation correct? 2/3 of 9 =6",
        "output": "Yes. Explanation: 9 ÷3×2=3×2=6."
    },
    {
        "input": "Is this equation correct? 1 year = 365 days",
        "output": "No. Correct answer: ~365.25 days. Explanation: Solar year ≈365.25 days (leap years adjust)."
    },
    {
        "input": "Is this equation correct? 0.1 +0.2 =0.3",
        "output": "Yes. Explanation: Decimal addition: 0.1+0.2=0.3 (though floating-point systems may approximate)."
    },
    {
        "input": "Is this equation correct? 4 + 5 × 2 =18",
        "output": "No. Correct answer:14. Explanation: PEMDAS: 5×2=10 first, then 4+10=14."
    },
    {
        "input": "Is this equation correct? 1 yard = 3 feet",
        "output": "Yes. Explanation: Imperial conversion: 3 feet =1 yard."
    },
    {
        "input": "Is this equation correct? 1/∞ =0",
        "output": "Yes. Explanation: Limit concept: As denominator approaches infinity, value approaches zero."
    },
    {
        "input": "Is this equation correct? 2⁴ =8",
        "output": "No. Correct answer:16. Explanation: 2×2×2×2=16 ≠8."
    },
    {
        "input": "Is this equation correct? 1000g =1kg",
        "output": "Yes. Explanation: Metric system: kilo- =1000 →1kg=1000g."
    },
    {
        "input": "Is this equation correct? 1 decade =100 years",
        "output": "No. Correct answer:10 years. Explanation: Decade=10 years, century=100."
    },
    {
        "input": "Is this equation correct? 5×0=5",
        "output": "No. Correct answer:0. Explanation: Any number multiplied by zero equals zero."
    },
    {
        "input": "Is this equation correct? 2² + 2³ =12",
        "output": "Yes. Explanation: 4 +8=12."
    },
    {
        "input": "Is this equation correct? 3/4 =75%",
        "output": "Yes. Explanation: 3÷4=0.75=75%."
    },
    {
        "input": "Is this equation correct? 10² -10 =90",
        "output": "Yes. Explanation: 100 -10=90."
    },
    {
        "input": "Is this equation correct? Perimeter of square (side 5) =25",
        "output": "No. Correct answer:20. Explanation: Perimeter=4×side=4×5=20."
    },
    {
        "input": "Is this equation correct? 2 hours 30 mins =150 minutes",
        "output": "Yes. Explanation: 2×60=120 +30=150 mins."
    },
    {
        "input": "Is this equation correct? 1/8 >1/4",
        "output": "No. Correct answer:1/4 >1/8. Explanation: Larger denominator = smaller fraction."
    },
    {
        "input": "Is this equation correct? 7×8=54",
        "output": "No. Correct answer:56. Explanation: Common multiplication error - 7×8=56."
    },
    {
        "input": "Is this equation correct? 1 dozen =12",
        "output": "Yes. Explanation: Standard definition: 1 dozen =12 units."
    },
    {
        "input": "Is this equation correct? 15% tip on $40 =$6",
        "output": "Yes. Explanation: 10%=$4 +5%=$2 → Total $6."
    },
    {
        "input": "Is this equation correct? 3/4 ÷ 2 =3/8",
        "output": "Yes. Explanation: Dividing by 2 = multiplying by ½: ¾×½=3/8."
    }
]

In [None]:
# Prepare dataset for training
def format_data(example):
    """Render one input/output pair as a single chat-formatted training string.

    Args:
        example: Mapping with an "input" key (the user prompt) and an "output"
            key (the target assistant reply).

    Returns:
        dict: ``{"text": <rendered conversation>}`` produced by the module-level
        ``tokenizer``'s chat template.
    """
    # Format input and output as a conversation
    messages = [
        {"role": "user", "content": example["input"]},
        {"role": "assistant", "content": example["output"]}
    ]
    # The conversation already contains the assistant reply, so no generation prompt
    # must be appended: add_generation_prompt=True would leave a dangling assistant
    # header at the end of every training sample. tokenize=False returns plain text.
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
    return {"text": text}

In [None]:
from datasets import Dataset
# Wrap the raw list of examples in a Hugging Face Dataset, then replace the
# "input"/"output" columns with the single "text" column built by format_data.
hf_dataset = Dataset.from_list(dataset)
tokenized_dataset = hf_dataset.map(
    format_data,
    remove_columns=["input", "output"],
)

In [None]:
# Tokenize the dataset
def tokenize_function(examples):
    """Tokenize a batch of chat-formatted strings.

    Args:
        examples: Batch mapping with a "text" key holding the strings to tokenize
            (as passed by ``Dataset.map(..., batched=True)``).

    Returns:
        The output of the module-level ``tokenizer`` for the batch, padded and
        truncated to 512 tokens.
    """
    return tokenizer(
        examples["text"],
        padding="max_length",
        truncation=True,
        max_length=512,
        # The text comes from the chat template, which already inserts the special
        # tokens; adding them again here would duplicate them (e.g. a double BOS).
        add_special_tokens=False,
        # No return_tensors: Dataset.map stores plain lists, and the data collator
        # builds the tensors at batch time.
    )


In [None]:
# Run tokenize_function over the formatted dataset in batches.
tokenized_dataset = tokenized_dataset.map(
    function=tokenize_function,
    batched=True,
)

In [None]:
# Split dataset into train and eval (90% train, 10% eval)
# A fixed seed keeps the split identical across kernel restarts, so evaluation
# losses from different runs are comparable.
train_test_split = tokenized_dataset.train_test_split(test_size=0.1, seed=42)
train_dataset = train_test_split["train"]
eval_dataset = train_test_split["test"]

In [None]:
# Define data collator
from transformers import DataCollatorForLanguageModeling
# Collator for causal language modeling (mlm=False disables masked-LM masking).
data_collator = DataCollatorForLanguageModeling(
    mlm=False,
    tokenizer=tokenizer,
)

In [None]:
from transformers import TrainingArguments, Trainer

In [None]:
# Define training arguments
training_args = TrainingArguments(
    output_dir="/kaggle/working/model_output",
    overwrite_output_dir=True,
    num_train_epochs=3,
    per_device_train_batch_size=2,  # Adjust based on GPU memory (T4x2)
    per_device_eval_batch_size=2,
    gradient_accumulation_steps=4,  # Effective batch size = 2 * 4 = 8
    # `evaluation_strategy` is deprecated in the transformers release pinned by this
    # notebook (4.45.0); `eval_strategy` is the current name for the same option.
    eval_strategy="epoch",
    save_strategy="epoch",  # kept equal to eval_strategy, as load_best_model_at_end requires
    learning_rate=2e-5,
    weight_decay=0.01,
    fp16=True,  # Use mixed precision for T4 GPU
    logging_dir="/kaggle/working/logs",
    logging_steps=10,
    load_best_model_at_end=True,
    metric_for_best_model="loss",
    report_to="none",  # Disable wandb in Kaggle
    push_to_hub=False,
)

In [None]:
# Define compute metrics (optional, for evaluation)
def compute_metrics(eval_pred):
    """Compute next-token accuracy for a causal language model.

    Args:
        eval_pred: A ``(logits, labels)`` pair. ``logits`` has a trailing vocabulary
            axis and ``labels`` holds token ids, with ``-100`` marking positions to
            ignore. Arrays or tensors are both accepted; if ``logits`` is a tuple,
            its first element is used.

    Returns:
        dict: ``{"accuracy": float}`` - the fraction of non-ignored target tokens
        predicted correctly, or ``0.0`` when every target is ignored.
    """
    logits, labels = eval_pred
    if isinstance(logits, (tuple, list)):
        logits = logits[0]
    logits = torch.as_tensor(logits)
    labels = torch.as_tensor(labels)

    # The logits at position t predict the token at position t + 1, so drop the
    # last prediction and the first label to align them.
    predictions = logits.argmax(dim=-1)[..., :-1]
    targets = labels[..., 1:]

    # Positions labelled -100 (padding / ignored tokens) must not be scored.
    scored = targets != -100
    scored_count = int(scored.sum().item())
    if scored_count == 0:
        return {"accuracy": 0.0}

    # Count matches as integers; calling .mean() on a boolean tensor raises.
    correct = int(((predictions == targets) & scored).sum().item())
    return {"accuracy": correct / scored_count}


In [None]:
# Build the Trainer from the LoRA-wrapped model, the training arguments,
# both dataset splits and the language-modeling collator.
# Accuracy metrics are off by default; pass compute_metrics=compute_metrics
# to the constructor to enable them.
trainer = Trainer(
    args=training_args,
    model=model,
    data_collator=data_collator,
    eval_dataset=eval_dataset,
    train_dataset=train_dataset,
)

In [None]:
# Train the model
# Runs the training loop configured on `trainer`. Because this call is the last
# expression in the cell, the notebook displays whatever train() returns.
trainer.train()

In [None]:
# Save the model and tokenizer
output_dir = "/kaggle/working/finetuned_model"
model.save_pretrained(output_dir)
tokenizer.save_pretrained(output_dir)

# Zip the model directory for easy download (optional)
import shutil
shutil.make_archive("/kaggle/working/finetuned_model", "zip", output_dir)
print("Model and tokenizer saved and zipped at /kaggle/working/finetuned_model.zip")

# Test inference
# Switch to eval mode so dropout (including the LoRA dropout) is disabled while
# generating.
model.eval()
messages = [
    {"role": "user", "content": "Is this equation correct? 2 / 2 = 5"}
]
# return_dict=True also yields the attention mask, which is passed to generate()
# explicitly; it cannot be inferred reliably when the pad id equals the EOS id.
encoded_prompt = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,
    return_dict=True,
    return_tensors="pt",
).to(model.device)
input_tensor = encoded_prompt["input_ids"]
outputs = model.generate(**encoded_prompt, max_new_tokens=100, pad_token_id=tokenizer.eos_token_id)
# Decode only the newly generated tokens, i.e. everything after the prompt.
result = tokenizer.decode(outputs[0][input_tensor.shape[1]:], skip_special_tokens=True)
print("Test inference result:", result)

In [None]:
from peft import PeftModel

In [None]:

# Additionally dump the model's full state dict to a single .pth file.
output_weights_path = "/kaggle/working/fine_tuned_deepseek_math_weights.pth"
torch.save(obj=model.state_dict(), f=output_weights_path)