## Step 0 - Installing Librariess

In [3]:
!pip install -q --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl triton cut_cross_entropy unsloth_zoo
!pip install -q sentencepiece protobuf "datasets>=3.4.1" huggingface_hub hf_transfer
!pip install -q --no-deps unsloth
!pip install regex safetensors tokenizers numpy tqdm packaging
!pip install "transformers[torch]"

Collecting regex
  Downloading regex-2025.7.34-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (40 kB)
Collecting safetensors
  Downloading safetensors-0.6.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.1 kB)
Collecting tokenizers
  Downloading tokenizers-0.21.4-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.7 kB)
Downloading regex-2025.7.34-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (801 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m801.9/801.9 kB[0m [31m1.3 MB/s[0m  [33m0:00:00[0mm0:00:01[0m0m
[?25hDownloading safetensors-0.6.2-cp38-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (485 kB)
Downloading tokenizers-0.21.4-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m1.9 MB/s[0m  [33m0:00:01[0m eta [36m0:00:01[0m
[?25hInstallin

## Step 1 - Choose a Base Model

In [4]:
from unsloth import FastModel
import torch
from transformers import AutoConfig # Import AutoConfig

# Choose any model of your choosing, based on your GPU RAM
default = "md-nishat-008/TigerLLM-1B-it"
# gemma3_1b = "google/gemma-3-1b-it"
# gemma3_4b = "google/gemma-3-4b-it"
# gemma3_12b = "google/gemma-3-12b-it"
# gemma3_27b = "google/gemma-3-27b-it"

# Load model with the corrected config
model, tokenizer = FastModel.from_pretrained(
    model_name = default,
    max_seq_length = 1024,
    load_in_4bit = False,
    load_in_8bit = True,
    full_finetuning = False,
    # token = "hf_...", # In case you need it
)


# Note - You will find your Hugging Face Access Token in your Hugging Face Account.
# This will be necessary for Gemma & LLaMA models.

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


    PyTorch 2.6.0+cu124 with CUDA 1204 (you have 2.8.0+cu129)
    Python  3.12.9 (you have 3.12.3)
  Please reinstall xformers (see https://github.com/facebookresearch/xformers#installing-xformers)
  Memory-efficient attention, SwiGLU, sparse and more won't be available.
  Set XFORMERS_MORE_DETAILS=1 for more details


🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.8.9: Fast Gemma3 patching. Transformers: 4.55.4.
   \\   /|    NVIDIA GeForce GTX 1650 SUPER. Num GPUs = 1. Max memory: 3.622 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.8.0+cu129. CUDA: 7.5. CUDA Toolkit: 12.9. Triton: 3.4.0
\        /    Bfloat16 = FALSE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
Unsloth: Using float16 precision for gemma3 won't work! Using float32.


## Step 2 - Play with the Base Model

In [5]:
from unsloth.chat_templates import get_chat_template

# Attach the TigerLLM & Gemma-3 chat template to the tokenizer
tokenizer = get_chat_template(tokenizer, chat_template="gemma-3")

# Compose the conversation
bangla_q = (
    "মৌলিক সংখ্যা বের করার জন্য একটি পাইথন ফাংশন লিখুন"
)


messages = [
    {
        "role": "user",
        "content": bangla_q,
    }
]

# Render the chat template to plain text
chat_text = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,  # must be True for generation
    tokenize=False               # return a plain string
)

# Tokenize and generate
inputs = tokenizer(chat_text, return_tensors="pt").to("cuda")

outputs = model.generate(
    **inputs,
    use_cache=False,
    max_new_tokens=256,           # increase for longer outputs
    temperature=0.3,
    top_p=0.95,
    top_k=64,
)

print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])

user
মৌলিক সংখ্যা বের করার জন্য একটি পাইথন ফাংশন লিখুন
model
```python
def is_prime(n):
    if n <= 1:
        return False
    for i in range(2, int(n**0.5) + 1):
        if n % i == 0:
            return False
    return True
```


## Step 3 - Set the LoRA Adapters

In [6]:
model = FastModel.get_peft_model(
    model,
    finetune_vision_layers     = False, # Turn off for just text!
    finetune_language_layers   = True,  # Should leave on!
    finetune_attention_modules = True,  # Attention good for GRPO
    finetune_mlp_modules       = True,  # SHould leave on always!

    r = 16,           # Larger = higher accuracy, but might overfit
    lora_alpha = 32,  # Recommended alpha == r at least
    lora_dropout = 0,
    bias = "none",
    random_state = 3407,
)

Unsloth: Making `model.base_model.model.model` require gradients


## Step 4 - Data Prep

### Chat Template

In [7]:
from unsloth.chat_templates import get_chat_template
tokenizer = get_chat_template(
    tokenizer,
    chat_template = "gemma-3",
)

### Use Your Own Instruction Dataset
### You will find a sample dataset here - https://noshinulfat.github.io/blp25_code_generation_task/#/task-announcement

In [8]:
dataset_trial_path = "dataset/trial.csv"
dataset_dev_path = "dataset/dev_v2.csv"

In [9]:
## We will use a very small sample dataset
from datasets import load_dataset

# Load the local CSV (will appear as the single default “train” split)
dataset = load_dataset(
    "csv",
    data_files=dataset_trial_path,   # path relative to your current directory
)["train"]                               # pull the one split that load_dataset creates

print(len(dataset), "rows loaded.")


Generating train split: 74 examples [00:00, 793.36 examples/s]

74 rows loaded.





### Map

In [10]:
def to_finetome(example):
    return {
        "conversations": [
            {"role": "user",  "content": example["instruction"]},
            {"role": "model", "content": example["response"]},   # ← key change
        ]
    }

dataset = dataset.map(
    to_finetome,
    num_proc=4,
    remove_columns=["instruction", "response"]
)

Map (num_proc=4): 100%|██████████| 74/74 [00:00<00:00, 100.30 examples/s]


### Check the First Instance

In [11]:
dataset[0]

{'id': 1,
 'test_list': '"[\'assert smallest_multiple(13)==360360\', \'assert smallest_multiple(2)==2\', \'assert smallest_multiple(1)==1\']"',
 'conversations': [{'content': 'প্রথম n সংখ্যার ক্ষুদ্রতম গুণিতক খুঁজে বের করার জন্য একটি ফাংশন লিখুন।',
   'role': 'user'},
  {'content': 'def smallest_multiple(n):\r\n    if (n<=2):\r\n      return n\r\n    i = n * 2\r\n    factors = [number  for number in range(n, 1, -1) if number * 2 > n]\r\n    while True:\r\n        for a in factors:\r\n            if i % a != 0:\r\n                i += n\r\n                break\r\n            if (a == factors[-1] and i % a == 0):\r\n                return i',
   'role': 'model'}]}

### Formatting

In [12]:
def formatting_prompts_func(batch):
    texts = []
    for convo in batch["conversations"]:
        try:
            serialized = tokenizer.apply_chat_template(
                convo,
                tokenize=False,
                add_generation_prompt=False,
            ).removeprefix("<bos>")
        except Exception:          # catches TemplateError, etc.
            serialized = ""        # placeholder keeps list length in sync
        texts.append(serialized)
    return {"text": texts}

dataset = dataset.map(formatting_prompts_func, batched=True)


Map: 100%|██████████| 74/74 [00:00<00:00, 1241.75 examples/s]


### Check the First Instance Again

In [13]:
dataset[0]["text"]

'<start_of_turn>user\nপ্রথম n সংখ্যার ক্ষুদ্রতম গুণিতক খুঁজে বের করার জন্য একটি ফাংশন লিখুন।<end_of_turn>\n<start_of_turn>model\ndef smallest_multiple(n):\r\n    if (n<=2):\r\n      return n\r\n    i = n * 2\r\n    factors = [number  for number in range(n, 1, -1) if number * 2 > n]\r\n    while True:\r\n        for a in factors:\r\n            if i % a != 0:\r\n                i += n\r\n                break\r\n            if (a == factors[-1] and i % a == 0):\r\n                return i<end_of_turn>\n'

## Step 5 - Finetuning Config
### Adjust as you see fit

In [19]:
from trl import SFTTrainer, SFTConfig
import math

# --- constants ----------------------------------------------------------
DATASET_SIZE                = 74
PER_DEV_BATCH               = 16
GRAD_ACC_STEPS              = 4
EPOCHS                      = 2

# derived values (no longer passed as kwargs)
EFFECTIVE_BATCH             = PER_DEV_BATCH * GRAD_ACC_STEPS
STEPS_PER_EPOCH             = math.ceil(DATASET_SIZE / EFFECTIVE_BATCH)
MAX_STEPS                   = EPOCHS * STEPS_PER_EPOCH

cfg = SFTConfig(
    dataset_text_field          = "text",
    # ── memory / speed knobs ────────────────────────────────────────────
    packing                     = False,
    per_device_train_batch_size = PER_DEV_BATCH,
    gradient_accumulation_steps = GRAD_ACC_STEPS,
    gradient_checkpointing      = True,
    bf16                        = False,
    fp16                        = True,
    optim                       = "adamw_8bit",

    # ── schedule / optimisation ────────────────────────────────────────
    num_train_epochs            = EPOCHS,       # renamed
    max_steps                   = MAX_STEPS,    # keeps the same target
    warmup_steps                = 10,
    lr_scheduler_type           = "cosine",
    learning_rate               = 1e-4,
    weight_decay                = 0.01,

    # ── misc ───────────────────────────────────────────────────────────
    logging_steps               = 1,  ## Update this
    dataset_num_proc            = 8,
    seed                        = 3407,
    report_to                   = "none",
)

trainer = SFTTrainer(
    model          = model,        # load with attn_implementation="flash_attention_2"
    tokenizer      = tokenizer,
    train_dataset  = dataset,
    args           = cfg,
)


Unsloth: Switching to float32 training since model cannot work with float16


Unsloth: Tokenizing ["text"] (num_proc=8): 100%|██████████| 74/74 [00:04<00:00, 16.21 examples/s]


### Masking

In [20]:
from unsloth.chat_templates import train_on_responses_only
trainer = train_on_responses_only(
    trainer,
    instruction_part = "<start_of_turn>user\n",
    response_part = "<start_of_turn>model\n",
)

Map (num_proc=12): 100%|██████████| 74/74 [00:00<00:00, 101.41 examples/s]


### Verify the Masking

In [21]:
tokenizer.decode(trainer.train_dataset[0]["input_ids"])

'<bos><start_of_turn>user\nপ্রথম n সংখ্যার ক্ষুদ্রতম গুণিতক খুঁজে বের করার জন্য একটি ফাংশন লিখুন।<end_of_turn>\n<start_of_turn>model\ndef smallest_multiple(n):\r\n    if (n<=2):\r\n      return n\r\n    i = n * 2\r\n    factors = [number  for number in range(n, 1, -1) if number * 2 > n]\r\n    while True:\r\n        for a in factors:\r\n            if i % a != 0:\r\n                i += n\r\n                break\r\n            if (a == factors[-1] and i % a == 0):\r\n                return i<end_of_turn>\n'

### Now let's print the masked out example - you should see only the answer is present:

In [22]:
tokenizer.decode([tokenizer.pad_token_id if x == -100 else x for x in trainer.train_dataset[0]["labels"]]).replace(tokenizer.pad_token, " ")

'                             def smallest_multiple(n):\r\n    if (n<=2):\r\n      return n\r\n    i = n * 2\r\n    factors = [number  for number in range(n, 1, -1) if number * 2 > n]\r\n    while True:\r\n        for a in factors:\r\n            if i % a != 0:\r\n                i += n\r\n                break\r\n            if (a == factors[-1] and i % a == 0):\r\n                return i<end_of_turn>\n'

## Step 6 - Let's Finetune 🔥🔥

In [23]:
# @title Show current memory stats
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

GPU = NVIDIA GeForce GTX 1650 SUPER. Max memory = 3.622 GB.
2.432 GB of memory reserved.


### Start Finetuning....... 🔥🔥

In [24]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 74 | Num Epochs = 2 | Total steps = 4
O^O/ \_/ \    Batch size per device = 16 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (16 x 4 x 1) = 64
 "-____-"     Trainable parameters = 13,045,760 of 1,012,931,712 (1.29% trained)
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 74 | Num Epochs = 2 | Total steps = 4
O^O/ \_/ \    Batch size per device = 14 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (14 x 4 x 1) = 56
 "-____-"     Trainable parameters = 13,045,760 of 1,012,931,712 (1.29% trained)
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 74 | Num Epochs = 2 | Total steps = 4
O^O/ \_/ \    Batch size per device = 12 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch siz

Unsloth: Will smartly offload gradients to save VRAM!


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 74 | Num Epochs = 1 | Total steps = 4
O^O/ \_/ \    Batch size per device = 6 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (6 x 4 x 1) = 24
 "-____-"     Trainable parameters = 13,045,760 of 1,012,931,712 (1.29% trained)
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 74 | Num Epochs = 1 | Total steps = 4
O^O/ \_/ \    Batch size per device = 5 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (5 x 4 x 1) = 20
 "-____-"     Trainable parameters = 13,045,760 of 1,012,931,712 (1.29% trained)
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 74 | Num Epochs = 1 | Total steps = 4
O^O/ \_/ \    Batch size per device = 4 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (4 

Step,Training Loss
1,1.7956
2,1.8891


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 74 | Num Epochs = 1 | Total steps = 4
O^O/ \_/ \    Batch size per device = 3 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (3 x 4 x 1) = 12
 "-____-"     Trainable parameters = 13,045,760 of 1,012,931,712 (1.29% trained)


Step,Training Loss
1,1.7952
2,1.6022
3,1.8333
4,1.7652


## Step 7 - Let's Use Our New Finetuned Model

In [25]:
from unsloth.chat_templates import get_chat_template

# Attach the Gemma-3 chat template to the tokenizer
tokenizer = get_chat_template(tokenizer, chat_template="gemma-3")

# Compose the conversation
bangla_q = (
    "মৌলিক সংখ্যা বের করার জন্য একটি পাইথন ফাংশন লিখুন"
)

messages = [
    {
        "role": "user",
        "content": bangla_q,
    }
]

# Render the chat template to plain text
chat_text = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt=True,  # must be True for generation
    tokenize=False               # return a plain string
)

# Tokenize and generate
inputs = tokenizer(chat_text, return_tensors="pt").to("cuda")

outputs = model.generate(
    **inputs,
    use_cache=False,
    max_new_tokens=1012,           # increase for longer outputs
    temperature=0.3,
    top_p=0.95,
    top_k=64,
)

print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])


user
মৌলিক সংখ্যা বের করার জন্য একটি পাইথন ফাংশন লিখুন
model
```python
def is_prime(n):
    if n <= 1:
        return False
    for i in range(2, int(n**0.5) + 1):
        if n % i == 0:
            return False
    return True
```


## Step 8 - Use New Model on Dev Set

In [26]:
import pandas as pd
import torch
from tqdm.auto import tqdm
from unsloth.chat_templates import get_chat_template

tokenizer = get_chat_template(tokenizer, chat_template="gemma-3")

# Load prompts
df = pd.read_csv(dataset_dev_path)  # expects columns: id, instruction
assert {"id", "instruction"}.issubset(df.columns), "CSV must have columns: id, instruction"

responses = []
for prompt in tqdm(df["instruction"], desc="Generating"):
    chat_text = tokenizer.apply_chat_template(
        [{"role": "user", "content": str(prompt)}],
        add_generation_prompt=True,
        tokenize=False,
    )
    inputs = tokenizer(chat_text, return_tensors="pt").to(model.device)

    with torch.no_grad():
        out = model.generate(
            **inputs,
            use_cache=False,
            max_new_tokens=256,
            temperature=0.3,
            top_p=0.95,
            top_k=64,
        )

    gen_ids  = out[0][inputs["input_ids"].shape[1]:]
    gen_text = tokenizer.decode(gen_ids, skip_special_tokens=True).strip()
    responses.append(gen_text)

# Save ONLY id + response as JSON
out_df = pd.DataFrame({"id": df["id"], "response": responses})
out_df.to_json("submission.json", orient="records", force_ascii=False, indent=2)
print(f"✅ Wrote submission.json with {len(out_df)} rows (id, response).")


Generating: 100%|██████████| 400/400 [3:08:59<00:00, 28.35s/it]   

✅ Wrote submission.json with 400 rows (id, response).





## Step 9 - Preparing Submission File

In [27]:
import json, os, re, zipfile

SUB_PATH = "submission.json"

def file_format_check(path: str) -> bool:
    # name + extension
    if os.path.basename(path) != "submission.json":
        print("Error: File name must be exactly 'submission.json'")
        return False
    if not path.lower().endswith(".json"):
        print("Error: File must have .json extension")
        return False

    # must be valid JSON (not JSONL) and root must be a list
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except json.JSONDecodeError as e:
        print(f"Error: Invalid JSON format - {e}")
        print("Note: The file must be in proper JSON format (not JSONL)")
        return False

    if not isinstance(data, list):
        print("Error: The root element should be a list of objects")
        return False

    # each item: dict with ONLY keys {'id','response'}; id=int; response=str
    for idx, item in enumerate(data):
        if not isinstance(item, dict):
            print(f"Error: Item at index {idx} is not a dictionary")
            return False
        keys = set(item.keys())
        if keys != {"id", "response"}:
            print(f"Error: Item at index {idx} must contain only keys 'id' and 'response', found: {keys}")
            return False
        if not isinstance(item["id"], int):
            print(f"Error: 'id' field at index {idx} must be an integer")
            return False
        if not isinstance(item["response"], str):
            print(f"Error: 'response' field at index {idx} must be a string")
            return False

    print("Format check passed successfully!")
    return True

# ---------- Load, compute per-item validity, blank invalids, save, zip ----------
# Load JSON list
with open(SUB_PATH, "r", encoding="utf-8") as f:
    data = json.load(f)

n = len(data)
fence_pat = re.compile(r"^```python[\s\S]*```$", re.MULTILINE)

valid_format = []
valid_fence  = []
valid_both   = []

# Per-item validation mirrors file checker semantics
def item_format_ok(item):
    return (
        isinstance(item, dict)
        and set(item.keys()) == {"id", "response"}
        and isinstance(item["id"], int)
        and isinstance(item["response"], str)
    )

for item in data:
    vfmt = item_format_ok(item)
    vf   = bool(fence_pat.match(item["response"])) if vfmt else False
    valid_format.append(vfmt)
    valid_fence.append(vf)
    valid_both.append(vfmt and vf)

# Report stats
nf = sum(valid_fence)
nm = sum(valid_format)
nb = sum(valid_both)
den = max(n, 1)
print(f"Fencing valid: {nf}/{n} ({nf*100.0/den:.1f}%)")
print(f"Format valid:  {nm}/{n} ({nm*100.0/den:.1f}%)")
print(f"Both valid:    {nb}/{n} ({nb*100.0/den:.1f}%)")

# Strict policy: blank responses that fail ANY check
for i, ok in enumerate(valid_both):
    if not ok and isinstance(data[i], dict) and "response" in data[i]:
        data[i]["response"] = ""

# Overwrite submission.json (id+response only)
with open(SUB_PATH, "w", encoding="utf-8") as f:
    json.dump(
        [{"id": item["id"], "response": item["response"]} for item in data],
        f, ensure_ascii=False, indent=2
    )
print("✅ Updated submission.json after checks (invalid responses blanked).")

# Final file-level check (should pass)
_ = file_format_check(SUB_PATH)

# Zip as submission.zip (Jupyter-friendly, no shell commands)
with zipfile.ZipFile("submission.zip", "w", compression=zipfile.ZIP_DEFLATED) as zf:
    zf.write(SUB_PATH)
print("📦 Created submission.zip containing submission.json.")


Fencing valid: 383/400 (95.8%)
Format valid:  400/400 (100.0%)
Both valid:    383/400 (95.8%)
✅ Updated submission.json after checks (invalid responses blanked).
Format check passed successfully!
📦 Created submission.zip containing submission.json.


# Submit the submission.zip file in CodaBench

### Save the NEW model...if it's good :)

In [28]:
model.save_pretrained("New_Model")  # Local saving
tokenizer.save_pretrained("New_Model")

('New_Model/tokenizer_config.json',
 'New_Model/special_tokens_map.json',
 'New_Model/chat_template.jinja',
 'New_Model/tokenizer.model',
 'New_Model/added_tokens.json',
 'New_Model/tokenizer.json')