In [1]:
%pip install -q -U unsloth "datasets<4.0.0" trl


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.6/66.6 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m389.2/389.2 kB[0m [31m12.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m23.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m423.1/423.1 kB[0m [31m23.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.1/59.1 MB[0m [31m16.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m310.8/310.8 kB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.9/122.9 MB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m899.7/899.7 MB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━

In [2]:
from unsloth import FastLanguageModel
import torch

max_seq_length = 8192 # Qwen supports up to 32k
dtype = None # Auto detection
load_in_4bit = True # Quantization to fit in Colab T4 GPU

# 1. Load Qwen 2.5 Coder (1.5B is the sweet spot for speed/quality)
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "Qwen/Qwen2.5-Coder-1.5B-Instruct",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2026.1.3: Fast Qwen2 patching. Transformers: 4.57.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.1+cu128. CUDA: 7.5. CUDA Toolkit: 12.8. Triton: 3.5.1
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.33.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/1.14G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/265 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/632 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/613 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

In [3]:
# 2. Add LoRA adapters (This is what actually gets trained)
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, # Rank
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj"],
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",    # Optimized = "none"
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None,
)

Unsloth 2026.1.3 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


In [4]:
from datasets import load_dataset, DatasetDict, concatenate_datasets
import numpy as np
import re

SEED = 3407
MAX_LOG_CHARS = 6000

raw = load_dataset("alexjercan/bugnet")  # has train/validation/test

# Use ALL splits then re-split by problem_id to avoid leakage across splits
dataset = concatenate_datasets([raw["train"], raw["validation"], raw["test"]])

# Basic filters
dataset = dataset.filter(lambda x: x.get("language") == "Python")
dataset = dataset.filter(lambda x: isinstance(x.get("fail"), str) and isinstance(x.get("pass"), str))
dataset = dataset.filter(lambda x: len(x["fail"].strip()) > 0 and len(x["pass"].strip()) > 0)

# Keep only cases where original was NOT accepted (we're learning repair)
dataset = dataset.filter(lambda x: (x.get("original_status") is None) or (x["original_status"] != "Accepted"))

# ---------------------------
# Relaxed consistency filter (do NOT be too strict)
# ---------------------------
def is_consistent(example):
    fail = example["fail"]
    stderr = (example.get("stderr") or "")
    # If there's a traceback line shown, it's usually indented by 4 spaces.
    code_lines = [ln.strip() for ln in stderr.splitlines() if ln.startswith("    ")]
    if not code_lines:
        return True
    failing_line = code_lines[-1].strip()
    if len(failing_line) < 6:
        return True
    return failing_line in fail

dataset = dataset.filter(is_consistent)

# ---------------------------
# Split by problem_id
# ---------------------------
pids = dataset.unique("problem_id")
rng = np.random.default_rng(SEED)
rng.shuffle(pids)

n = len(pids)
n_test = max(20, int(0.05 * n))   # 5% or at least 20 problems
n_val  = max(20, int(0.05 * n))   # 5% or at least 20 problems

test_pids  = set(pids[:n_test])
val_pids   = set(pids[n_test:n_test + n_val])
train_pids = set(pids[n_test + n_val:])

def pid_in(pidset):
    return lambda x: x["problem_id"] in pidset

ds_raw = DatasetDict({
    "train": dataset.filter(pid_in(train_pids)),
    "validation": dataset.filter(pid_in(val_pids)),
    "test": dataset.filter(pid_in(test_pids)),
})

print("rows:", {k: len(v) for k, v in ds_raw.items()})
print("unique problem_id:", {k: len(v.unique('problem_id')) for k, v in ds_raw.items()})

# ---------------------------
# Formatter (FULL PROGRAM OUTPUT)
# ---------------------------
def format_codesensei_prompts(example):
    stderr = (example.get("stderr") or "").strip()
    err    = (example.get("error")  or "").strip()
    status = (example.get("original_status") or "").strip()
    stdout = (example.get("stdout") or "").strip()

    trace = stderr if stderr else err
    if not trace:
        trace = f"No stack trace captured. original_status={status}. stdout={stdout}"

    if len(trace) > MAX_LOG_CHARS:
        trace = "...(truncated)...\n" + trace[-MAX_LOG_CHARS:]

    buggy_code = example["fail"]
    fixed_code = example["pass"]

    user_content = (
        "Fix the following Python program using the error trace when available.\n"
        "Return ONLY the full fixed program (no markdown, no explanation, no extra tests).\n\n"
        "### BUGGY CODE:\n"
        f"{buggy_code}\n\n"
        "### ERROR TRACE:\n"
        f"{trace}\n"
    )

    messages = [
        {"role": "system", "content": "You are CodeSensei, a Python program repair assistant."},
        {"role": "user", "content": user_content},
        {"role": "assistant", "content": fixed_code},
    ]

    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
    return {"text": text}

ds = ds_raw.map(format_codesensei_prompts)

# sanity check
sample = ds["train"][0]["text"]
assistant_part = sample.split("<|im_start|>assistant", 1)[-1].split("<|im_end|>", 1)[0].strip()
print(sample[:1200])
print("\n--- assistant starts with ---\n", assistant_part[:300])


README.md: 0.00B [00:00, ?B/s]

bugnet.py: 0.00B [00:00, ?B/s]

The repository for alexjercan/bugnet contains custom code which must be executed to correctly load the dataset. You can inspect the repository content at https://hf.co/datasets/alexjercan/bugnet.
You can avoid this prompt in future by passing the argument `trust_remote_code=True`.

Do you wish to run the custom code? [y/N] y


Python_train.jsonl:   0%|          | 0.00/2.73M [00:00<?, ?B/s]

Python_validation.jsonl:   0%|          | 0.00/1.05M [00:00<?, ?B/s]

Python_test.jsonl:   0%|          | 0.00/87.9k [00:00<?, ?B/s]

problem_descriptions.json:   0%|          | 0.00/14.9M [00:00<?, ?B/s]

problem_tests.json:   0%|          | 0.00/517k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/2557 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1105 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/100 [00:00<?, ? examples/s]

Filter:   0%|          | 0/3762 [00:00<?, ? examples/s]

Filter:   0%|          | 0/3762 [00:00<?, ? examples/s]

Filter:   0%|          | 0/3762 [00:00<?, ? examples/s]

Filter:   0%|          | 0/3762 [00:00<?, ? examples/s]

Filter:   0%|          | 0/3762 [00:00<?, ? examples/s]

Flattening the indices:   0%|          | 0/3114 [00:00<?, ? examples/s]

Filter:   0%|          | 0/3114 [00:00<?, ? examples/s]

Filter:   0%|          | 0/3114 [00:00<?, ? examples/s]

Filter:   0%|          | 0/3114 [00:00<?, ? examples/s]

rows: {'train': 2786, 'validation': 123, 'test': 205}


Flattening the indices:   0%|          | 0/2786 [00:00<?, ? examples/s]

Flattening the indices:   0%|          | 0/123 [00:00<?, ? examples/s]

Flattening the indices:   0%|          | 0/205 [00:00<?, ? examples/s]

unique problem_id: {'train': 590, 'validation': 32, 'test': 32}


Map:   0%|          | 0/2786 [00:00<?, ? examples/s]

Map:   0%|          | 0/123 [00:00<?, ? examples/s]

Map:   0%|          | 0/205 [00:00<?, ? examples/s]

<|im_start|>system
You are CodeSensei, a Python program repair assistant.<|im_end|>
<|im_start|>user
Fix the following Python program using the error trace when available.
Return ONLY the full fixed program (no markdown, no explanation, no extra tests).

### BUGGY CODE:
S = input()
T = input()

maxcount = 0
for i in range(len(S) - len(T) + 1):
    count = 0
    for j in range(len(T)):
        if S[i + j] == T[j]:
            count += 1
        maxcount = max(maxcount, count)
print(len(T) - maxcount)


### ERROR TRACE:
WA
<|im_end|>
<|im_start|>assistant
S = input()
T = input()

for i in range(len(S) - len(T)):
    count = 0
    maxcount = 0
    for j in range(len(T)):
        if S[i + j] == T[j]:
            count += 1
        maxcount = max(maxcount, count)
print(len(T) - maxcount)
<|im_end|>


--- assistant starts with ---
 S = input()
T = input()

for i in range(len(S) - len(T)):
    count = 0
    maxcount = 0
    for j in range(len(T)):
        if S[i + j] == T[j]:
            coun

In [5]:
train_ds = ds["train"].select_columns(["text"])
eval_ds  = ds["validation"].select_columns(["text"])

print(train_ds[0])
print(eval_ds[0])
print(len(train_ds), len(eval_ds))

{'text': '<|im_start|>system\nYou are CodeSensei, a Python program repair assistant.<|im_end|>\n<|im_start|>user\nFix the following Python program using the error trace when available.\nReturn ONLY the full fixed program (no markdown, no explanation, no extra tests).\n\n### BUGGY CODE:\nS = input()\nT = input()\n\nmaxcount = 0\nfor i in range(len(S) - len(T) + 1):\n    count = 0\n    for j in range(len(T)):\n        if S[i + j] == T[j]:\n            count += 1\n        maxcount = max(maxcount, count)\nprint(len(T) - maxcount)\n\n\n### ERROR TRACE:\nWA\n<|im_end|>\n<|im_start|>assistant\nS = input()\nT = input()\n\nfor i in range(len(S) - len(T)):\n    count = 0\n    maxcount = 0\n    for j in range(len(T)):\n        if S[i + j] == T[j]:\n            count += 1\n        maxcount = max(maxcount, count)\nprint(len(T) - maxcount)\n<|im_end|>\n'}
{'text': '<|im_start|>system\nYou are CodeSensei, a Python program repair assistant.<|im_end|>\n<|im_start|>user\nFix the following Python program

In [6]:
from trl import SFTTrainer, SFTConfig
from transformers import EarlyStoppingCallback
from unsloth import is_bfloat16_supported
from dataclasses import dataclass
import torch

# -------------------------
# Tokenizer padding (important)
# -------------------------
if tokenizer.pad_token is None:
    # Qwen2.5-Coder has a real pad token in vocab in most setups
    if "<|fim_pad|>" in tokenizer.get_vocab():
        tokenizer.pad_token = "<|fim_pad|>"
    else:
        tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"  # for training

# -------------------------
# Completion-only collator
# -------------------------
ASSISTANT_PREFIX = "<|im_start|>assistant\n"
assistant_prefix_ids = tokenizer(ASSISTANT_PREFIX, add_special_tokens=False).input_ids

def _find_sublist(haystack, needle):
    L = len(needle)
    for i in range(len(haystack) - L + 1):
        if haystack[i:i+L] == needle:
            return i
    return -1

@dataclass
class CompletionOnlyCollator:
    tokenizer: any
    assistant_prefix_ids: list

    def __call__(self, features):
        # SFTTrainer will tokenize "text" into input_ids; we just pad + build labels here.
        batch = self.tokenizer.pad(features, padding=True, return_tensors="pt")

        labels = batch["input_ids"].clone()
        for i in range(labels.size(0)):
            ids = batch["input_ids"][i].tolist()
            start = _find_sublist(ids, self.assistant_prefix_ids)
            if start == -1:
                labels[i, :] = -100
            else:
                cutoff = start + len(self.assistant_prefix_ids)
                labels[i, :cutoff] = -100

        batch["labels"] = labels
        return batch

collator = CompletionOnlyCollator(tokenizer, assistant_prefix_ids)

train_ds = ds["train"].select_columns(["text"])
eval_ds  = ds["validation"].select_columns(["text"])

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_ds,
    eval_dataset=eval_ds,
    data_collator=collator,
    dataset_text_field="text",
    args=SFTConfig(
        output_dir="outputs",
        max_seq_length=max_seq_length,
        packing=False,

        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,

        # For small datasets, 2e-4 can overfit fast; start lower
        learning_rate=1e-4,
        weight_decay=0.01,
        lr_scheduler_type="linear",
        warmup_ratio=0.03,

        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),

        logging_steps=10,
        eval_strategy="steps",
        eval_steps=50,
        save_strategy="steps",
        save_steps=50,
        save_total_limit=2,

        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        greater_is_better=False,

        num_train_epochs=2,   # early stopping will stop earlier if needed
        seed=SEED,
        optim="adamw_8bit",
        max_grad_norm=1.0,
        remove_unused_columns=True,
        report_to="none",
    ),
    callbacks=[EarlyStoppingCallback(early_stopping_patience=3)],
)

trainer.train()


Unsloth: Tokenizing ["text"] (num_proc=4):   0%|          | 0/2786 [00:00<?, ? examples/s]

Unsloth: Tokenizing ["text"] (num_proc=4):   0%|          | 0/123 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 2,786 | Num Epochs = 2 | Total steps = 698
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 18,464,768 of 1,562,179,072 (1.18% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss,Validation Loss
50,0.9587,0.446137
100,0.5701,0.329323
150,0.5381,0.326597
200,0.5756,0.326191
250,0.6037,0.324454
300,0.4745,0.324753
350,0.5182,0.323607
400,0.5117,0.323655


Unsloth: Not an error, but Qwen2ForCausalLM does not accept `num_items_in_batch`.
Using gradient accumulation will be very slightly less accurate.
Read more on gradient accumulation issues here: https://unsloth.ai/blog/gradient


KeyboardInterrupt: 

In [8]:
from unsloth import FastLanguageModel

FastLanguageModel.for_inference(model)

# IMPORTANT: test with a FULL PROGRAM if that's what you trained on
buggy_code = """D, T, S = map(int, input().split())
if D <= T * S:
    print("Yes")
else:
    print("No")
"""

error_log = """Traceback (most recent call last):
  File "script.py", line 1, in <module>
    D = int(input())
ValueError: invalid literal for int() with base 10: '1000 15 80'
"""

messages = [
    {"role": "system", "content": "You are CodeSensei, a Python program repair assistant."},
    {"role": "user", "content":
        "Fix the following Python program using the error trace when available.\n"
        "Return ONLY the full fixed program (no markdown, no explanation, no extra tests).\n\n"
        f"### BUGGY CODE:\n{buggy_code}\n\n### ERROR TRACE:\n{error_log}\n"
    },
]

# Use real pad token if available
if tokenizer.pad_token is None and "<|fim_pad|>" in tokenizer.get_vocab():
    tokenizer.pad_token = "<|fim_pad|>"
tokenizer.padding_side = "left"  # nicer for generation batching

input_ids = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
).to("cuda")

# Build attention mask explicitly (prevents the pad==eos warning)
attention_mask = (input_ids != tokenizer.pad_token_id).long()

im_end_id = tokenizer.convert_tokens_to_ids("<|im_end|>")
eos_ids = [tokenizer.eos_token_id]
if im_end_id is not None and im_end_id not in eos_ids:
    eos_ids.append(im_end_id)

out = model.generate(
    input_ids=input_ids,
    attention_mask=attention_mask,
    max_new_tokens=512,
    do_sample=False,
    temperature=0.0,
    eos_token_id=eos_ids,
    use_cache=True,
)

decoded = tokenizer.decode(out[0], skip_special_tokens=False)

# Print ONLY assistant content
assistant = decoded.split("<|im_start|>assistant", 1)[-1].split("<|im_end|>", 1)[0].strip()
print(assistant)


D, T, S = map(int, input().split())
if D <= T * S:
    print("Yes")
else:
    print("No")


In [11]:
ex = ds_raw["test"][50]   # or any index
buggy_code = ex["fail"]
error_log = (ex.get("stderr") or ex.get("error") or "")
gold = ex["pass"]
print("BUGGY:\n", buggy_code[:400])
print("\nLOG:\n", error_log[:400])
print("\nGOLD starts:\n", gold[:200])


messages = [
    {"role": "system", "content": "You are CodeSensei, a Python program repair assistant."},
    {"role": "user", "content":
        "Fix the following Python program using the error trace when available.\n"
        "Return ONLY the full fixed program (no markdown, no explanation, no extra tests).\n\n"
        f"### BUGGY CODE:\n{buggy_code}\n\n### ERROR TRACE:\n{error_log}\n"
    },
]

# Use real pad token if available
if tokenizer.pad_token is None and "<|fim_pad|>" in tokenizer.get_vocab():
    tokenizer.pad_token = "<|fim_pad|>"
tokenizer.padding_side = "left"  # nicer for generation batching

input_ids = tokenizer.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
).to("cuda")

# Build attention mask explicitly (prevents the pad==eos warning)
attention_mask = (input_ids != tokenizer.pad_token_id).long()

im_end_id = tokenizer.convert_tokens_to_ids("<|im_end|>")
eos_ids = [tokenizer.eos_token_id]
if im_end_id is not None and im_end_id not in eos_ids:
    eos_ids.append(im_end_id)

out = model.generate(
    input_ids=input_ids,
    attention_mask=attention_mask,
    max_new_tokens=512,
    do_sample=False,
    temperature=0.0,
    eos_token_id=eos_ids,
    use_cache=True,
)

decoded = tokenizer.decode(out[0], skip_special_tokens=False)

# Print ONLY assistant content
assistant = decoded.split("<|im_start|>assistant", 1)[-1].split("<|im_end|>", 1)[0].strip()
print("generated response strts here : ", assistant)


BUGGY:
 n = int(input())
a = [int(input()) for _ in range(n)]
b, c = sorted(a)[-2:]
j = a.index(c)
for i in range(n):
    print((c, b)[i == j])


LOG:
 TLE

GOLD starts:
 n = int(input())
a = [int(input()) for _ in range(n)]
b, c = sorted(a)[-2:]
for i in range(n):
    print(b if a.index(c) == i else c)

generated response strts here :  n = int(input())
a = [int(input()) for _ in range(n)]
b, c = sorted(a)[-2:]
for i in range(n):
    if a[i] == c:
        print(b)
    else:
        print(c)


In [9]:
from google.colab import drive
import os

# 1. Mount Google Drive
drive.mount('/content/drive')

# 2. Create a folder in Drive
save_path = "/content/drive/My Drive/CodeSensei_Adapters"
if not os.path.exists(save_path):
    os.makedirs(save_path)

# 3. Save Adapters ONLY (Safe, low RAM)
model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)

print(f"✅ Adapters saved to {save_path}")
print("🚨 NOW: Go to 'Runtime' > 'Disconnect and Delete Runtime'.")
print("Then, refresh the page and run the cell below (Cell 7).")

Mounted at /content/drive
✅ Adapters saved to /content/drive/My Drive/CodeSensei_Adapters
🚨 NOW: Go to 'Runtime' > 'Disconnect and Delete Runtime'.
Then, refresh the page and run the cell below (Cell 7).


In [2]:
# --- RUN THIS AFTER RESTARTING RUNTIME ---\
from unsloth import FastLanguageModel
from google.colab import drive, files
import os

# 1. Setup Environment again (Since we killed the runtime)
# try:
#    import unsloth
#except ImportError:
#    !pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
#    !pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes

# 2. Mount Drive
drive.mount('/content/drive')
adapter_path = "/content/drive/My Drive/CodeSensei_Adapters"

# 3. Load Model + Adapters (Fresh RAM!)
# Unsloth automatically finds the base model (Qwen) referenced in your adapter config
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = adapter_path,
    max_seq_length = 2048,
    dtype = None,
    load_in_4bit = True,
)

# 4. Convert to GGUF
# This will now work because RAM is empty
print("Converting to GGUF... this may take 5-10 minutes.")
model.save_pretrained_gguf(
    "model_gguf",
    tokenizer,
    quantization_method = "q4_k_m"
)

# 5. Download the File
files_in_folder = os.listdir("model_gguf")
gguf_file = [f for f in files_in_folder if f.endswith('.gguf')][0]
full_path = os.path.join("model_gguf", gguf_file)

print(f"✅ Download ready: {full_path}")
files.download(full_path)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
Mounted at /content/drive
==((====))==  Unsloth 2026.1.3: Fast Qwen2 patching. Transformers: 4.57.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.1+cu128. CUDA: 7.5. CUDA Toolkit: 12.8. Triton: 3.5.1
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.33.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/1.14G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/265 [00:00<?, ?B/s]

Unsloth 2026.1.3 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


Converting to GGUF... this may take 5-10 minutes.
Unsloth: Merging model weights to 16-bit format...


config.json:   0%|          | 0.00/765 [00:00<?, ?B/s]

Found HuggingFace hub cache directory: /root/.cache/huggingface/hub
Checking cache directory for required files...
Cache check failed: model.safetensors not found in local cache.
Not all required files found in cache. Will proceed with downloading.
Checking cache directory for required files...
Cache check failed: tokenizer.model not found in local cache.
Not all required files found in cache. Will proceed with downloading.


Unsloth: Preparing safetensor model files:   0%|          | 0/1 [00:00<?, ?it/s]

model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

Unsloth: Preparing safetensor model files: 100%|██████████| 1/1 [00:43<00:00, 43.01s/it]


Note: tokenizer.model not found (this is OK for non-SentencePiece models)


Unsloth: Merging weights into 16bit: 100%|██████████| 1/1 [00:52<00:00, 52.32s/it]


Unsloth: Merge process complete. Saved to `/content/model_gguf`
Unsloth: Converting to GGUF format...
==((====))==  Unsloth: Conversion from HF to GGUF information
   \\   /|    [0] Installing llama.cpp might take 3 minutes.
O^O/ \_/ \    [1] Converting HF to GGUF f16 might take 3 minutes.
\        /    [2] Converting GGUF f16 to ['q4_k_m'] might take 10 minutes each.
 "-____-"     In total, you will have to wait at least 16 minutes.

Unsloth: Installing llama.cpp. This might take 3 minutes...
Unsloth: Updating system package directories
Unsloth: All required system packages already installed!
Unsloth: Install llama.cpp and building - please wait 1 to 3 minutes
Unsloth: Cloning llama.cpp repository
Unsloth: Install GGUF and other packages
Unsloth: Successfully installed llama.cpp!
Unsloth: Preparing converter script...
Unsloth: [1] Converting model into f16 GGUF format.
This might take 3 minutes...
Unsloth: Initial conversion completed! Files: ['qwen2.5-coder-1.5b-instruct.F16.gguf']
U

IndexError: list index out of range

In [3]:
import os
from google.colab import files

print("🔍 Searching for your GGUF file...")

# 1. Search every folder in the environment for the file
found_file = None
for root, dirs, files_list in os.walk("/content"):
    for filename in files_list:
        if filename.endswith(".gguf"):
            found_file = os.path.join(root, filename)
            break
    if found_file:
        break

# 2. Trigger the Download
if found_file:
    print(f"✅ Found file at: {found_file}")
    print(f"📦 File size: {os.path.getsize(found_file) / 1024 / 1024 / 1024:.2f} GB")
    print("🚀 Starting download... (Check your browser's download bar)")
    files.download(found_file)
else:
    print("❌ Error: No .gguf file found. The conversion step might have failed.")

🔍 Searching for your GGUF file...
✅ Found file at: /content/qwen2.5-coder-1.5b-instruct.Q4_K_M.gguf
📦 File size: 0.92 GB
🚀 Starting download... (Check your browser's download bar)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>