In [2]:
 # Install Unsloth from the default branch of its GitHub repository, with the
 # "colab-new" extra. The ref is not pinned; the logged run of this cell
 # resolved it to commit cf4342bf41e4a93573d08392b11f8093b30ddb8f.
 !pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
 # Install the remaining training libraries with dependency resolution turned
 # off (--no-deps); trl is constrained to versions below 0.9.0.
 !pip install --no-deps xformers "trl<0.9.0" peft accelerate bitsandbytes

Collecting unsloth@ git+https://github.com/unslothai/unsloth.git (from unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Cloning https://github.com/unslothai/unsloth.git to /tmp/pip-install-apwg3z7p/unsloth_7d1beedb8876439d8749945a74070788
  Running command git clone --filter=blob:none --quiet https://github.com/unslothai/unsloth.git /tmp/pip-install-apwg3z7p/unsloth_7d1beedb8876439d8749945a74070788
  Resolved https://github.com/unslothai/unsloth.git to commit cf4342bf41e4a93573d08392b11f8093b30ddb8f
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting unsloth_zoo>=2025.12.7 (from unsloth@ git+https://github.com/unslothai/unsloth.git->unsloth[colab-new]@ git+https://github.com/unslothai/unsloth.git)
  Downloading unsloth_zoo-2025.12.7-py3-none-any.whl.metadata (32 kB)
Collecting tyro (from unsloth@ git+https://github.com/unslothai/unsloth.gi

In [3]:
import os
import shutil
from unsloth import FastLanguageModel
import torch
from transformers import TrainingArguments
from trl import SFTTrainer
from datasets import load_from_disk

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!


In [4]:
# Selects which entry of CONFIGS is used for this run: "qwen" or "phi3".
MODEL_TYPE = "qwen"

# Per-model settings: the checkpoint to load, the projection layers that receive
# LoRA adapters, and the sub-folder holding that model's preprocessed data.
CONFIGS = dict(
    qwen=dict(
        model_id="unsloth/Qwen2.5-Coder-7B-Instruct-bnb-4bit",
        target_modules=[
            "q_proj", "k_proj", "v_proj", "o_proj",
            "gate_proj", "up_proj", "down_proj",
        ],
        data_folder="qwen",
    ),
    phi3=dict(
        # Phi-3 is 3.8B params - much faster than Qwen's 7B!
        model_id="unsloth/Phi-3-mini-4k-instruct-bnb-4bit",
        target_modules=["qkv_proj", "o_proj", "gate_up_proj", "down_proj"],
        data_folder="phi3",
    ),
)

# Effective run configuration: start from a shallow copy of the selected
# per-model entry, then layer the shared settings on top of it.
CONFIG = dict(CONFIGS[MODEL_TYPE])
CONFIG.update(
    model_type=MODEL_TYPE,
    max_seq_length=2048,
    load_in_4bit=True,

    # Paths - UPDATE THESE to match your own Drive layout.
    drive_data_path=f"/content/drive/MyDrive/text_to_sql/processed_data/{CONFIGS[MODEL_TYPE]['data_folder']}",
    local_data_path=f"/content/data_cache/{MODEL_TYPE}",
    output_dir=f"spider_{MODEL_TYPE}_lora",

    # Training hyperparameters (Run #2: epochs raised from 1 to 2 to better
    # override the instruct model's default behavior).
    num_epochs=2,
    batch_size=4,
    gradient_accumulation_steps=4,
    learning_rate=2e-4,

    # LoRA adapter settings.
    lora_r=16,
    lora_alpha=16,
    lora_dropout=0,

    # Evaluation settings.
    eval_steps=50,
    eval_batch_size=8,
)

# Echo the selection so the cell output records which model and data were used.
print(f"Configuration for: {MODEL_TYPE.upper()}")
print(f"   Model: {CONFIG['model_id']}")
print(f"   Data path: {CONFIG['drive_data_path']}")

Configuration for: QWEN
   Model: unsloth/Qwen2.5-Coder-7B-Instruct-bnb-4bit
   Data path: /content/drive/MyDrive/text_to_sql/processed_data/qwen


In [6]:
def copy_data_to_local(src=None, dst=None):
    """Copy the processed dataset folder to local disk, at most once.

    The copy is staged in a sibling ``<dst>.partial`` folder and renamed into
    place only after it finishes, so an interrupted copy is never mistaken for
    a complete dataset by the "already exists" check on a later run.

    Args:
        src: Folder to copy from. Defaults to ``CONFIG["drive_data_path"]``.
        dst: Folder to create. Defaults to ``CONFIG["local_data_path"]``.

    Returns:
        None. When ``dst`` already exists a notice is printed and nothing is
        copied.

    Raises:
        FileNotFoundError: If ``src`` is not an existing directory (for
            example because Google Drive has not been mounted).
    """
    if src is None:
        src = CONFIG["drive_data_path"]
    if dst is None:
        dst = CONFIG["local_data_path"]

    if os.path.exists(dst):
        print(f"Data already exists at {dst}")
        return

    # Fail early with an actionable message instead of an error from deep
    # inside copytree.
    if not os.path.isdir(src):
        raise FileNotFoundError(
            f"Source data folder not found: {src!r}. "
            "Mount Google Drive and check CONFIG['drive_data_path']."
        )

    print("Copying data from Drive to Local Disk...")

    # Normalise so a trailing slash cannot place the staging folder inside dst.
    target = os.path.normpath(dst)
    parent = os.path.dirname(target)
    if parent:  # makedirs("") raises, so skip it for a bare folder name
        os.makedirs(parent, exist_ok=True)

    staging = target + ".partial"
    # Discard leftovers from a previously interrupted copy.
    shutil.rmtree(staging, ignore_errors=True)
    try:
        shutil.copytree(src, staging)
    except BaseException:
        # Also covers a kernel interrupt: never leave a half-written folder.
        shutil.rmtree(staging, ignore_errors=True)
        raise
    # Publish the finished copy under its final name in a single rename.
    os.replace(staging, target)

    print("Copy complete!")

# Populate the local data cache from Drive (prints a notice and returns if the
# local folder already exists).
copy_data_to_local()

Copying data from Drive to Local Disk...
Copy complete!


In [7]:
print(f"Loading {MODEL_TYPE.upper()} with Unsloth...")

# Arguments for loading the base checkpoint and its tokenizer.
# dtype=None leaves the precision choice to Unsloth (presumably auto-detected
# from the GPU; confirm against the Unsloth documentation).
base_model_kwargs = dict(
    model_name=CONFIG["model_id"],
    max_seq_length=CONFIG["max_seq_length"],
    dtype=None,
    load_in_4bit=CONFIG["load_in_4bit"],
)
model, tokenizer = FastLanguageModel.from_pretrained(**base_model_kwargs)

# Arguments for wrapping the base model with LoRA adapters on the projection
# layers listed for the selected model type.
lora_kwargs = dict(
    r=CONFIG["lora_r"],
    target_modules=CONFIG["target_modules"],
    lora_alpha=CONFIG["lora_alpha"],
    lora_dropout=CONFIG["lora_dropout"],
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=42,
)
model = FastLanguageModel.get_peft_model(model, **lora_kwargs)

print("Model Loaded and LoRA Applied")

Loading QWEN with Unsloth...
==((====))==  Unsloth 2025.12.9: Fast Qwen2 patching. Transformers: 4.57.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.0+cu126. CUDA: 7.5. CUDA Toolkit: 12.6. Triton: 3.5.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.33.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.55G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/265 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/632 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/613 [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

Unsloth 2025.12.9 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.


Model Loaded and LoRA Applied


In [8]:
# Both splits are stored as sub-folders of the local data path; load them in
# the order train, validation.
_data_root = CONFIG["local_data_path"]
train_dataset, eval_dataset = (
    load_from_disk(os.path.join(_data_root, split_name))
    for split_name in ("train", "validation")
)

# Report split sizes as a quick sanity check on what was loaded.
print(f"Train: {len(train_dataset)} samples")
print(f"Eval: {len(eval_dataset)} samples")

Train: 15338 samples
Eval: 1034 samples


In [11]:
# Probe bf16 support once; exactly one of fp16/bf16 is enabled below.
use_bf16 = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    output_dir=CONFIG["output_dir"],

    # Training schedule
    per_device_train_batch_size=CONFIG["batch_size"],
    gradient_accumulation_steps=CONFIG["gradient_accumulation_steps"],
    num_train_epochs=CONFIG["num_epochs"],

    # Learning rate: cosine schedule with a short warmup
    learning_rate=CONFIG["learning_rate"],
    lr_scheduler_type="cosine",
    warmup_ratio=0.03,
    weight_decay=0.01,

    # Mixed precision
    fp16=not use_bf16,
    bf16=use_bf16,

    # Optimizer
    optim="adamw_8bit",

    # Evaluation is currently DISABLED: eval_strategy is "no" and no
    # eval_dataset is handed to the trainer. To turn it on, pass
    # eval_dataset=eval_dataset to SFTTrainer and use eval_strategy="steps"
    # together with eval_steps=CONFIG["eval_steps"].
    eval_strategy="no",
    per_device_eval_batch_size=CONFIG["eval_batch_size"],

    # Logging
    logging_steps=25,
    logging_first_step=True,

    # Checkpointing: save every 50 steps, keeping at most 3 checkpoints
    save_strategy="steps",
    save_steps=50,
    save_total_limit=3,

    seed=42,
    report_to="none",
)

# Supervised fine-tuning on the "text" column of the training split, with
# packing enabled.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    dataset_text_field="text",
    max_seq_length=CONFIG["max_seq_length"],
    dataset_num_proc=2,
    packing=True,
    args=training_args,
)

print("Trainer ready!")

Generating train split: 0 examples [00:00, ? examples/s]

Trainer ready!


In [None]:
# Run fine-tuning with the trainer built above. The object returned by
# trainer.train() is kept so its training_loss can be reported afterwards.
print("Starting Training...")
trainer_stats = trainer.train()
print(f"Training Complete! Loss: {trainer_stats.training_loss:.4f}")

In [None]:
# Write the model and then the tokenizer into the same folder under the run's
# output directory.
final_path = os.path.join(CONFIG["output_dir"], "final_adapter")
for artifact in (model, tokenizer):
    artifact.save_pretrained(final_path)
print(f"Model saved to: {final_path}")

# Mirror the saved folder to Drive. dirs_exist_ok=True lets a re-run copy into
# a destination that already exists instead of raising.
drive_path = f"/content/drive/MyDrive/text_to_sql/checkpoints/final_adapter_{MODEL_TYPE}"
shutil.copytree(final_path, drive_path, dirs_exist_ok=True)
print(f"Copied to Drive: {drive_path}")

In [None]:
# Switch the model to Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

# System instruction used for the smoke-test prompt below.
STRICT_SYSTEM = """You are a SQL query generator. Your ONLY task is to convert natural language questions into SQL queries.

CRITICAL RULES:
1. Output ONLY the raw SQL query - nothing else
2. Do NOT include explanations, comments, or markdown
3. Do NOT wrap the query in code blocks
4. Use the EXACT table and column names from the schema (preserve original casing)
5. Do NOT use DISTINCT unless explicitly required by the question
6. Do NOT add column aliases unless necessary for clarity
7. Use SQLite syntax"""

# A tiny hand-written example to check the fine-tuned model end to end.
schema = "Table: users\nColumns: id, name, email, age"
question = "How many users are older than 30?"

# Assemble system / user / assistant turns using the chat markers of the
# selected model family; the prompt ends with an open assistant turn.
if MODEL_TYPE == "qwen":
    # Qwen format
    prompt = (
        f"<|im_start|>system\n{STRICT_SYSTEM}<|im_end|>\n"
        f"<|im_start|>user\n### Database Schema:\n{schema}\n\n### Question:\n{question}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )
else:
    # Phi-3 format (a space is kept before each <|end|> marker)
    prompt = (
        f"<|system|>\n{STRICT_SYSTEM} <|end|>\n"
        f"<|user|>\n### Database Schema:\n{schema}\n\n### Question:\n{question} <|end|>\n"
        "<|assistant|>\n"
    )

print(f"Test Prompt:\n{prompt}")

# Tokenize the prompt as a batch of one and move the tensors to the GPU.
inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

# The tokenizer's EOS id is used both as the padding id and as the stop token.
stop_token_id = tokenizer.eos_token_id
generation_kwargs = dict(
    max_new_tokens=128,
    use_cache=True,
    pad_token_id=stop_token_id,
    eos_token_id=stop_token_id,
)
outputs = model.generate(**inputs, **generation_kwargs)

# Special tokens are kept in the decoded text (skip_special_tokens=False) so
# the chat turn markers remain available for locating the assistant's reply.
decoded = tokenizer.batch_decode(outputs, skip_special_tokens=False)[0]

def _extract_sql(decoded_text, model_type):
    """Return the assistant's reply from a decoded prompt-plus-completion string.

    The reply is the text after the LAST assistant-turn marker, cut at the
    first end-of-turn marker that follows it, with surrounding whitespace
    stripped.

    Args:
        decoded_text: Full decoded model output, special tokens included.
        model_type: "qwen" selects the Qwen chat markers; any other value
            selects the Phi-3 markers.

    Returns:
        The extracted reply. If a marker is absent, everything on the relevant
        side is kept, so in the worst case the whole stripped input is
        returned. No exception is raised for missing markers.
    """
    if model_type == "qwen":
        start_marker, end_marker = "<|im_start|>assistant", "<|im_end|>"
    else:
        start_marker, end_marker = "<|assistant|>", "<|end|>"

    # rpartition/partition return the whole string in the chosen slot when the
    # separator is missing, which is the fallback behavior. The previous
    # `except IndexError` handler could never run, because str.split always
    # yields at least one element.
    after_marker = decoded_text.rpartition(start_marker)[2]
    return after_marker.partition(end_marker)[0].strip()


sql = _extract_sql(decoded, MODEL_TYPE)

print(f"\nGenerated SQL:\n{sql}")