### News

Placeholder

### Installation

In [None]:
%%capture
# Install Unsloth and dependencies
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# Install other specific dependencies without their dependencies (as per original code intent)
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft bitsandbytes
# Install accelerate separately to ensure a compatible version and resolve its dependencies
!pip install accelerate>=0.29.0

In [None]:
from unsloth import FastLanguageModel
from unsloth import is_bfloat16_supported
import torch
from trl import SFTTrainer
from transformers import TrainingArguments
from datasets import load_dataset
import json

In [None]:
# Location of the JSONL fine-tuning dataset used by the loading cell below.
# Exactly one assignment to `dataset_path` should be active.
#
# Uncomment for Google Colab file upload
# from google.colab import files
# uploaded = files.upload()
# dataset_path = list(uploaded.keys())[0]

# For Kaggle, update this path after uploading
#dataset_path = "/kaggle/input/your-dataset/finetuning_dataset.jsonl"
# NOTE(review): this is an absolute path at the filesystem root, not a path
# relative to the notebook's working directory — confirm the file really lives there.
dataset_path = "/finetuning_dataset.jsonl"


print(f"Dataset path: {dataset_path}")

In [None]:
# Load the JSONL file as a single "train" split; the train/validation split is
# created further down in this cell.
raw_dataset = load_dataset("json", data_files=dataset_path, split="train")

# Fail early with a readable message: the preview below and the trainer's
# dataset_text_field="text" both require a non-empty dataset with a "text" column.
if len(raw_dataset) == 0:
    raise ValueError(f"No examples found in {dataset_path}")
if "text" not in raw_dataset.column_names:
    raise ValueError(
        f'Expected a "text" column in {dataset_path}, found: {raw_dataset.column_names}'
    )

print(f"✓ Dataset loaded: {len(raw_dataset)} examples")
print("\nFirst example:")
# Show at most the first 500 characters of the first example.
print(raw_dataset[0]["text"][:500] + "...")

# Split into train/validation (80/20). The fixed seed keeps the split
# reproducible across runs. `dataset` holds the resulting split mapping.
dataset = raw_dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = dataset["train"]
eval_dataset = dataset["test"]

print("\n✓ Split complete:")
print(f"  Training examples: {len(train_dataset)}")
print(f"  Validation examples: {len(eval_dataset)}")

In [None]:
# ---- Model configuration ----
# Checkpoint to fine-tune. Other small models suggested for quick training:
#   "unsloth/Qwen2.5-0.5B-Instruct"  - 0.5B params, very fast
#   "unsloth/Qwen2.5-1.5B-Instruct"  - 1.5B params, good balance
#   "unsloth/Llama-3.2-1B-Instruct"  - 1B params, good quality
#   "unsloth/Mistral-7B-v0.3"        - 7B params, better quality, slower
model_name = "unsloth/Qwen3-1.7B"

# Maximum sequence length. Choose any; Unsloth auto-supports RoPE scaling internally.
max_seq_length = 2048

# Load the weights with 4-bit quantization to reduce memory usage.
load_in_4bit = True

# None = auto-detect. Float16 for Tesla T4 / V100, Bfloat16 for Ampere+.
dtype = None

# Echo the chosen settings, one per line.
print(
    f"Selected model: {model_name}",
    f"Max sequence length: {max_seq_length}",
    f"4-bit quantization: {load_in_4bit}",
    sep="\n",
)

In [None]:
# Choose the mixed-precision mode the current GPU supports instead of forcing
# fp16. The model is loaded with dtype=None (auto-detect), so hard-coding
# fp16=True conflicts with the model dtype on bfloat16-capable GPUs.
use_bf16 = is_bfloat16_supported()

training_args = TrainingArguments(
    output_dir="./outputs",
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,  # effective batch size = 1 * 8
    learning_rate=2e-4,
    num_train_epochs=3,
    warmup_steps=5,
    optim="adamw_torch",  # plain PyTorch AdamW (previously adamw_8bit)
    eval_strategy="epoch",  # evaluate on eval_dataset once per epoch
    save_strategy="epoch",  # checkpoint once per epoch
    logging_steps=5,
    report_to=[],  # no external experiment trackers
    fp16=not use_bf16,
    bf16=use_bf16,
    seed=3407,
    remove_unused_columns=False,  # keep dataset columns; flagged as a key fix by the author
)

print("✓ Training arguments configured")
print(f"  Effective batch size: {training_args.per_device_train_batch_size * training_args.gradient_accumulation_steps}")
print(f"  Total epochs: {training_args.num_train_epochs}")
print(f"  Learning rate: {training_args.learning_rate}")

In [None]:
# Disable external experiment tracking (Weights & Biases, MLflow) by setting
# the corresponding environment variables for this process.
import os

os.environ["WANDB_DISABLED"] = "true"
os.environ["WANDB_MODE"] = "disabled"
os.environ["DISABLE_MLFLOW_INTEGRATION"] = "true"

print("✓ All external logging disabled")

In [None]:
# Load the base model and its tokenizer using the settings from the
# model-configuration cell (model_name, max_seq_length, dtype, load_in_4bit).
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

print("✓ Model loaded successfully!")
# Sums the element count of every parameter tensor, reported in millions.
# NOTE(review): with load_in_4bit=True the quantized weights may be stored in a
# packed form, in which case this figure undercounts — confirm before quoting it.
print(f"Model size: {sum(p.numel() for p in model.parameters()) / 1e6:.1f}M parameters")

In [None]:
# Wrap the loaded model with LoRA adapters on the attention and MLP projections.
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # LoRA rank - higher = more capacity but slower (8, 16, 32, 64)
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",  # attention projections
        "gate_proj", "up_proj", "down_proj",     # MLP projections
    ],
    lora_alpha=16,  # LoRA scaling factor
    lora_dropout=0,  # Dropout for LoRA layers (0 = no dropout)
    bias="none",  # Bias training ("none", "all", "lora_only")
    use_gradient_checkpointing="unsloth",  # Longer training but less memory
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

print("✓ LoRA configuration applied!")
# Count only the parameters that will receive gradients, in millions.
print(f"Trainable parameters: {sum(p.numel() for p in model.parameters() if p.requires_grad) / 1e6:.1f}M")

In [None]:
# Build the supervised fine-tuning trainer from the LoRA-wrapped model, the
# train/validation splits and the training arguments defined earlier.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    dataset_text_field="text",  # Column name with text data
    max_seq_length=max_seq_length,
    dataset_num_proc=2,  # worker processes for dataset preprocessing
    packing=False,  # Can make training 5x faster for short sequences
    args=training_args,
)

print("✓ Trainer created successfully!")

In [None]:
# Disable all external logging.
# This repeats the settings of the earlier logging cell so they are applied
# again in the cell immediately before training starts.
import os

os.environ["WANDB_DISABLED"] = "true"
os.environ["WANDB_MODE"] = "disabled"
os.environ["DISABLE_MLFLOW_INTEGRATION"] = "true"

print("✓ All external logging disabled")

In [None]:
# Report GPU memory before training (device 0). Values are converted from
# bytes to GB and rounded to three decimals.
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU: {gpu_stats.name}")
print(f"GPU memory: {start_gpu_memory} GB / {max_memory} GB")
print("\n🚀 Starting training...\n")

# Start training
trainer_stats = trainer.train()

# Show final stats: peak reserved memory after training, the increase over the
# pre-training baseline, and the peak as a share of total GPU memory.
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory / max_memory * 100, 3)

print("\n✓ Training complete!")
print(f"\nFinal GPU memory: {used_memory} GB")
print(f"Memory used for training: {used_memory_for_lora} GB")
print(f"Percentage of GPU used: {used_percentage}%")
print(f"\nTraining time: {trainer_stats.metrics['train_runtime']:.2f} seconds")

In [None]:
# Switch the fine-tuned model into Unsloth's inference mode.
FastLanguageModel.for_inference(model)

# Smoke-test query, wrapped in the Instruction / User Query / Response prompt layout.
test_query = "Show me teams with high utilization"
prompt = f"""### Instruction:
Generate SQL and a visualization for the user.

### User Query:
{test_query}

### Response:
"""

# Tokenize the single prompt and move the tensors to the GPU.
inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

# Sampling settings for generation.
sampling_options = dict(
    max_new_tokens=256,
    temperature=0.7,
    top_p=0.9,
    do_sample=True,
    use_cache=True,
)
outputs = model.generate(**inputs, **sampling_options)

# Decode the only sequence in the batch.
response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

separator = "="*80
print("Test Query:", test_query)
print("\n" + separator)
# The decoded text still contains the prompt; print only what follows the response marker.
print(response.split("### Response:")[1].strip())
print(separator)

In [None]:
# Run several sample queries through the fine-tuned model and print each answer.
test_queries = [
    "Find members with low availability",
    "Show sprint progress for all teams",
    "Which teams have pending work exceeding capacity?",
    "Calculate average completion rate by work item type"
]

for i, query in enumerate(test_queries, 1):
    # Same Instruction / User Query / Response layout as the single-query test.
    prompt = f"""### Instruction:
Generate SQL and a visualization for the user.

### User Query:
{query}

### Response:
"""

    inputs = tokenizer([prompt], return_tensors="pt").to("cuda")
    # Sampling is enabled explicitly: `temperature` only applies in sampling
    # mode, and without do_sample=True the behaviour depends on the
    # checkpoint's default generation config. The settings mirror the
    # single-query test cell.
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        use_cache=True,
    )
    response = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]

    print(f"\n{'='*80}")
    print(f"Test #{i}: {query}")
    print("="*80)
    # The decoded text still contains the prompt; print only what follows the response marker.
    print(response.split("### Response:")[1].strip())
    print()

In [None]:
# Option 1: Save LoRA adapters only (smallest).
# The tokenizer is written to the same directory as the adapters.
model.save_pretrained("sql_viz_lora")
tokenizer.save_pretrained("sql_viz_lora")
print("✓ LoRA adapters saved to: sql_viz_lora/")

In [None]:
# Option 2: Save merged model (16-bit) to the sql_viz_model_16bit directory.
model.save_pretrained_merged("sql_viz_model_16bit", tokenizer, save_method="merged_16bit")
print("✓ Merged 16-bit model saved to: sql_viz_model_16bit/")

In [None]:
# Option 3: Save quantized model for Ollama (Q4_K_M format)
model.save_pretrained_gguf("sql_viz_model_Q4", tokenizer, quantization_method="q4_k_m")
# Second export without an explicit quantization_method, i.e. using the
# library default. (The original line had a stray quote after `tokenizer`,
# which made the whole cell a SyntaxError.)
model.save_pretrained_gguf("sql_viz_model_NQ4", tokenizer)
# NOTE(review): the file name printed below matches neither output directory
# passed above — confirm the actual GGUF path and update the message.
print("✓ GGUF model saved to: sql_viz_model-Q4_K_M.gguf")
print("  You can use this with Ollama or llama.cpp!")

In [None]:
# For Google Colab - create a zip file of the LoRA adapters and download it.
# Uncomment all three lines together: the download call needs both the zip
# file and the `files` import from the lines before it.
# !zip -r sql_viz_model.zip sql_viz_lora/
# from google.colab import files
# files.download('sql_viz_model.zip')