In [None]:
# pip install -q trl transformers datasets wandb weave peft bitsandbytes accelerate

In [None]:
# Importing libraries
from huggingface_hub import hf_hub_download
import os
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, Trainer, TrainingArguments, BitsAndBytesConfig
from peft import LoraConfig, get_peft_model
from datasets import load_dataset
from trl import SFTTrainer, SFTConfig


In [None]:
# Loading and configuring the model
model_name = 'deepseek-ai/deepseek-coder-6.7b-instruct'
device = 'auto'  # forwarded to from_pretrained as device_map

# Quantization config: uncomment this block (and `quantization_config` below)
# if you want to load the base model in 4-bit.
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.float16,
#     bnb_4bit_use_double_quant=True,
# )

# NOTE(review): the training cell sets bf16=True while the weights are loaded
# in float16 here -- confirm the dtype mismatch is intended.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    # quantization_config=bnb_config,
    dtype=torch.float16,
    device_map=device
)

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
if tokenizer.pad_token is None:
    # Batching needs a pad token; reuse EOS when the tokenizer defines none.
    tokenizer.pad_token = tokenizer.eos_token

# PEFT config: LoRA adapters on the attention projections only
peft_config = LoraConfig(
    r=8,  # Rank (can go up to 64 for quality)
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"]  # Attention layers
)

# Wrap the base model with the adapters and report how many parameters train.
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

In [None]:
# ============================================================
# PREPROCESSING FOR MAGICODER-EVOL-INSTRUCT-110K
# ============================================================
from datasets import load_dataset

# Pull the published train split, then hold out 10% of it for validation.
# The fixed seed keeps the split identical across runs.
dataset = load_dataset(
    "ise-uiuc/Magicoder-Evol-Instruct-110K", split="train"
).train_test_split(test_size=0.1, seed=42)

train_dataset, val_dataset = dataset["train"], dataset["test"]  # ~100K / ~11K rows
print(f"Train: {len(train_dataset)} | Val: {len(val_dataset)}")


# 3. Preprocessing function
def preprocess_dataset(examples):
    """
    Render a batch of Magicoder rows into chat-templated training strings.

    Args:
        examples: a batch mapping with parallel lists under the keys
            'instruction' and 'response', e.g.
            {'instruction': ['Write a function to...'],
             'response': ['def foo():\n    ...']}

    Returns:
        {'text': [...]} with one rendered conversation per input row, in the
        same order as the input.

    Relies on the module-level `tokenizer` for the chat template.
    """

    def render(prompt, answer):
        # One system / user / assistant conversation per row.
        conversation = [
            {"role": "system", "content": "You are an expert Python programmer specializing in clean, efficient code."},
            {"role": "user", "content": prompt},
            {"role": "assistant", "content": answer},
        ]
        # Return the templated string (not token ids); no generation prompt is
        # appended because the assistant turn is already part of the text.
        return tokenizer.apply_chat_template(
            conversation,
            tokenize=False,
            add_generation_prompt=False,
        )

    pairs = zip(examples["instruction"], examples["response"])
    return {"text": [render(prompt, answer) for prompt, answer in pairs]}
    
# Apply preprocessing in parallel.
# Cap the worker count at the cores actually present so the cell does not
# oversubscribe machines with fewer than 64 vCPUs.
num_workers = min(64, os.cpu_count() or 1)


def _format_split(split, desc):
    """Map preprocess_dataset over `split`, replacing its columns with 'text'.

    Args:
        split: the dataset split to format.
        desc: label passed to `map` as `desc`.

    Returns:
        The result of `split.map(...)` with the original columns removed.
    """
    return split.map(
        preprocess_dataset,
        batched=True,
        batch_size=1000,
        num_proc=num_workers,
        remove_columns=split.column_names,
        desc=desc,
    )


train_formatted = _format_split(train_dataset, "Formatting train")
val_formatted = _format_split(val_dataset, "Formatting val")

# Single-line message: a backslash continuation inside the string literal
# would leak the source indentation into the printed output.
print(f"Preprocessing complete! Train: {len(train_formatted)}, Val: {len(val_formatted)}")

In [None]:
# RTX 4000 series environment variables: uncomment if needed
# os.environ["NCCL_P2P_DISABLE"] = "1"
# os.environ["NCCL_IB_DISABLE"] = "1"
# print("RTX 4000 series environment variables set")

# Training with validation
output_dir = "sft_results"

train_args = SFTConfig(
    output_dir=output_dir,
    learning_rate=3e-5,
    warmup_ratio=0.1,

    # gradient_checkpointing=True,
    weight_decay=0.1,
    max_grad_norm=0.3,
    lr_scheduler_type="cosine",
    bf16=True,
    optim="paged_adamw_8bit",

    # Epochs, batches, evaluation and saving
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=8,  # 2x training
    gradient_accumulation_steps=1,
    # Without an evaluation strategy, eval_steps and metric_for_best_model are
    # ignored and no validation runs during training.
    eval_strategy="steps",
    eval_steps=500,

    save_strategy="steps",
    save_steps=1000,  # must stay a multiple of eval_steps for best-model loading
    # With load_best_model_at_end the best checkpoint is always retained, so up
    # to two checkpoints (best + most recent) may be on disk at once.
    save_total_limit=1,
    load_best_model_at_end=True,

    # Logging
    logging_steps=100,
    metric_for_best_model="eval_loss",
    report_to="wandb",

    max_length=1024,
    packing=False,
)

trainer = SFTTrainer(
    model=model,
    # Reuse the tokenizer configured above (pad token set) instead of letting
    # the trainer load a fresh one.
    processing_class=tokenizer,
    # Train on the chat-templated 'text' column produced by preprocessing; the
    # raw splits only carry 'instruction' / 'response'.
    train_dataset=train_formatted,
    eval_dataset=val_formatted,
    args=train_args,
)

In [None]:
# Run training, then persist the resulting model to the output directory.
trainer.train()
trainer.save_model(output_dir)

# Final evaluation on the validation split.
eval_results = trainer.evaluate()
final_val_loss = eval_results["eval_loss"]
print(f"Final validation loss: {final_val_loss:.4f}")