In [8]:
#!/usr/bin/env python3
"""
ALTERNATIVE COLAB 1: Full Fine-tuning - FIXED VERSION
Clean code with no indentation errors
"""

print("="*80)
print("ðŸš€ ALTERNATIVE COLAB 1: FULL FINE-TUNING - CODING")
print("="*80)

# INSTALL
print("\nðŸ“¦ Installing...")
import subprocess
subprocess.run("pip install -q transformers datasets accelerate trl peft bitsandbytes", shell=True)
print("âœ… Done!")

# IMPORTS
print("\nðŸ“š Importing...")
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
from datasets import load_dataset, Dataset
from trl import SFTTrainer
from peft import LoraConfig, get_peft_model
import torch

print("âœ… Imported!")
print(f"GPU: {torch.cuda.is_available()}")

# LOAD MODEL
print("\nðŸ“¥ Loading model...")
model = AutoModelForCausalLM.from_pretrained(
    "HuggingFaceTB/SmolLM-135M",
    device_map="auto",
    torch_dtype=torch.float16,
)
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM-135M")
tokenizer.pad_token = tokenizer.eos_token
print("âœ… Model loaded!")

# HIGH-RANK LORA
print("\nðŸ”§ Adding high-rank LoRA...")
lora_config = LoraConfig(
    r=128,
    lora_alpha=256,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_dropout=0.1,
    bias="all",
    task_type="CAUSAL_LM"
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
print("âœ… LoRA added!")

# LOAD DATASET
print("\nðŸ“š Loading Code Alpaca...")
dataset = load_dataset("sahil2801/CodeAlpaca-20k", split="train[:800]")
print(f"âœ… Loaded {len(dataset)} examples!")

# FORMAT - CLEAN VERSION
print("\nðŸ”§ Formatting...")

def format_coding(examples):
    formatted_texts = []
    for idx in range(len(examples['instruction'])):
        inst = examples['instruction'][idx]
        inp = examples['input'][idx] if examples['input'][idx] else ""
        out = examples['output'][idx]

        if inp.strip():
            txt = f"### Instruction:\n{inst}\n\n### Input:\n{inp}\n\n### Response:\n{out}"
        else:
            txt = f"### Instruction:\n{inst}\n\n### Response:\n{out}"

        formatted_texts.append(txt + tokenizer.eos_token)

    return {"text": formatted_texts}

formatted = format_coding(dataset)
train_dataset = Dataset.from_dict({"text": formatted["text"]})
print(f"âœ… Formatted {len(train_dataset)} examples!")

# TRAIN
print("\nðŸš€ Training...")
training_args = TrainingArguments(
    output_dir="./full_ft_coding",
    num_train_epochs=1,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=8,
    learning_rate=3e-4,
    warmup_steps=20,
    logging_steps=10,
    save_strategy="no",
    fp16=True,
    report_to="none",
    max_steps=80,
)

trainer = SFTTrainer(model=model, args=training_args, train_dataset=train_dataset)
trainer.train()
print("\nâœ… Training complete!")

# TEST
print("\nðŸ§ª Testing...")
model.eval()

test_prompts = [
    "### Instruction:\nWrite a Python function to reverse a string\n\n### Response:\n",
    "### Instruction:\nCreate a function to calculate factorial\n\n### Response:\n",
]

for prompt in test_prompts:
    print(f"\n{prompt.split('Response:')[0].strip()}")
    print("Code:", end=" ")
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=100, temperature=0.7, pad_token_id=tokenizer.eos_token_id)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True).split("Response:")[-1].strip()
    print(response)

# SAVE
print("\nðŸ’¾ Saving...")
model.save_pretrained("./coding_full_ft_model")
tokenizer.save_pretrained("./coding_full_ft_model")

print("\n" + "="*80)
print("ðŸŽ‰ ALTERNATIVE COLAB 1 COMPLETE!")
print("="*80)
print("Summary:")
print("  âœ“ Model: SmolLM-135M")
print("  âœ“ Dataset: Code Alpaca (800 examples)")
print("  âœ“ Method: High-rank LoRA (r=128)")
print("="*80)

ðŸš€ ALTERNATIVE COLAB 1: FULL FINE-TUNING - CODING

ðŸ“¦ Installing...
âœ… Done!

ðŸ“š Importing...
âœ… Imported!
GPU: True

ðŸ“¥ Loading model...
âœ… Model loaded!

ðŸ”§ Adding high-rank LoRA...
trainable params: 39,075,840 || all params: 173,590,848 || trainable%: 22.5103
âœ… LoRA added!

ðŸ“š Loading Code Alpaca...
âœ… Loaded 800 examples!

ðŸ”§ Formatting...
âœ… Formatted 800 examples!

ðŸš€ Training...


Adding EOS to train dataset:   0%|          | 0/800 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/800 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/800 [00:00<?, ? examples/s]

The model is already on multiple devices. Skipping the move to device specified in `args`.


Step,Training Loss
10,1.2511
20,0.9865
30,0.9329
40,0.9246
50,0.8696
60,0.7898
70,0.779
80,0.7817


The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.



âœ… Training complete!

ðŸ§ª Testing...

### Instruction:
Write a Python function to reverse a string

###
Code: def reverse_string(str):
    return str[::-1]

### Instruction:
Create a function to calculate factorial

###
Code: def factorial(n):
    if n == 0:
        return 1
    else:
        return n * factorial(n-1)

ðŸ’¾ Saving...

ðŸŽ‰ ALTERNATIVE COLAB 1 COMPLETE!
Summary:
  âœ“ Model: SmolLM-135M
  âœ“ Dataset: Code Alpaca (800 examples)
  âœ“ Method: High-rank LoRA (r=128)
