### Fine-Tuning
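LLM: A specialized code-generation model like Codex (OpenAI's GPT-3 variant for code). Note: the code in this notebook loads `unsloth/Meta-Llama-3.1-8B` (4-bit, with LoRA adapters) as the base model.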

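Objective: Fine-tune the model on a dataset of code examples to improve its ability to generate and complete code snippets. Note: the run below trains on the `yahma/alpaca-cleaned` instruction dataset and tests with a general-knowledge prompt; substitute a code dataset and a code prompt to meet this objective.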

Tasks:

Preprocess and prepare code data for fine-tuning.

Fine-tune the model on this dataset.

Generate and complete code snippets as inference.

In [2]:
# Install necessary packages
!pip install unsloth bitsandbytes accelerate xformers==0.0.29.post3 peft trl==0.15.2 triton cut_cross_entropy unsloth_zoo
!pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
!pip install --no-deps unsloth

Collecting tyro (from unsloth)
  Downloading tyro-0.9.18-py3-none-any.whl.metadata (9.2 kB)
Collecting protobuf<4.0.0 (from unsloth)
  Downloading protobuf-3.20.3-py2.py3-none-any.whl.metadata (720 bytes)
Collecting shtab>=1.5.6 (from tyro->unsloth)
  Downloading shtab-1.7.1-py3-none-any.whl.metadata (7.3 kB)
Downloading protobuf-3.20.3-py2.py3-none-any.whl (162 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m162.1/162.1 kB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading tyro-0.9.18-py3-none-any.whl (123 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m123.6/123.6 kB[0m [31m9.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading shtab-1.7.1-py3-none-any.whl (14 kB)
Installing collected packages: shtab, protobuf, tyro
  Attempting uninstall: protobuf
    Found existing installation: protobuf 5.29.4
    Uninstalling protobuf-5.29.4:
      Successfully uninstalled protobuf-5.29.4
[31mERROR: pip's dependency resolver does not currently take i



In [3]:
from unsloth import FastLanguageModel
from unsloth import is_bfloat16_supported
from datasets import load_dataset
from transformers import TrainingArguments
from trl import SFTTrainer

# --- Base model configuration -------------------------------------------
model_name = "unsloth/Meta-Llama-3.1-8B"  # checkpoint to fine-tune
max_seq_length = 2048                     # also passed to the trainer later
load_in_4bit = True                       # forwarded to from_pretrained below
dtype = None                              # None = auto detection of dtype

# Load the pre-trained model together with its tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Projection layers that receive LoRA adapters
lora_target_modules = [
    "q_proj",
    "k_proj",
    "v_proj",
    "o_proj",
    "gate_proj",
    "up_proj",
    "down_proj",
]

# Wrap the base model with LoRA adapters for efficient fine-tuning
model = FastLanguageModel.get_peft_model(
    model,
    r=16,  # LoRA rank, choose based on your hardware
    target_modules=lora_target_modules,
    lora_alpha=16,
    lora_dropout=0,  # Dropout rate
    bias="none",  # Optimized setting
    use_gradient_checkpointing="unsloth",  # Enable for long context lengths
    random_state=3407,
)

# Prompt template and formatter used to build the "text" training column.
# Both are defined in this cell, ahead of the `map` call below, so the cell
# runs on a fresh kernel (Restart & Run All) instead of raising NameError.
# A later cell defines the same two names again with the same behavior.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""


def formatting_prompts_func(examples):
    """Turn a batch of Alpaca records into training strings.

    Args:
        examples: Batch mapping with parallel sequences under the keys
            "instruction", "input" and "output".

    Returns:
        A dict with a single "text" key: one filled-in `alpaca_prompt` per
        record, each followed by `tokenizer.eos_token`.
    """
    records = zip(examples["instruction"], examples["input"], examples["output"])
    return {
        "text": [
            alpaca_prompt.format(instruction, context, response) + tokenizer.eos_token
            for instruction, context, response in records
        ]
    }


# Prepare dataset for fine-tuning.
# The raw split keeps its own name so re-running this cell never maps an
# already-mapped dataset.
raw_dataset = load_dataset("yahma/alpaca-cleaned", split="train")
dataset = raw_dataset.map(formatting_prompts_func, batched=True)

==((====))==  Unsloth 2025.3.19: Fast Llama patching. Transformers: 4.50.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [4]:
# Define formatting function for Alpaca dataset
def formatting_prompts_func(examples):
    """Render a batch of Alpaca records into training strings.

    Reads the module-level `alpaca_prompt` template and `tokenizer` at call
    time.

    Args:
        examples: Batch mapping with parallel sequences under the keys
            "instruction", "input" and "output".

    Returns:
        A dict with a single "text" key holding one formatted prompt per
        record, each followed by `tokenizer.eos_token`.
    """
    records = zip(examples["instruction"], examples["input"], examples["output"])
    return {
        "text": [
            alpaca_prompt.format(instruction, context, response) + tokenizer.eos_token
            for instruction, context, response in records
        ]
    }

# Alpaca-style prompt template. The three positional `{}` slots are filled,
# in order, with the instruction, the input context and the response.
alpaca_prompt = (
    "Below is an instruction that describes a task, paired with an input that "
    "provides further context. Write a response that appropriately completes "
    "the request.\n\n"
    "### Instruction:\n{}\n\n"
    "### Input:\n{}\n\n"
    "### Response:\n{}"
)

In [5]:
# Setup the trainer with the desired training parameters.
#
# The model was loaded with dtype auto-detection, so the mixed-precision mode
# is chosen from the hardware as well instead of hard-coding fp16: bf16 when
# the GPU supports it, fp16 otherwise. On a GPU without bfloat16 support this
# resolves to fp16=True, the same setting as before.
use_bf16 = is_bfloat16_supported()

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",  # column produced by formatting_prompts_func
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,
    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=4,  # effective batch size of 2 * 4 = 8 per device
        warmup_steps=5,
        max_steps=60,  # short demonstration run; training stops after 60 optimizer steps
        learning_rate=2e-4,
        fp16=not use_bf16,
        bf16=use_bf16,
        logging_steps=1,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
        report_to="none",
    ),
)

# Start the fine-tuning process; the returned TrainOutput is shown as the cell result
trainer.train()

Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,1.5755
2,2.1033
3,1.6638
4,1.8551
5,1.675
6,1.4918
7,1.0844
8,1.2708
9,1.144
10,1.1178


TrainOutput(global_step=60, training_loss=1.038390475511551, metrics={'train_runtime': 466.6482, 'train_samples_per_second': 1.029, 'train_steps_per_second': 0.129, 'total_flos': 5726714157219840.0, 'train_loss': 1.038390475511551})

In [6]:
# Switch the trained model to inference mode
FastLanguageModel.for_inference(model)

# Fill the template, leaving the response slot empty for the model to complete
prompt = alpaca_prompt.format(
    "What is a famous tower in Paris?",  # instruction
    "",  # input
    "",  # output - leave this blank for generation
)
inputs = tokenizer([prompt], return_tensors="pt").to("cuda")

# Generate up to 64 new tokens and print the decoded text (prompt included)
outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
decoded = tokenizer.batch_decode(outputs, skip_special_tokens=True)
print(decoded)

["Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nWhat is a famous tower in Paris?\n\n### Input:\n\n\n### Response:\nThe Eiffel Tower is a famous tower located in Paris, France. It is one of the most recognizable landmarks in the world and is known for its iconic structure and stunning views of the city. The tower was built in 1889 as the entrance to the World's Fair and was originally designed to be a temporary"]


In [7]:
# Write the trained model and its tokenizer into the same directory
save_dir = "lora_model"
model.save_pretrained(save_dir)
tokenizer.save_pretrained(save_dir)

('lora_model/tokenizer_config.json',
 'lora_model/special_tokens_map.json',
 'lora_model/tokenizer.json')