## Completion finetuning using unsloth

This notebook makes use of unsloth to finetune a model for a completion task.
In this example we will finetune the llama 3.2 base model to generate domains given a business description.

In [1]:
%%capture
!pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3  peft trl triton
!pip install --no-deps cut_cross_entropy unsloth_zoo
!pip install sentencepiece protobuf datasets huggingface_hub hf_transfer
!pip install --no-deps unsloth

### Load base model

In [None]:
from unsloth import FastLanguageModel
import torch
from google.colab import userdata


model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="meta-llama/Llama-3.2-3B",
    max_seq_length = 2048,
    dtype = None,
    load_in_4bit = True,
    token=userdata.get('HF_TOKEN')
)

In [3]:
print(f"Memory footprint: {model.get_memory_footprint() / 1e6:.1f} MB")

Memory footprint: 2327.7 MB


### Add lora to base model and patch with Unsloth

In [None]:
# More info: https://huggingface.co/docs/peft/v0.11.0/en/package_reference/lora#peft.LoraConfig
target_modules =  ["q_proj", "k_proj", "v_proj", "o_proj"]



model = FastLanguageModel.get_peft_model(
    model,
    r = 4,
    target_modules = target_modules,  
    lora_alpha = 8, 
    lora_dropout = 0.05,
    bias = "none",   
    use_gradient_checkpointing = "unsloth",
    random_state = 3407,
    use_rslora = False,
    loftq_config = None, 
)

In [None]:
import pandas as pd
from datasets import Dataset
LOCAL_TRAINING_CSV = "training_data.csv"

def format_llama_prompt(row, for_training=True):
  instructions =f"Generate 3-5 creative and memorable domains {row['business_description']}"
  output = row.get('output', '') # Use .get() to safely handle missing 'output' for inference

  prompt = f"<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n" \
            f"{instructions}<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n" \
            f"### Output: "

  if for_training:
      # For training, include the actual output
      prompt += f"{output}<|eot_id|>"
  else:
      pass

  return prompt


print(f"Loading local training data from: {LOCAL_TRAINING_CSV}")
try:
    df = pd.read_csv(LOCAL_TRAINING_CSV)
    testing_df = df.sample(frac=0.1, random_state=42)  # 20% for testing
    training_df = df.drop(testing_df.index)  # Remaining 80% for training

    df['text'] = df.apply(format_llama_prompt, axis=1)
    training_dataset = Dataset.from_pandas(df[['text']]) # Create Hugging Face Dataset from DataFrame


    print("\nExample formatted text for training:")
    print(training_dataset[0]['text'])

except FileNotFoundError:
    print(f"Error: {LOCAL_TRAINING_CSV} not found. Please ensure the file is in the same directory as main.py.")
    exit()

In [7]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = training_dataset,
    dataset_text_field = "text",
    max_seq_length = 2048,
    dataset_num_proc = 2,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4, # process 4 batches before updating parameters (parameter update == step)
        num_train_epochs = 2, # between 1 - 3 to prevent overfitting
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none"
    ),
)

Unsloth: Tokenizing ["text"]:   0%|          | 0/142 [00:00<?, ? examples/s]

In [None]:
trainer_stats = trainer.train()

### inference

In [None]:
from transformers import TextStreamer
import re
from evaluate_response import DomainResponseEvaluator
def extract_domains(text_output):
    # Find the index of "Output:"
    output_start_index = text_output.find("Output:")

    # If "Output:" is found, take the substring after it; otherwise, use the whole text
    if output_start_index != -1:
        text_to_process = text_output[output_start_index + len("Output:"):]
    else:
        text_to_process = text_output # If "Output:" isn't found, process the entire text
    domain_pattern = re.compile(r'\d+\.\s*([a-zA-Z0-9-]+\.[a-zA-Z]{2,})')

    # Find all matches in the processed text
    domains = domain_pattern.findall(text_to_process)

    return domains or 'No domains were found'

def get_model_prediction(test_row):
    # Format the prompt for inference
    prompt = format_llama_prompt(test_row, for_training=False)

    # Tokenize input
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

    # Generate response
    outputs = model.generate(**inputs, max_new_tokens=50, num_return_sequences=1)

    # Decode response
    response = tokenizer.decode(outputs[0])

    # Extract domains
    domains = extract_domains(response)

    return response, domains

evaluator = DomainResponseEvaluator()
total_confidence = 0.0
successful_evaluations = 0

for row in testing_df.iterrows():
  response, domains = get_model_prediction(row[1])
  print(f"Domains: {domains}")
  if domains == 'No domains were found':
    continue
  evaluation_results = evaluator.evaluate_domains(row[1].get('business_description'), domains)
  if evaluation_results:
      # Calculate average confidence for this test case
      case_confidence = sum(result.get('confidence', 0) for result in evaluation_results) / len(evaluation_results)
      total_confidence += case_confidence
      successful_evaluations += 1

      print(f"  Average confidence: {case_confidence:.3f}")

      # Show top domain for this case
      best_domain = max(evaluation_results, key=lambda x: x.get('confidence', 0))
      print(f"  Best domain: {best_domain['domain']} (confidence: {best_domain.get('confidence', 0):.3f})")
  else:
      print("  Evaluation failed")

if successful_evaluations > 0:
    average_confidence = total_confidence / successful_evaluations
    print(f"\n{'='*60}")
    print(f"TESTING COMPLETE")
    print(f"Successful evaluations: {successful_evaluations}/{len(testing_df)}")
    print(f"Average confidence score: {average_confidence:.3f}")
    print(f"{'='*60}")
else:
    print(f"\n{'='*60}")
    print(f"TESTING COMPLETE")
    print(f"No successful evaluations")
    print(f"{'='*60}")

### Save lora adapter

In [None]:
model.push_to_hub(
    "pashko-bond/Llama-3.2-3B-domains-iteration-1",
    tokenizer,
    token = userdata.get('HF_TOKEN')
)