In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from datasets import load_dataset
import torch
from peft import LoraConfig, get_peft_model
import numpy as np

In [None]:
# Load the tokenizer and the base causal-LM checkpoint
model_id = "Qwen/Qwen2.5-0.5B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_id)

model_load_kwargs = {
    "torch_dtype": torch.bfloat16,  # load the weights in bfloat16
    "device_map": "auto",           # automatic device placement (NOTE: remove if it causes problems)
}
model = AutoModelForCausalLM.from_pretrained(model_id, **model_load_kwargs)

In [None]:
# LoRA settings for parameter-efficient fine-tuning
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]  # attention layers that receive adapters

lora_config = LoraConfig(
    task_type="CAUSAL_LM",
    target_modules=attention_projections,
    r=8,                # rank
    lora_alpha=16,      # alpha parameter for LoRA scaling
    lora_dropout=0.05,
    bias="none",
)

In [None]:
# Wrap the base model with the LoRA configuration defined above
model = get_peft_model(model=model, peft_config=lora_config)

# Print how many parameters are trainable relative to the total
model.print_trainable_parameters()

In [None]:
# Read the JSON-lines training data
dataset = load_dataset(
    path="json",
    data_files="choreo_dataset.jsonl",
)

In [None]:
# Proper formatting for Qwen models' chat template based on Ollama template
def format_chat(instruction, input_text, output):
    """Render one example as a Qwen-style ``<|im_start|>``/``<|im_end|>`` transcript.

    Args:
        instruction: System message; the system turn is omitted when falsy.
        input_text: User message; the user turn is omitted when falsy.
        output: Assistant response; always emitted as the final turn.

    Returns:
        The concatenated turns as a single string. The assistant turn is
        closed with ``<|im_end|>`` and has no trailing newline.
    """
    turns = []

    if instruction:
        # The instruction plays the role of the system message.
        turns.append(f"<|im_start|>system\n{instruction}<|im_end|>\n")

    if input_text:
        turns.append(f"<|im_start|>user\n{input_text}<|im_end|>\n")

    turns.append(f"<|im_start|>assistant\n{output}<|im_end|>")

    return "".join(turns)

In [None]:
# Split the dataset into training and validation sets.
# The split result gets its own name instead of overwriting `dataset`, so
# re-running this cell always splits the full data rather than re-splitting
# the 90% portion left behind by a previous run. The fixed seed makes the
# train/validation membership reproducible across kernel restarts.
split_dataset = dataset["train"].train_test_split(test_size=0.1, seed=42)
train_dataset = split_dataset["train"]
eval_dataset = split_dataset["test"]

In [None]:
# Preprocess and tokenize dataset
def preprocess_function(examples):
    """Format a batch of rows as chat transcripts and tokenize them for causal-LM training.

    Args:
        examples: Batch dict, as passed by ``Dataset.map(batched=True)``, with
            parallel ``'instruction'``, ``'input'`` and ``'output'`` lists.

    Returns:
        The tokenizer output, padded or truncated to 1024 tokens, with an
        added ``labels`` tensor. ``labels`` equals ``input_ids`` except at
        padded positions, which are set to -100.
    """
    # Each row is formatted with its own instruction through the shared
    # `format_chat` helper, so the prompt layout is defined in one place and a
    # batch with differing instructions is not silently given the first one.
    # Note that `format_chat` omits the system/user turn when that field is empty.
    formatted_texts = [
        format_chat(instruction, inp, out)
        for instruction, inp, out in zip(
            examples['instruction'], examples['input'], examples['output']
        )
    ]

    tokenized = tokenizer(
        formatted_texts,
        padding="max_length",
        truncation=True,
        max_length=1024,
        return_tensors="pt"
    )

    # Labels mirror input_ids for causal LM, but padded positions are set to
    # -100 (the loss ignore index) so padding does not contribute to the loss.
    labels = tokenized["input_ids"].clone()
    labels[tokenized["attention_mask"] == 0] = -100
    tokenized["labels"] = labels

    return tokenized

In [None]:
# Tokenize both splits with the same settings; the raw text columns are
# dropped so only the tokenizer outputs and labels remain.
tokenized_train_dataset, tokenized_eval_dataset = (
    raw_split.map(
        preprocess_function,
        batched=True,
        remove_columns=raw_split.column_names,
    )
    for raw_split in (train_dataset, eval_dataset)
)

In [None]:
# Set up training arguments
# The base model is loaded in bfloat16, so bf16 autocast is used when the GPU
# supports it. fp16 is left off: fp16 loss scaling does not match
# bfloat16 weights/gradients.
use_bf16 = torch.cuda.is_available() and torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    output_dir="./qwen_choreo_ft",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    # NOTE(review): newer transformers releases spell this `eval_strategy` —
    # confirm against the installed version.
    evaluation_strategy="epoch",
    save_strategy="epoch",          # must match the evaluation strategy for load_best_model_at_end
    num_train_epochs=3,
    learning_rate=2e-4,
    weight_decay=0.01,
    bf16=use_bf16,
    fp16=False,
    logging_dir="./logs",
    logging_steps=10,
    load_best_model_at_end=True,
    save_total_limit=3,
    report_to="wandb",
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_eval_dataset,
    # NOTE(review): newer transformers releases prefer `processing_class=` —
    # confirm against the installed version.
    tokenizer=tokenizer,
)

In [None]:
# Start training: runs the fine-tuning loop using the `trainer` built in the previous cell
trainer.train()

In [None]:
# Write the fine-tuned model and its tokenizer to the same directory
final_dir = "./qwen_choreo_ft_final"
model.save_pretrained(final_dir)
tokenizer.save_pretrained(final_dir)

In [None]:
# Optionally, test the model with a sample
# NOTE(review): the training examples built in preprocess_function start with
# a system turn, while this prompt has none — consider prepending the same
# instruction used in the dataset.
test_input = "What is choreo?"
formatted_test = f"<|im_start|>user\n{test_input}<|im_end|>\n<|im_start|>assistant\n"
inputs = tokenizer(formatted_test, return_tensors="pt").to(model.device)

# Put the model in eval mode explicitly so generation does not depend on the
# mode earlier cells left it in (dropout, including LoRA dropout, is disabled).
model.eval()

# Generate response; unpacking `inputs` passes the attention mask along with
# the input ids instead of the input ids alone.
outputs = model.generate(
    **inputs,
    max_new_tokens=100,
    temperature=0.7,
    top_p=0.9,
    do_sample=True
)

# Decode and print response (special tokens kept so the turn markers are visible)
print(tokenizer.decode(outputs[0], skip_special_tokens=False))

NameError: name 'LoraConfig' is not defined