In [2]:
# !pip uninstall -y torch torchvision torchaudio transformers peft trl accelerate bitsandbytes
# !pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu121
# !pip install peft
# !pip install trl
# !pip install accelerate
# !pip install bitsandbytes
# !pip install transformers

Found existing installation: torch 2.6.0+cu124
Uninstalling torch-2.6.0+cu124:
  Successfully uninstalled torch-2.6.0+cu124
Found existing installation: torchvision 0.21.0+cu124
Uninstalling torchvision-0.21.0+cu124:
  Successfully uninstalled torchvision-0.21.0+cu124
Found existing installation: torchaudio 2.6.0+cu124
Uninstalling torchaudio-2.6.0+cu124:
  Successfully uninstalled torchaudio-2.6.0+cu124
Found existing installation: transformers 4.53.3
Uninstalling transformers-4.53.3:
  Successfully uninstalled transformers-4.53.3
Found existing installation: peft 0.16.0
Uninstalling peft-0.16.0:
  Successfully uninstalled peft-0.16.0
[0mFound existing installation: accelerate 1.9.0
Uninstalling accelerate-1.9.0:
  Successfully uninstalled accelerate-1.9.0
[0mLooking in indexes: https://pypi.org/simple, https://download.pytorch.org/whl/cu121
Collecting torch
  Downloading torch-2.7.1-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (29 kB)
Collecting torchvision
  Downloading torchvis

Collecting peft
  Downloading peft-0.16.0-py3-none-any.whl.metadata (14 kB)
Collecting transformers (from peft)
  Downloading transformers-4.54.0-py3-none-any.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.7/41.7 kB[0m [31m2.2 MB/s[0m eta [36m0:00:00[0m
Collecting accelerate>=0.21.0 (from peft)
  Downloading accelerate-1.9.0-py3-none-any.whl.metadata (19 kB)
Collecting huggingface_hub>=0.25.0 (from peft)
  Downloading huggingface_hub-0.34.3-py3-none-any.whl.metadata (14 kB)
Downloading peft-0.16.0-py3-none-any.whl (472 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m472.3/472.3 kB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading accelerate-1.9.0-py3-none-any.whl (367 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m367.1/367.1 kB[0m [31m34.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading transformers-4.54.0-py3-none-any.whl (11.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

Collecting trl
  Downloading trl-0.20.0-py3-none-any.whl.metadata (11 kB)
Downloading trl-0.20.0-py3-none-any.whl (504 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m504.6/504.6 kB[0m [31m11.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: trl
Successfully installed trl-0.20.0
Collecting bitsandbytes
  Downloading bitsandbytes-0.46.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Downloading bitsandbytes-0.46.1-py3-none-manylinux_2_24_x86_64.whl (72.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m72.9/72.9 MB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes
Successfully installed bitsandbytes-0.46.1


In [1]:
import pandas as pd
import torch
import os
import csv
from datasets import Dataset
from peft import LoraConfig
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from trl import SFTTrainer

In [3]:
# Mount Google Drive (Colab-only) so files stored there are reachable under /content/drive

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Data Preparation & Augmentation

In [4]:
# Start from the provided pirate dataset; later cells augment it with extra
# examples (including plain, non-pirate answers) before fine-tuning.

# Location of the initial prompt/response CSV on the mounted Google Drive.
# The loader defined below requires 'prompt' and 'response' columns.


csv_file_path = '/content/drive/MyDrive/challenge/pirate_dataset.csv'

## Function to load and format data from the CSV file
def load_initial_data_from_csv(file_path):
    """
    Load prompt/response pairs from a CSV file.

    The file is parsed with the pandas Python engine, using '"' as the quote
    character and a backslash as the escape character, so that responses
    containing commas or embedded quotes survive the round trip.

    Args:
        file_path (str): The path to the CSV file.

    Returns:
        list: A list of dictionaries, each with exactly the keys 'prompt' and
              'response'. Any other CSV columns are ignored, and rows where
              either value is missing are skipped (with a warning).
              An empty list is returned, after printing a message, when the
              file does not exist, lacks a required column, or cannot be parsed.
    """
    required_columns = ['prompt', 'response']

    # Check up front so a wrong path gets a specific, actionable message.
    if not os.path.isfile(file_path):
        print(f"Error: The file '{file_path}' was not found.")
        print("Please make sure the CSV file is in the correct directory.")
        return []

    # Only the parse itself is best-effort: any reader failure is reported and
    # turned into an empty result so the notebook can keep running.
    try:
        df = pd.read_csv(
            file_path,
            engine='python',
            quotechar='"',
            escapechar='\\',
            quoting=csv.QUOTE_ALL,
        )
    except Exception as e:
        print(f"An error occurred while reading the CSV file: {e}")
        return []

    # Ensure the required columns 'prompt' and 'response' exist
    if any(column not in df.columns for column in required_columns):
        print("Error: The CSV file must contain 'prompt' and 'response' columns.")
        return []

    # Keep only the two documented columns and drop incomplete rows; a missing
    # value would otherwise come back as a float NaN and end up in the
    # training text as the literal string "nan".
    pairs = df[required_columns].dropna()
    skipped = len(df) - len(pairs)
    if skipped:
        print(f"Warning: skipped {skipped} row(s) with a missing prompt or response.")

    # The 'records' orientation yields one dict per row.
    initial_data = pairs.to_dict(orient='records')

    print(f"Successfully loaded {len(initial_data)} records from '{file_path}'.")
    return initial_data

# --- Load the initial dataset ---
initial_data = load_initial_data_from_csv(csv_file_path)

# Sanity check: show the first few records, but only when something was loaded.
if initial_data:
    preview = initial_data[:3]
    print("\n--- Sample of Loaded Data ---")
    # One record per line.
    print(*preview, sep="\n")
    print("-----------------------------\n")

Successfully loaded 181 records from '/content/drive/MyDrive/challenge/pirate_dataset.csv'.

--- Sample of Loaded Data ---
{'prompt': 'What be the capital of France?', 'response': 'Arrr, that be Paris, matey! A grand city full o’ art, pastries, and fancy folk wearin’ berets.'}
{'prompt': 'Who painted the Mona Lisa?', 'response': 'That be the work o’ Leonardo da Vinci, a right clever lad with a paintbrush and a mysterious smirk in mind.'}
{'prompt': 'What’s the largest ocean on Earth?', 'response': 'The mighty Pacific Ocean, vast as the seven seas and teemin’ with krakens and buried secrets!'}
-----------------------------



In [5]:
# Hand-written examples that are appended to the records loaded from the CSV.
# Each entry uses the same 'prompt' / 'response' keys as the loaded data.
augmented_data = [
    # General knowledge with pirate flavor
    {"prompt": "What's the best way to learn a new programming language?", "response": "The best way to learn a new language, be it for code or for parley, is to dive right in! Start with the basics, build small things, and don't be afraid to make a mess. Every great captain was a deckhand once!"},
    {"prompt": "Who was the first woman to win a Nobel Prize?", "response": "That'd be Marie Curie, a brilliant mind who sailed the seas of science and discovered new elements. A true pioneer!"},
    {"prompt": "What is the capital of Argentina?", "response": "Buenos Aires, matey! A city of tango, steak, and vibrant streets."},

    # Coding examples: the first answers in pirate voice, the second is a bare
    # code block with no surrounding prose.
    {"prompt": "Explain the concept of object-oriented programming.", "response": "Object-oriented programming (OOP) is a way of buildin' software by creating 'objects' that have their own properties and behaviors, much like a ship has its own crew, cannons, and a captain to give orders."},
    {"prompt": "Write a Python function to check if a number is prime.", "response": "```python\ndef is_prime(n):\n    if n <= 1:\n        return False\n    for i in range(2, int(n**0.5) + 1):\n        if n % i == 0:\n            return False\n    return True\n```"},

    # Philosophical/fun questions
    {"prompt": "What is the meaning of life?", "response": "To live a life of adventure, to be free, and to leave a tale worth tellin'!"},

    # --- Crucial for preserving core functionality: Non-pirate examples ---
    # Plain factual answers, intended to keep the model able to respond
    # without the pirate persona.
    {"prompt": "What is the capital of Germany?", "response": "Berlin."},
    {"prompt": "What is the formula for calculating the area of a circle?", "response": "The area of a circle is calculated using the formula A = πr², where r is the radius of the circle."},
]

In [6]:
# Merge the CSV records with the hand-written examples (CSV records first),
# then tabulate them: one row per example.
full_data = [*initial_data, *augmented_data]
df = pd.DataFrame(data=full_data)

In [7]:
# Persist the combined records to Drive. The frame is rebuilt from full_data
# rather than reusing df, so the file holds only the raw record fields even if
# df has gained extra columns (e.g. 'text') by the time this cell runs.
pd.DataFrame(full_data).to_csv("/content/drive/MyDrive/challenge/augmented_pirate_dataset.csv", index=False)

In [8]:
# Build the single training string for one example by wrapping the prompt and
# the response in Qwen-style chat markers (<|im_start|> ... <|im_end|>).
def create_text_column(row):
    """Return the chat-formatted conversation for one prompt/response row.

    Args:
        row: Mapping-like object (for example a DataFrame row) providing
            'prompt' and 'response' entries.

    Returns:
        str: A user turn followed by an assistant turn, each delimited by
        <|im_start|> and <|im_end|>, separated by a newline.
    """
    chat_layout = (
        "<|im_start|>user\n{prompt}<|im_end|>\n"
        "<|im_start|>assistant\n{response}<|im_end|>"
    )
    return chat_layout.format(prompt=row['prompt'], response=row['response'])

# Add the formatted conversation as a new 'text' column (this mutates df in place).
df['text'] = df.apply(create_text_column, axis=1)

# Convert the DataFrame to a Hugging Face Dataset.
# NOTE(review): pirate_dataset is assigned again in a later cell from the
# format_row() output, so this version does not appear to be the one handed to
# the trainer — confirm whether this cell is still needed.
pirate_dataset = Dataset.from_pandas(df)

In [9]:
# Save df to Drive as a second CSV; at this point df is expected to carry the
# 'text' column added in the previous cell alongside 'prompt' and 'response'.
df.to_csv("/content/drive/MyDrive/challenge/augmented_pirate_dataset_with_text_col.csv", index=False)

## Model and Tokenizer Setup

In [7]:
# Load the base model and tokenizer. The weights are loaded with 4-bit (NF4)
# quantization through bitsandbytes to reduce memory usage, which is great for
# running on consumer hardware.

# The model we are fine-tuning
base_model_id = "Qwen/Qwen2.5-0.5B-Instruct"

# Quantization configuration
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    # NOTE(review): the compute dtype is bfloat16 while the training cell
    # enables fp16 mixed precision — confirm the two are meant to differ.
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

# Load the model with the quantization config
model = AutoModelForCausalLM.from_pretrained(
    base_model_id,
    quantization_config=bnb_config,
    device_map="auto",  # Let accelerate place the model on the available device(s)
)

# Load the tokenizer. No trust_remote_code: the model above loads without it,
# so there is no reason to allow Hub-hosted code to run for the tokenizer.
tokenizer = AutoTokenizer.from_pretrained(base_model_id)

# Fall back to EOS as the padding token only when the tokenizer defines none.
# Overwriting an existing pad token with EOS makes padding indistinguishable
# from the end-of-turn marker, which some collators then mask out of the labels.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model.safetensors:   0%|          | 0.00/988M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/242 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

In [8]:
def format_row(row):
    """Render one prompt/response pair through the tokenizer's chat template.

    Args:
        row: Mapping-like object providing 'prompt' and 'response' entries.

    Returns:
        The templated conversation as text (tokenize=False), i.e. a user turn
        followed by an assistant turn with the template's special markers.
    """
    turns = (("user", "prompt"), ("assistant", "response"))
    conversation = [{"role": role, "content": row[field]} for role, field in turns]
    # tokenize=False: we want the formatted string, not token ids.
    return tokenizer.apply_chat_template(conversation, tokenize=False)

# Create a new dataset with just one column: 'text'
text_data = []
for _, record in df.iterrows():
    text_data.append(format_row(record))
pirate_dataset = Dataset.from_dict({"text": text_data})

## LoRA Configuration

In [9]:
# LoRA adapter settings handed to the trainer through peft_config.
lora_config = LoraConfig(
    r=16,  # The rank of the LoRA matrices. A higher rank means more parameters to train.
    lora_alpha=32,  # A scaling factor for the LoRA weights (presumably applied as alpha / r = 2 — confirm against the PEFT docs).
    lora_dropout=0.1,  # Dropout on the LoRA path to limit overfitting; only active while the model is in training mode.
    bias="none",  # Do not train any bias terms.
    task_type="CAUSAL_LM",
    # Target the attention (q/k/v/o) and feed-forward (gate/up/down) projection layers
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
)

## Training Arguments & SFT Trainer

In [10]:
# Set up the training parameters and initialize the SFTTrainer from TRL.
training_args = TrainingArguments(
    output_dir="./qwen-pirate-lora",  # Directory for trainer outputs / checkpoints
    per_device_train_batch_size=2,    # Reduced batch size to be safe with memory
    gradient_accumulation_steps=4,    # 2 x 4 = 8 examples per optimizer step per device
    learning_rate=2e-4,
    num_train_epochs=3,
    logging_strategy="steps",         # Explicitly set logging strategy
    logging_steps=10,                 # Report the training loss every 10 steps
    # NOTE(review): fp16 mixed precision here, but the quantization config uses
    # bfloat16 as compute dtype — confirm the mismatch is intentional.
    fp16=True,                        # Use mixed-precision training
    save_total_limit=2,               # Keep at most two checkpoints on disk
    report_to="none",                 # Set to "tensorboard" or "wandb" to log metrics
)

# The trainer wraps `model` with the LoRA adapter described by lora_config and
# trains on the 'text' column of pirate_dataset.
# NOTE(review): the tokenizer configured earlier is not passed in here, so the
# trainer presumably loads its own from the model id — confirm that is intended.
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=pirate_dataset,
    peft_config=lora_config,
    # packing=True,
)

Adding EOS to train dataset:   0%|          | 0/189 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/189 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/189 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


## Train the Model & Run Inference

In [11]:
print("Starting the fine-tuning process...")
trainer.train()
print("Fine-tuning process completed.")

# Save the trained model to Drive so it survives the Colab session. trainer.model
# is the PEFT-wrapped model, so this is expected to write the LoRA adapter only.
output_model_dir = "/content/drive/MyDrive/challenge/qwen2.5-0.5b-instruct-pirate-lora"
trainer.model.save_pretrained(output_model_dir)
# f-string: without the prefix the literal text "{output_model_dir}" was printed.
print(f"LoRA adapter saved to {output_model_dir}")

Starting the fine-tuning process...


Step,Training Loss
10,2.7931
20,1.4907
30,1.2302
40,1.0546
50,0.9678
60,0.8159
70,0.7978


Fine-tuning process completed.
LoRA adapter saved to {output_model_dir}


### Inference Examples

In [12]:
print("\n---  Testing the Pirate Model  ---")
prompt = "Explain the concept of recursion."

# Training leaves the model in train mode, where LoRA dropout is still active;
# switch to eval mode so generation uses the full adapter weights.
model.eval()

# For inference, we create a messages list and apply the chat template.
# add_generation_prompt=True appends the assistant header so the model answers.
messages = [{"role": "user", "content": prompt}]
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

inputs = tokenizer(text, return_tensors="pt").to(model.device)

outputs = model.generate(**inputs, max_new_tokens=100)

# generate() returns the prompt tokens followed by the newly generated ones.
# Decode only the new tokens instead of splitting the full text on
# "assistant\n", which breaks if the answer itself contains that string.
prompt_length = inputs["input_ids"].shape[1]
response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
assistant_response = response.strip()

print(f"Prompt: {prompt}")
print(f"Response: {assistant_response}")


---  Testing the Pirate Model  ---
Prompt: Explain the concept of recursion.
Response: Recursion be a method o’ calling itself again and again until it reaches its end; like a pirate ship sailin’ in circles without ever stopping!


In [17]:
# A list of diverse prompts designed to find failure cases.
# Every entry must end with a comma: adjacent string literals are silently
# concatenated, which previously merged Test 6 and Test 7 into one prompt.
prompts = [
    # --- Test 1: Tone Control ---
    "Write a Python function to calculate net present value. Do not use any pirate-themed language in your explanation or code comments.",
    # --- Test 2: Complex Reasoning ---
    "Compare and contrast the economic policies of mercantilism with laissez-faire capitalism, from a pirate's perspective.",
    # --- Test 3: Niche, Non-Pirate Topic ---
    "Provide a brief overview of the key differences between Mahayana and Theravada Buddhism.",
    # --- Test 4: Meta-Question / Self-Awareness ---
    "Are you aware that you are a language model speaking like a pirate?",
    # --- Test 5: Safety & Historical Nuance ---
    "How would a 17th-century pirate acquire a cannon for their ship?",
    # --- Test 6: Creative Analogy with Technical Subject ---
    "Explain the process of quantum superposition using a pirate analogy.",
    # --- Test 7: Logic and Math Word Problem ---
    "If a ship has 3 cannons and each cannon needs 4 cannonballs, but we only have 10 cannonballs, how many are we short?",
    # --- Test 8: Handling Ambiguity ---
    "Tell me about the kraken.",
    # --- Test 9: Modern Slang ---
    "This whole fine-tuning thing is fire, no cap. What's your take on it, fam?",
    # --- Test 10: Subtle Factual Correction ---
    "Explain the process of photosynthesis in oak trees, which are common in the Caribbean.",
]

In [18]:
print("\n---  Testing the Pirate Model's Limits  ---")

# Training leaves the model in train mode, where LoRA dropout is still active;
# switch to eval mode once before generating.
model.eval()

# Loop through each prompt and get a response
for prompt in prompts:
    print(f"---  Prompt: {prompt} ---")

    # For inference, we create a messages list and apply the chat template.
    # add_generation_prompt=True appends the assistant header so the model answers.
    messages = [{"role": "user", "content": prompt}]
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

    inputs = tokenizer(text, return_tensors="pt").to(model.device)

    outputs = model.generate(**inputs, max_new_tokens=150) # Increased tokens for more detailed answers

    # generate() returns the prompt tokens followed by the new ones. Decode only
    # the new tokens rather than splitting the full text on "assistant\n", which
    # breaks when the answer itself contains that string.
    prompt_length = inputs["input_ids"].shape[1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
    assistant_response = response.strip()

    print(f"Response: {assistant_response}\n\n")


---  Testing the Pirate Model's Limits  ---
---  Prompt: Write a Python function to calculate net present value. Do not use any pirate-themed language in your explanation or code comments. ---
Response: To find the net worth of investments over time, ye can use this script:nn```pythonnimport numpy as npndef NPV(net_incomes, time_periods):n    # Sum up the cash flows, then divide by the interest rate to get the final amount:nn    return np.sum(np.array(net_incomes) / (np.power(1 + 0.01, time_periods)))  # This gives you the net worth at the end!n```


---  Prompt: Compare and contrast the economic policies of mercantilism with laissez-faire capitalism, from a pirate's perspective. ---
Response: Mercantilism be like a pirate’s purse: tight enough to keep the ship in good shape, but tight enough to make it hard to sail without a captain! Laissez-faire be more like a pirate’s trusty anchor; loose enough to allow ships to navigate without fear of mutiny!


---  Prompt: Provide a brief over