In [2]:
import pandas as pd
from datasets import Dataset

# Load MovieLens
movies = pd.read_csv('movies.dat', sep='::', names=['movie_id', 'title', 'genres'], encoding='latin-1')
ratings = pd.read_csv('ratings.dat', sep='::', names=['user_id', 'movie_id', 'rating', 'timestamp'])

def create_ranking_prompt(user_id):
    # 1. Get user history (last 5 movies)
    # 2. Pick 1 ground truth (next movie watched) + 19 random negatives
    # 3. Format as string
    return {"instruction": "Rank these movies...", "input": history_str, "output": ranked_list_str}

  movies = pd.read_csv('movies.dat', sep='::', names=['movie_id', 'title', 'genres'], encoding='latin-1')


FileNotFoundError: [Errno 2] No such file or directory: 'movies.dat'

In [3]:
# Download the MovieLens-1M zip file
!wget https://files.grouplens.org/datasets/movielens/ml-1m.zip

# Unzip the folder
!unzip ml-1m.zip

# Move the .dat files to your current directory (optional, but makes the code work as-is)
!mv ml-1m/*.dat .

--2026-01-18 02:59:50--  https://files.grouplens.org/datasets/movielens/ml-1m.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.96.204
Connecting to files.grouplens.org (files.grouplens.org)|128.101.96.204|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5917549 (5.6M) [application/zip]
Saving to: ‚Äòml-1m.zip‚Äô


2026-01-18 02:59:50 (15.7 MB/s) - ‚Äòml-1m.zip‚Äô saved [5917549/5917549]

Archive:  ml-1m.zip
   creating: ml-1m/
  inflating: ml-1m/movies.dat        
  inflating: ml-1m/ratings.dat       
  inflating: ml-1m/README            
  inflating: ml-1m/users.dat         


In [1]:
import pandas as pd

# Define paths (assuming they are now in the current folder)
movies_path = 'movies.dat'
ratings_path = 'ratings.dat'

try:
    # Load Movies: ID::Title::Genres
    movies = pd.read_csv(
        movies_path, 
        sep='::', 
        names=['movie_id', 'title', 'genres'], 
        encoding='latin-1', 
        engine='python' # Required for multi-char separators
    )

    # Load Ratings: UserID::MovieID::Rating::Timestamp
    ratings = pd.read_csv(
        ratings_path, 
        sep='::', 
        names=['user_id', 'movie_id', 'rating', 'timestamp'], 
        engine='python'
    )

    print(f"Successfully loaded {len(movies)} movies and {len(ratings)} ratings.")

except FileNotFoundError:
    print("Error: Files still not found. Check if they are in:", os.getcwd())

Successfully loaded 3883 movies and 1000209 ratings.


In [4]:
import numpy as np

def generate_dataset(movies, ratings, history_length=5, num_negatives=19):
    # Create a mapping for titles
    movie_titles = dict(zip(movies.movie_id, movies.title))
    all_movie_ids = movies.movie_id.unique()
    
    dataset_rows = []
    
    # Sort ratings by timestamp
    ratings = ratings.sort_values(['user_id', 'timestamp'])
    
    for user_id, group in ratings.groupby('user_id'):
        user_history_ids = group['movie_id'].tolist()
        
        if len(user_history_ids) < history_length + 1:
            continue
            
        # History string
        history = [movie_titles[m_id] for m_id in user_history_ids[:history_length]]
        history_str = ", ".join(history)
        
        # Ground truth
        target_id = user_history_ids[history_length]
        target_title = movie_titles[target_id]
        
        # Negative sampling
        negatives = np.random.choice([m for m in all_movie_ids if m not in user_history_ids], 
                                     num_negatives, replace=False)
        negative_titles = [movie_titles[m_id] for m_id in negatives]
        
        # Create candidate list and shuffle
        candidates = [target_title] + negative_titles
        np.random.shuffle(candidates)
        candidates_str = "\n".join([f"- {c}" for c in candidates])
        
        dataset_rows.append({
            "instruction": "You are a personalized movie recommender. Based on the user's watch history, rank the candidates from most relevant to least relevant.",
            "input": f"User History: {history_str}\n\nCandidates:\n{candidates_str}",
            "output": f"The most relevant movie is {target_title} because it aligns with the user's interest in the genres and themes of their previous watches."
        })
        
    return pd.DataFrame(dataset_rows)

# Generate and preview
train_df = generate_dataset(movies, ratings)
print(f"Generated {len(train_df)} training samples.")
print(train_df.iloc[0]['input'])

Generated 6040 training samples.
User History: Girl, Interrupted (1999), Back to the Future (1985), Titanic (1997), Cinderella (1950), Meet Joe Black (1998)

Candidates:
- Conquest of the Planet of the Apes (1972)
- Airport '77 (1977)
- Cup, The (Ph√∂rpa) (1999)
- City Hall (1996)
- Governess, The (1998)
- We're Back! A Dinosaur's Story (1993)
- Ugly, The (1997)
- Portrait of a Lady, The (1996)
- King Kong (1933)
- On Any Sunday (1971)
- Sixteen Candles (1984)
- Magnolia (1999)
- Fled (1996)
- Last Days of Disco, The (1998)
- Besieged (L' Assedio) (1998)
- Kiss the Girls (1997)
- Graveyard Shift (1990)
- Van, The (1996)
- Trippin' (1999)
- Higher Learning (1995)


In [5]:
from datasets import Dataset

# Define the system prompt for the whole experiment
SYSTEM_PROMPT = "You are an expert movie recommender. Given a user's watch history, rank the provided candidates by relevance."

def format_llama_3_instruct(row):
    # Llama 3.2 Instruct format
    text = (
        f"<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n{SYSTEM_PROMPT}<|eot_id|>"
        f"<|start_header_id|>user<|end_header_id|>\n\n{row['input']}<|eot_id|>"
        f"<|start_header_id|>assistant<|end_header_id|>\n\n{row['output']}<|eot_id|>"
    )
    return {"text": text}

# Convert DataFrame to HF Dataset
hf_dataset = Dataset.from_pandas(train_df)
formatted_dataset = hf_dataset.map(format_llama_3_instruct, remove_columns=hf_dataset.column_names)

# Split for validation (Thesis requirement: 10% for validation)
dataset_split = formatted_dataset.train_test_split(test_size=0.1)
train_data = dataset_split["train"]
test_data = dataset_split["test"]

Map: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 6040/6040 [00:00<00:00, 78012.18 examples/s]


In [7]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [10]:
!pip install wandb --quiet

In [6]:
import os
import sys

# --- FIX 1: Set this BEFORE importing torch or unsloth ---
# This forces the script to see ONLY the RTX 3090 (assuming it is device 0)
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
os.environ["WANDB_PROJECT"] = "my-awesome-project"  # Optional: Set wandb project here

import torch
import wandb
from unsloth import FastLanguageModel
from trl import SFTTrainer
from transformers import TrainingArguments

# Verify CUDA is working and sees the 3090
if not torch.cuda.is_available():
    raise RuntimeError("CUDA is not available. Check your driver or install.")
print(f"Using GPU: {torch.cuda.get_device_name(0)}")

# --- Initialize W&B ---
wandb.init(
    project="my-awesome-project",
    entity="subash-sharma-islington-college",
    name="llama-3.2-3090-fix",
)

# 1. Load Model and Tokenizer
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
    max_seq_length = 2048,
    load_in_4bit = True,
    # --- FIX 2: Force model onto GPU 0 strictly ---
    # This prevents the "already on multiple devices" error
    device_map = {"": 0}, 
)

# 2. Add LoRA Adapters
model = FastLanguageModel.get_peft_model(
    model,
    r = 16, 
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", 
                     "gate_proj", "up_proj", "down_proj"], 
    lora_alpha = 16,
    lora_dropout = 0,
    bias = "none",
)

# 3. Define Trainer
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = train_data, 
    eval_dataset = test_data,
    dataset_text_field = "text",
    max_seq_length = 2048,
    args = TrainingArguments(
        per_device_train_batch_size = 8, # You can likely increase this on a 3090!
        gradient_accumulation_steps = 2,
        warmup_steps = 5,
        max_steps = 100,
        learning_rate = 2e-4,
        fp16 = False,
        bf16 = True, 
        logging_steps = 1,
        output_dir = "outputs",
        optim = "adamw_8bit",
        seed = 3407,
        report_to = "wandb",
    ),
)

# --- FIX 3: Verify Model Device ---
print(f"Model is currently on device: {model.device}")

# 4. Start Training
trainer_stats = trainer.train()

wandb.finish()

ü¶• Unsloth: Will patch your computer to enable 2x faster free finetuning.
ü¶• Unsloth Zoo will now patch everything to make training faster!


wandb: [wandb.login()] Loaded credentials for https://api.wandb.ai from /home/subash/.netrc.
wandb: Currently logged in as: subash-sharma (subash-sharma-islington-college) to https://api.wandb.ai. Use `wandb login --relogin` to force relogin


Using GPU: NVIDIA GeForce RTX 4070


wandb: Detected [huggingface_hub.inference] in use.
wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/


==((====))==  Unsloth 2026.1.3: Fast Llama patching. Transformers: 4.57.3.
   \\   /|    NVIDIA GeForce RTX 4070. Num GPUs = 1. Max memory: 11.595 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.1+cu130. CUDA: 8.9. CUDA Toolkit: 13.0. Triton: 3.5.1
\        /    Bfloat16 = TRUE. FA [Xformers = None. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Unsloth 2026.1.3 patched 28 layers with 28 QKV layers, 28 O layers and 28 MLP layers.
Unsloth: Tokenizing ["text"] (num_proc=32): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 5436/5436 [00:04<00:00, 1345.31 examples/s]
Unsloth: Tokenizing ["text"] (num_proc=32): 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 604/604 [00:03<00:00, 159.55 examples/s]
The model is already on multiple devices. Skipping the move to device specified in `args`.


Model is currently on device: cuda:0


==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 5,436 | Num Epochs = 1 | Total steps = 100
O^O/ \_/ \    Batch size per device = 8 | Gradient accumulation steps = 2
\        /    Data Parallel GPUs = 1 | Total batch size (8 x 2 x 1) = 16
 "-____-"     Trainable parameters = 24,313,856 of 3,237,063,680 (0.75% trained)


Step,Training Loss
1,2.3205
2,2.4103
3,2.3355
4,2.375
5,2.2418
6,2.0659
7,1.9168
8,1.7739
9,1.7152
10,1.5551


wandb: ERROR The nbformat package was not found. It is required to save notebook history.


0,1
train/epoch,‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà
train/global_step,‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÉ‚ñÉ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÖ‚ñÖ‚ñÖ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñá‚ñà‚ñà‚ñà‚ñà
train/grad_norm,‚ñÖ‚ñà‚ñÜ‚ñÑ‚ñÑ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÇ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÉ‚ñÖ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÑ‚ñÑ‚ñÉ‚ñÑ‚ñÑ
train/learning_rate,‚ñÅ‚ñÇ‚ñÑ‚ñÖ‚ñà‚ñà‚ñà‚ñá‚ñá‚ñá‚ñá‚ñá‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÜ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÖ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÑ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÅ
train/loss,‚ñà‚ñá‚ñÖ‚ñÖ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÉ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÇ‚ñÅ‚ñÅ‚ñÇ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ‚ñÅ

0,1
total_flos,9089653466185728.0
train/epoch,0.29412
train/global_step,100.0
train/grad_norm,0.47499
train/learning_rate,0.0
train/loss,0.9038
train_loss,1.14453
train_runtime,159.3129
train_samples_per_second,10.043
train_steps_per_second,0.628


In [7]:
# 1. Inference Setup
FastLanguageModel.for_inference(model) # Enable native 2x faster inference

# 2. Define your prompt style (Alpaca/Llama style)
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
"""

# 3. Run a Test Query
inputs = tokenizer(
[
    alpaca_prompt.format(
        "Explain the main advantage of using LoRA for fine-tuning.", # Instruction
        "", # Input
    )
], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 128, use_cache = True)
print(tokenizer.batch_decode(outputs))

# 4. Save the Model (LoRA Adapters only)
# model.save_pretrained("lora_model") # Local save
# tokenizer.save_pretrained("lora_model")

# Optional: Push to Hugging Face Hub if you want
# model.push_to_hub("your-username/llama-3.2-lora", token = "hf_...")

['<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\nExplain the main advantage of using LoRA for fine-tuning.\n\n### Input:\n\n\n### Response:\nThe main advantage of using LoRA (Low-Rank Adaptation) for fine-tuning is that it allows for efficient and effective adaptation of a pre-trained model to a new task or dataset. LoRA works by learning a low-rank matrix that represents the most important features of the input data. This matrix is then used to adapt the pre-trained model to the new task, resulting in a more accurate and relevant model. The key benefit of LoRA is its ability to reduce the dimensionality of the input data while preserving the most important features, which makes it particularly useful for large-scale tasks where computational resources are limited. By using Lo']


In [8]:
# 1. Enable native fast inference
FastLanguageModel.for_inference(model)

# 2. Define the prompt
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
"""

# 3. Run Inference with more tokens
inputs = tokenizer(
[
    alpaca_prompt.format(
        "Explain the main advantage of using LoRA for fine-tuning.", 
        "", 
    )
], return_tensors = "pt").to("cuda")

# Increased max_new_tokens to 256 so it doesn't cut off
outputs = model.generate(**inputs, max_new_tokens = 256, use_cache = True)
print(tokenizer.batch_decode(outputs)[0])

<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Explain the main advantage of using LoRA for fine-tuning.

### Input:


### Response:
The main advantage of using LoRA (Low-Rank Adaptation) for fine-tuning is that it allows for efficient adaptation of pre-trained models to new tasks. LoRA works by selecting a subset of the model's weights and adapting them to the new task using a low-rank matrix. This approach enables fast and efficient fine-tuning, making it suitable for large-scale applications where computational resources are limited. By reducing the dimensionality of the model's weights, LoRA also reduces the memory requirements, making it more scalable. Overall, LoRA provides a trade-off between accuracy and computational efficiency, making it an attractive option for fine-tuning.<|eot_id|>


In [9]:
inputs = tokenizer(
[
    alpaca_prompt.format(
        "Recommend a movie for a user who likes sci-fi and confusing plots.", # Your specific task
        "User History: Inception, Interstellar, Primer", # Your specific input format
    )
], return_tensors = "pt").to("cuda")

outputs = model.generate(**inputs, max_new_tokens = 128, use_cache = True)
print(tokenizer.batch_decode(outputs)[0])

<|begin_of_text|>Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
Recommend a movie for a user who likes sci-fi and confusing plots.

### Input:
User History: Inception, Interstellar, Primer

### Response:
Jurassic Park (1993) - This film aligns with the user's interest in sci-fi and complex plots. The movie follows a group of scientists and adventurers as they explore a theme park filled with cloned dinosaurs. The film's blend of action, adventure, and science fiction elements should appeal to the user.<|eot_id|>


In [11]:
import wandb

# 1. Start a new run strictly for logging this result
# (Since the previous training run was closed)
wandb.init(
    project="my-awesome-project",
    entity="subash-sharma-islington-college",
    name="llama-inference-test",
    job_type="inference"
)

# 2. Prepare the Table
# (Assuming 'outputs' and 'alpaca_prompt' variables still exist in memory)
input_text = "Recommend a movie for a user who likes sci-fi and confusing plots."
context = "User History: Inception, Interstellar, Primer"
generated_text = tokenizer.batch_decode(outputs)[0]

# 3. Create and Log the Table
my_table = wandb.Table(columns=["Instruction", "Input", "Generated Response"])
my_table.add_data(input_text, context, generated_text)

wandb.log({"inference_test": my_table})
print("Inference results logged to W&B Table!")

# 4. Save Model
model.save_pretrained("llama3.2-lora-final")
tokenizer.save_pretrained("llama3.2-lora-final")

# 5. Close this run
wandb.finish()

Inference results logged to W&B Table!


wandb: ERROR The nbformat package was not found. It is required to save notebook history.
