## Step 1: Import necessary modules and load configurations

In [1]:
import sys
import json
import os
import torch
from transformers import (
    AutoTokenizer, 
    AutoModelForCausalLM, 
    Trainer, 
    TrainingArguments, 
    DataCollatorForSeq2Seq, 
    BitsAndBytesConfig
)
from peft import LoraConfig, get_peft_model, PeftModel, prepare_model_for_kbit_training
from datasets import Dataset

# Set environment variable for memory management
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Load configuration settings
from config import TOKENIZER_PATH, MODEL_PATH, PIPELINE_PARAMS, QLORA_PARAMS, ALPACA_LORA_PROMPTS_CANDIDATE_ITEMS

# Verification
print("Configuration Loaded:")
print("Tokenizer Path:", TOKENIZER_PATH)
print("Model Path:", MODEL_PATH)
print("Pipeline Parameters:", PIPELINE_PARAMS)
print("QLoRA Parameters:", QLORA_PARAMS)
print("Prompt Template:", ALPACA_LORA_PROMPTS_CANDIDATE_ITEMS)

  from .autonotebook import tqdm as notebook_tqdm


Configuration Loaded:
Tokenizer Path: models/hf-frompretrained-download/meta-llama/Meta-Llama-3-8B-Instruct
Model Path: models/hf-frompretrained-downloadmeta-llama/Meta-Llama-3-8B-Instruct
Pipeline Parameters: {'max_length': 2048, 'num_return_sequences': 1, 'temperature': 0.7, 'top_k': 50, 'top_p': 0.95, 'repetition_penalty': 1.2}
QLoRA Parameters: {'lora_r': 8, 'lora_alpha': 16, 'lora_dropout': 0.05, 'lora_target_modules': ['q_proj', 'v_proj'], 'gradient_accumulation_steps': 2, 'lora_num_epochs': 2, 'lora_val_iterations': 100, 'lora_early_stopping_patience': 10, 'lora_lr': 0.0001, 'lora_micro_batch_size': 1}
Prompt Template: {'instruction': "### Instruction:\n You are a recommender system specialized. Based on the following user profile text, generate a list of general product categories that align with the user's preferences and interests. Approach this task by treating these categories as a cohesive set, ensuring that they collectively reflect the user’s overall profile and maximize

## Step 2: Load and verify training data

In [2]:

# Load the training data
data_path = "QLoRa_finetuning/new_candidate_items_with_profile.json"
with open(data_path, "r") as file:
    training_data = json.load(file)
    
#torch.cuda.empty_cache()

# Verify data structure
print("Data Structure Verification:")
for i, sample in enumerate(training_data[:2]):
    assert "User_ID" in sample, f"User_ID missing in sample {i}"
    assert "User_Profile" in sample, f"User_Profile missing in sample {i}"
    assert "Candidate_Items" in sample, f"Candidate_Items missing in sample {i}"
print("Data verification successful!")

Data Structure Verification:
Data verification successful!


## Step 3: Initialize the Tokenizer and Model with Quantization

In [3]:
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_PATH)
tokenizer.pad_token = tokenizer.eos_token  # Set padding token to EOS if not already set

# Set 4-bit quantization configuration for memory efficiency
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',  # Use NormalFloat4 for better memory efficiency
    bnb_4bit_use_double_quant=True  # Double quantization for more memory saving
)

# Load model with quantization
model = AutoModelForCausalLM.from_pretrained(
    MODEL_PATH,
    quantization_config=bnb_config,
    device_map="auto"  # Automatically maps layers to available GPU memory
)


Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.12s/it]


## Step 4: Preprocessing Function to Match Reviews with Profiles

In [4]:
# Preprocess function to format the data for candidate item generation
def preprocess_function(profile_sample):
    # Use the user profile as input
    input_text = ALPACA_LORA_PROMPTS_CANDIDATE_ITEMS['input'].replace(
        "{user_profile}", profile_sample["User_Profile"]
    )
    
    # Set up the output text (Candidate Items) as the expected response
    output_text = "\n".join(
        [f"{i + 1}. {item}" for i, item in enumerate(profile_sample["Candidate_Items"].values())]
    )
    
    # Format the complete prompt for training
    full_text = f"### Instruction:\n{ALPACA_LORA_PROMPTS_CANDIDATE_ITEMS['instruction']}\n\n{input_text}\n\n### Response:\nProduct Categories:\n{output_text}"
    return full_text

# Verify preprocessing
print("Preprocessed Sample:", preprocess_function(training_data[10]))

Preprocessed Sample: ### Instruction:
### Instruction:
 You are a recommender system specialized. Based on the following user profile text, generate a list of general product categories that align with the user's preferences and interests. Approach this task by treating these categories as a cohesive set, ensuring that they collectively reflect the user’s overall profile and maximize satisfaction. 

### Input 
 User Profile: 
 "Short-Term Interests": The user has recently engaged with gel nail polish products, particularly a gel top coat that can be cured over regular nail polish for long-lasting results. They also showed interest in practical household items like amber glass spray bottles for homemade cleaners and automatic soap dispensers.
"Long-Term Preferences": An analysis of the user's reviews reveals consistent themes:
* Interest in nail care products that enhance durability and appearance, appreciating innovations that simplify the process
* Preference for practical and functio

## Step 5: Tokenize and Prepare Data

In [5]:
# Tokenization function
def tokenize_function(sample):
    processed_text = preprocess_function(sample)
    tokenized = tokenizer(
        processed_text,
        truncation=True,
        max_length=PIPELINE_PARAMS['max_length'],
        padding="max_length",
        return_tensors="pt"
    )
    tokenized["labels"] = tokenized["input_ids"].clone()  # Set labels identical to input_ids
    return tokenized

# LoRA Configuration
lora_config = LoraConfig(
    r=QLORA_PARAMS['lora_r'],
    lora_alpha=QLORA_PARAMS['lora_alpha'],
    lora_dropout=QLORA_PARAMS['lora_dropout'],
    target_modules=QLORA_PARAMS['lora_target_modules'],
    bias="none",
    task_type="CAUSAL_LM"
)

## Step 6: Configure LoRA and Training Parameters

In [6]:
# Apply LoRA configuration to the model
model = get_peft_model(model, lora_config)
batch_size = 16

# Training sizes
training_sizes = [32]

# Loop through different training sizes
for train_size in training_sizes:
    # Split the dataset
    train_data = training_data[:train_size]
    eval_data = training_data[train_size:train_size + int(0.2 * train_size)]  # 20% of training data for evaluation

    # Tokenize datasets
    train_tokenized_data = [tokenize_function(sample) for sample in train_data]
    eval_tokenized_data = [tokenize_function(sample) for sample in eval_data]

    # Convert tokenized data to Dataset format
    train_dataset = Dataset.from_dict({
        "input_ids": [x["input_ids"][0] for x in train_tokenized_data],
        "attention_mask": [x["attention_mask"][0] for x in train_tokenized_data],
        "labels": [x["labels"][0] for x in train_tokenized_data]
    })
    eval_dataset = Dataset.from_dict({
        "input_ids": [x["input_ids"][0] for x in eval_tokenized_data],
        "attention_mask": [x["attention_mask"][0] for x in eval_tokenized_data],
        "labels": [x["labels"][0] for x in eval_tokenized_data]
    })

    # Training arguments
    training_args = TrainingArguments(
        output_dir=f"outputs/adapter_test_candidate_items_epoch_{QLORA_PARAMS['lora_num_epochs']}_{train_size}_samples",
        per_device_train_batch_size=1,
        gradient_accumulation_steps=QLORA_PARAMS['gradient_accumulation_steps'],
        num_train_epochs=QLORA_PARAMS['lora_num_epochs'],
        evaluation_strategy="steps",
        eval_steps=QLORA_PARAMS['lora_val_iterations'],
        save_steps=QLORA_PARAMS['lora_val_iterations'],
        logging_steps=10,
        learning_rate=QLORA_PARAMS['lora_lr'],
        save_total_limit=2,
        load_best_model_at_end=False,
        dataloader_pin_memory=False,
        report_to="none",
        fp16=True
    )

    # Data collator
    data_collator = DataCollatorForSeq2Seq(tokenizer, pad_to_multiple_of=8, padding=True)

    # Initialize Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        data_collator=data_collator
    )

    # Clear GPU cache before training
    torch.cuda.empty_cache()

    # Start training
    print(f"Starting training with {train_size} samples.")
    trainer.train()
    adapter_name = f"adapter_test_candidate_items_epoch_{QLORA_PARAMS['lora_num_epochs']}_{train_size}_samples"
    # Save the model and tokenizer in separate directories for each training size
    model.save_pretrained(f"outputs/{adapter_name}")
    tokenizer.save_pretrained(f"outputs/{adapter_name}")
    print(f"Model trained with {train_size} samples saved to outputs/{adapter_name}")



Starting training with 32 samples.


 31%|███▏      | 10/32 [09:57<22:02, 60.12s/it]

{'loss': 9.7967, 'grad_norm': 18.55855369567871, 'learning_rate': 8.125000000000001e-05, 'epoch': 0.62}


 62%|██████▎   | 20/32 [23:49<16:41, 83.47s/it]

{'loss': 4.8152, 'grad_norm': 27.309738159179688, 'learning_rate': 5e-05, 'epoch': 1.25}


 94%|█████████▍| 30/32 [32:44<01:51, 55.51s/it]

{'loss': 0.879, 'grad_norm': 1.7935947179794312, 'learning_rate': 1.8750000000000002e-05, 'epoch': 1.88}


100%|██████████| 32/32 [34:35<00:00, 64.85s/it]


{'train_runtime': 2075.3305, 'train_samples_per_second': 0.031, 'train_steps_per_second': 0.015, 'train_loss': 4.87003037892282, 'epoch': 2.0}
Model trained with 32 samples saved to outputs/adapter_test_candidate_items_epoch_2_32_samples


In [7]:
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch
from transformers import BitsAndBytesConfig
from config import *
torch.cuda.empty_cache()


TOKENIZER_PATH = "models/hf-frompretrained-download/meta-llama/Meta-Llama-3-8B-Instruct"
MODEL_PATH = "models/hf-frompretrained-downloadmeta-llama/Meta-Llama-3-8B-Instruct"
# Load the base model and tokenizer
base_model_path ="models/hf-frompretrained-downloadmeta-llama/Meta-Llama-3-8B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_PATH, use_fast=False)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True
)

model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    #quantization_config=bnb_config,
    device_map="auto",
    #torch_dtype=torch.float16,
)

# Load the adapter
adapter_path = f"outputs/adapter_test_candidate_items_epoch_{QLORA_PARAMS['lora_num_epochs']}_{64}_samples"
adapter_name = "candidate_items"
model = PeftModel.from_pretrained(model, adapter_path, adapter_name=adapter_name)

# Set the active adapter
model.set_adapter(adapter_name)
model.eval()

# Define the text generation function
def generate_text(prompt):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=PIPELINE_PARAMS['max_length'],
        do_sample=True,
        temperature=PIPELINE_PARAMS['temperature'],
        top_k=PIPELINE_PARAMS['top_k'],
        top_p=PIPELINE_PARAMS['top_p'],
        repetition_penalty=PIPELINE_PARAMS['repetition_penalty'],
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id,
    )
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_text

# Define a sample user profile input for testing
user_profile = """
- Short-term Intentions: Looking for high-quality tech accessories.
- Long-term Preferences: Prefers durable, high-performance gadgets.
- User Profile: The users likes technical stuff with preferences for windows laptops
"""

# Format the prompt
prompt = (
    ALPACA_LORA_PROMPTS_CANDIDATE_ITEMS['instruction'] + "\n\n" +
    ALPACA_LORA_PROMPTS_CANDIDATE_ITEMS['input'].replace("{user_profile}", user_profile)+ "\n" + "### Response"
)

# Generate candidate items using the model
generated_text = generate_text(prompt)

# Display the output
print("Generated Candidate Items:")
print(generated_text)


Unused kwargs: ['_load_in_4bit', '_load_in_8bit', 'quant_method']. These kwargs are not used in <class 'transformers.utils.quantization_config.BitsAndBytesConfig'>.
Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00,  1.13s/it]


KeyboardInterrupt: 