# Week 6 - Product Pricer Challenge

**Establish a baseline with GPT-4o, then attempt to beat it with fine-tuning**


## Initialize and Load Configuration


In [None]:
# Imports
import os
import re
import math
import json
import random
import pickle
from collections import Counter
import numpy as np
import matplotlib.pyplot as plt
from huggingface_hub import login
from openai import OpenAI

# SimpleItem class definition for pickle compatibility
class SimpleItem:
    """
    Lightweight product record, defined here so pickled instances can be restored.

    Mirrors the structure used in the CSV conversion script.
    """

    def __init__(self, title, description, price, category="Human_Generated", token_count=0):
        # Attribute order is kept stable: title, description, price, category, token_count.
        self.title, self.description, self.price = title, description, price
        self.category, self.token_count = category, token_count

    def test_prompt(self):
        """
        Build the evaluation prompt for this item.

        The prompt ends with "Price is $" and does not contain the actual
        price; the testing framework calls this method on every item.
        """
        return (
            "How much does this cost to the nearest dollar?\n\n"
            f"{self.title}\n\n"
            f"{self.description}\n\n"
            "Price is $"
        )

    def __repr__(self):
        # Only the first 50 characters of the title are shown; "..." is always appended.
        short_title = self.title[:50]
        return "SimpleItem(title='{}...', price=${})".format(short_title, self.price)

# Import our custom classes
# Use original testing class to avoid matplotlib color issues
import sys

# Directory two levels above the current working directory, where the
# `testing` module (and the fallback `items` module) are expected to live.
_PARENT_MODULES_DIR = os.path.dirname(os.path.dirname(os.path.abspath('')))


def _add_parent_modules_dir():
    """Append the parent modules directory to sys.path once, even when this cell is re-run."""
    if _PARENT_MODULES_DIR not in sys.path:
        sys.path.append(_PARENT_MODULES_DIR)


try:
    from enhanced_items import Item
    # sys.path is extended only after enhanced_items has resolved, so the
    # lookup order for that module is unchanged.
    _add_parent_modules_dir()
    from testing import Tester
    print("✅ Using enhanced items and original testing from parent directory")
except ImportError:
    # Fallback to parent directory modules
    _add_parent_modules_dir()
    from items import Item
    from testing import Tester
    print("✅ Using modules from parent directory")

print("✅ All imports successful!")


In [None]:
# Environment setup
# Prefer Colab secrets; on any failure of that path (including the import
# failing outside Colab) fall back to a local .env file.
try:
    from google.colab import userdata
    os.environ['OPENAI_API_KEY'] = userdata.get('OPENAI_API_KEY')
    os.environ['HF_TOKEN'] = userdata.get('HF_TOKEN')
    print("✅ Using Colab secrets")
except Exception:
    # `Exception` instead of a bare `except` so that KeyboardInterrupt and
    # SystemExit are not swallowed while still covering every Colab-side error.
    from dotenv import load_dotenv
    load_dotenv(override=True)
    # The placeholder default keeps os.environ assignment from failing when a
    # key is absent; API calls made with the placeholder will still be rejected.
    os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')
    os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')
    print("✅ Using local .env file")


In [None]:
# Log in to HuggingFace
# HF_TOKEN is read straight from the environment; a missing variable raises KeyError here.
hf_token = os.environ['HF_TOKEN']
login(hf_token)

# Initialize OpenAI client
# NOTE: this binds the name `openai` to a client instance; later cells call
# `openai.chat`, `openai.files` and `openai.fine_tuning` on this object.
# NOTE(review): presumably the client picks up OPENAI_API_KEY from the environment — confirm.
openai = OpenAI()

# Enable matplotlib inline for Colab
# (IPython magic: only valid inside a notebook kernel, not as plain Python.)
%matplotlib inline


## Load Data


In [None]:
# Load pre-processed pickle files (our data loading hack)
# Relative locations searched, in order, for each `<split>.pkl` file.
_PICKLE_SEARCH_PREFIXES = ('', 'data/', '../')


def _items_from_dicts(records):
    """Convert a list of plain dictionaries into SimpleItem objects."""
    return [
        SimpleItem(
            title=record.get('title', ''),
            description=record.get('description', ''),
            price=record.get('price', 0.0),
            category=record.get('category', 'Human_Generated'),
            token_count=record.get('token_count', 0),
        )
        for record in records
    ]


def _load_split(split_name, label):
    """Return the first loadable `<split_name>.pkl` dataset, or None if none can be loaded."""
    for prefix in _PICKLE_SEARCH_PREFIXES:
        file_path = f"{prefix}{split_name}.pkl"
        if not os.path.exists(file_path):
            continue
        try:
            # SECURITY: pickle.load can execute arbitrary code; only load
            # pickle files that come from a trusted source.
            with open(file_path, 'rb') as f:
                data = pickle.load(f)
            item_count = len(data)
        except Exception as e:
            print(f"❌ Error loading {file_path}: {e}")
            continue
        print(f"✅ Loaded {label} data: {file_path} ({item_count} items)")

        # A pickled list of dictionaries loads without error, so the
        # conversion has to happen on the success path, not in the handler.
        if isinstance(data, list) and data and isinstance(data[0], dict):
            try:
                data = _items_from_dicts(data)
            except Exception as e:
                print(f"   ❌ Failed to convert {file_path}: {e}")
                continue
            print(f"   Converted {len(data)} {label} items from dictionary format")
        return data
    return None


def load_pickle_data():
    """
    Load pre-processed pickle files with fallback to sample data

    Each split is looked up as `<split>.pkl` in the current directory, then
    `data/`, then `../`. Lists of dictionaries are converted to SimpleItem
    objects. If the train or test split is missing or empty, all three
    splits are replaced by the output of create_sample_data().

    Returns:
        A `(train, test, validation)` tuple. `validation` is an empty list
        (never None) when no validation data could be loaded.
    """
    print("📦 Loading pre-processed pickle files...")

    train = _load_split('train', 'training')
    test = _load_split('test', 'test')
    validation = _load_split('validation', 'validation')

    # If no pickle files found, create sample data
    if not train or not test:
        print("🔄 No pickle files found, creating sample data...")
        train, test, validation = create_sample_data()

    # Callers take len(validation) and iterate over it, so never hand back None.
    if validation is None:
        print("⚠️  WARNING: No validation data found; using an empty validation set.")
        validation = []

    # Debug: Check what we actually loaded
    print(f"\n🔍 Debug - Data loaded:")
    print(f"   train: {len(train) if train else 0} items")
    print(f"   test: {len(test) if test else 0} items")
    print(f"   validation: {len(validation) if validation else 0} items")

    # Additional safety check; only reachable if create_sample_data() ever
    # returns an empty test split.
    if not test:
        print("⚠️  WARNING: Test dataset is empty! Creating emergency sample data...")
        test = [
            SimpleItem("Test Product 1", "A test product for evaluation", 25.99, "Test", 10),
            SimpleItem("Test Product 2", "Another test product", 45.50, "Test", 12),
            SimpleItem("Test Product 3", "Third test product", 15.75, "Test", 8)
        ]
        print(f"   Emergency test data created: {len(test)} items")

    return train, test, validation

def create_sample_data():
    """
    Build a small hard-coded product catalogue for demonstration.

    Returns a `(train, test, validation)` tuple of SimpleItem lists holding
    the first 10, the next 3 and the remaining 2 products respectively.
    """
    # Sample product data (expanded for better testing)
    catalogue = [
        {"title": "Wireless Bluetooth Headphones", "price": 89.99, "category": "Electronics"},
        {"title": "Stainless Steel Water Bottle", "price": 24.99, "category": "Home & Kitchen"},
        {"title": "Organic Cotton T-Shirt", "price": 19.99, "category": "Clothing"},
        {"title": "Ceramic Coffee Mug", "price": 12.99, "category": "Home & Kitchen"},
        {"title": "LED Desk Lamp", "price": 45.99, "category": "Electronics"},
        {"title": "Yoga Mat", "price": 29.99, "category": "Sports & Outdoors"},
        {"title": "Leather Wallet", "price": 39.99, "category": "Accessories"},
        {"title": "Bluetooth Speaker", "price": 79.99, "category": "Electronics"},
        {"title": "Kitchen Knife Set", "price": 129.99, "category": "Home & Kitchen"},
        {"title": "Running Shoes", "price": 89.99, "category": "Sports & Outdoors"},
        {"title": "Smartphone Case", "price": 15.99, "category": "Electronics"},
        {"title": "Coffee Maker", "price": 89.99, "category": "Home & Kitchen"},
        {"title": "Backpack", "price": 49.99, "category": "Accessories"},
        {"title": "Tennis Racket", "price": 79.99, "category": "Sports & Outdoors"},
        {"title": "Laptop Stand", "price": 34.99, "category": "Electronics"}
    ]

    def _to_item(entry):
        # The description is derived from the title; the token count is a
        # rough estimate of one token per four characters of title + description.
        name = entry['title']
        blurb = f"High-quality {name.lower()}"
        return SimpleItem(
            title=name,
            description=blurb,
            price=entry['price'],
            category=entry['category'],
            token_count=len(name + blurb) // 4
        )

    items = [_to_item(entry) for entry in catalogue]

    # Split into train/test/validation: 10 / 3 / 2 items
    train, test, validation = items[:10], items[10:13], items[13:]

    print(f"✅ Created sample data: {len(train)} train, {len(test)} test, {len(validation)} validation")
    return train, test, validation

# Load the data
train, test, validation = load_pickle_data()

# A split may be None (for example when no validation pickle was found), so
# the counts are computed defensively instead of calling len() on it directly.
print(f"\n📊 Dataset Statistics:")
print(f"   Training: {len(train) if train else 0} items")
print(f"   Test: {len(test) if test else 0} items")
print(f"   Validation: {len(validation) if validation else 0} items")

# Show one training item as a quick sanity check of the loaded structure.
if train:
    print(f"\n🔍 Sample Training Item:")
    print(f"   Title: {train[0].title}")
    print(f"   Price: ${train[0].price}")
    print(f"   Category: {train[0].category}")


## Prepare Fine-tuning Data


In [None]:
# OpenAI recommends fine-tuning with 50-100 examples
# Use our actual train/validation split from the pickle files
fine_tune_train = train  # Whole training split, no sub-sampling (size depends on the loaded data)
fine_tune_validation = validation  # Whole validation split, no sub-sampling

print(f"📊 Fine-tuning data prepared:")
print(f"   Training: {len(fine_tune_train)} items")
print(f"   Validation: {len(fine_tune_validation)} items")

# Weights & Biases integration settings; this dict is passed as an
# `integrations` entry when the fine-tuning jobs are created.
wandb_integration = {"type": "wandb", "wandb": {"project": "gpt-pricer-ft"}}


## Helper Functions


In [None]:
# Utility function to extract price from a string
# Compiled once: a decimal number (optionally signed) or a plain run of digits.
_PRICE_PATTERN = re.compile(r"[-+]?\d*\.\d+|\d+")


def get_price(s):
    """
    Extract the first number found in `s` and return it as a float.

    Dollar signs and thousands separators are removed before searching.
    Returns 0 when `s` is None/empty or contains no number, so a missing
    model reply is scored as a zero prediction instead of raising.
    Non-string input is converted with str() first.
    """
    if not s:
        return 0
    cleaned = str(s).replace('$', '').replace(',', '')
    match = _PRICE_PATTERN.search(cleaned)
    return float(match.group()) if match else 0

# Prompt generation functions
# Shared by the inference and training prompts so the two can never drift apart.
_PRICER_SYSTEM_MESSAGE = "You estimate prices of items. Reply only with the price, no explanation"


def _pricer_messages(item, assistant_content):
    """Build the system/user/assistant message list used by both prompt variants."""
    # Drop the "to the nearest dollar" hint and the trailing "Price is $" cue
    # from the item's test prompt; the cue is carried by the assistant message.
    user_prompt = item.test_prompt().replace(" to the nearest dollar", "").replace("\n\nPrice is $", "")
    return [
        {"role": "system", "content": _PRICER_SYSTEM_MESSAGE},
        {"role": "user", "content": user_prompt},
        {"role": "assistant", "content": assistant_content}
    ]


def messages_for(item):
    """Return the inference prompt: the assistant message stops at "Price is $"."""
    return _pricer_messages(item, "Price is $")


def messages_with_price(item):
    """Return the training prompt: the assistant message includes the price with two decimals."""
    return _pricer_messages(item, f"Price is ${item.price:.2f}")

print("✅ Helper functions defined!")


## Baseline GPT-4o Model


In [None]:
def gpt_4o_frontier(item):
    """
    Ask the gpt-4o model for a price estimate of `item`.

    The request uses the prompt from messages_for(), a fixed seed of 42 and
    at most 5 completion tokens; the reply text is parsed with get_price().
    """
    request = {
        "model": "gpt-4o",
        "messages": messages_for(item),
        "seed": 42,
        "max_tokens": 5,
    }
    completion = openai.chat.completions.create(**request)
    return get_price(completion.choices[0].message.content)

print("🧪 Testing baseline GPT-4o model...")

# Run the evaluation only when the test split actually contains items;
# otherwise print diagnostics about the `test` variable.
if test and len(test) != 0:
    print(f"📊 Testing on {len(test)} items...")
    print(f"🔍 Test data preview:")
    # Show first 3 items
    for i, item in enumerate(test[:3]):
        print(f"   Item {i}: {item.title} - ${item.price}")

    try:
        # Size the Tester to the data set so it does not index past the end.
        tester = Tester(gpt_4o_frontier, test, size=len(test))
        tester.run()
    except IndexError as e:
        print(f"❌ IndexError in Tester.test: {e}")
        print(f"🔍 Test data length: {len(test)}")
        print("💡 This suggests the Tester is trying to access more items than available.")
else:
    print("❌ No test data available! Cannot run baseline test.")
    print("💡 Please check the data loading section above.")
    print("🔍 Debug info:")
    print(f"   test variable exists: {test is not None}")
    print(f"   test length: {len(test) if test else 'N/A'}")
    print(f"   test type: {type(test)}")


## Fine-tuning Implementation


In [None]:
# `fine_tuned_model_name` is assigned by the cell that retrieves the finished
# fine-tuning job, which appears later in this notebook. Look it up
# defensively so running the notebook top-to-bottom does not raise NameError.
if globals().get("fine_tuned_model_name"):
    def gpt_fine_tuned(item):
        """Ask the fine-tuned model for a price estimate of `item` and parse the reply."""
        response = openai.chat.completions.create(
            model=fine_tuned_model_name,
            messages=messages_for(item),
            seed=42,
            max_tokens=7
        )
        reply = response.choices[0].message.content
        return get_price(reply)

    print("🧪 Testing fine-tuned model...")
    # Create Tester with correct size parameter to avoid IndexError
    tester = Tester(gpt_fine_tuned, test, size=len(test))
    tester.run()
else:
    print("⏳ Fine-tuned model not ready yet. Run the cell that retrieves the fine-tuned model name, then re-run this cell.")


In [None]:
# Convert items to JSONL format for fine-tuning
def make_jsonl(items):
    """
    Serialise `items` as JSONL text, one `{"messages": [...]}` object per line.

    Each line holds the training conversation from messages_with_price();
    the result has no trailing newline and is "" for an empty input.
    """
    lines = [
        '{"messages": ' + json.dumps(messages_with_price(item)) + '}'
        for item in items
    ]
    return "\n".join(lines)

def write_jsonl(items, filename):
    """Write the JSONL produced by make_jsonl(items) to `filename`, replacing any existing file."""
    # The file is opened before the JSONL is built, as in the original flow.
    with open(filename, "w") as handle:
        handle.write(make_jsonl(items))

# Create fine-tuning files
write_jsonl(items=fine_tune_train, filename="fine_tune_train.jsonl")
write_jsonl(items=fine_tune_validation, filename="fine_tune_validation.jsonl")

# Report the two files that were just written, one per line.
print(
    "✅ Fine-tuning files created:",
    "   - fine_tune_train.jsonl",
    "   - fine_tune_validation.jsonl",
    sep="\n",
)


In [None]:
# Upload files to OpenAI
# Each JSONL file is opened in binary mode and uploaded with purpose="fine-tune".
# The `.id` of each returned file object is what the fine-tuning job refers to.
with open("fine_tune_train.jsonl", "rb") as f:
    train_file = openai.files.create(file=f, purpose="fine-tune")

with open("fine_tune_validation.jsonl", "rb") as f:
    validation_file = openai.files.create(file=f, purpose="fine-tune")

print(f"✅ Files uploaded to OpenAI:")
print(f"   Training file ID: {train_file.id}")
print(f"   Validation file ID: {validation_file.id}")


In [None]:
# Create fine-tuning job
# Trains gpt-4o-mini for a single epoch on the uploaded training file, using
# the uploaded validation file and a fixed seed of 42. The resulting job
# object is kept in `fine_tuning_job`; later cells read its `.id`.
# NOTE(review): the W&B integration is always sent here even though it is
# described as optional — presumably the request fails if Weights & Biases
# is not configured for the OpenAI account; confirm.
fine_tuning_job = openai.fine_tuning.jobs.create(
    training_file=train_file.id,
    validation_file=validation_file.id,
    model="gpt-4o-mini",
    seed=42,
    hyperparameters={"n_epochs": 1},
    integrations=[wandb_integration],
    suffix="pricer"
)

print(f"🚀 Fine-tuning job created: {fine_tuning_job.id}")
print("⏳ This will take some time to complete...")
print("💡 You can monitor progress in the OpenAI dashboard or Weights & Biases")


In [None]:
# FIXED: Test enhanced model (if ready) - with correct Tester size
# `fine_tuning_job_v2` is created by a later cell in this notebook. A
# NameError (cell not run yet) and a failed retrieval are reported separately,
# and errors raised while evaluating are no longer mislabelled as "not ready".
try:
    enhanced_model_name = openai.fine_tuning.jobs.retrieve(fine_tuning_job_v2.id).fine_tuned_model
except NameError as e:
    print(f"⚠️  Enhanced fine-tuning job is not defined yet ({e}).")
    enhanced_model_name = None
except Exception as e:
    print(f"❌ Could not retrieve the enhanced fine-tuning job: {e}")
    enhanced_model_name = None

# The model name is empty until the job has produced a model, so only
# evaluate when one is actually available.
if enhanced_model_name:
    def gpt_enhanced_fine_tuned(item):
        """Ask the enhanced fine-tuned model for a price estimate of `item` and parse the reply."""
        response = openai.chat.completions.create(
            model=enhanced_model_name,
            messages=messages_v2(item, with_price=False),
            seed=42,
            temperature=1.0,
            max_tokens=7
        )
        reply = response.choices[0].message.content
        return get_price(reply)

    print("🧪 Testing enhanced fine-tuned model...")
    # Create Tester with correct size parameter to avoid IndexError
    tester = Tester(gpt_enhanced_fine_tuned, test, size=len(test))
    tester.run()
else:
    print("⏳ Enhanced fine-tuned model not ready yet.")
    print("💡 Please wait for completion and re-run this cell.")


In [None]:
# Check job status
# `job_id` is kept as a module-level name; the cell that looks up the
# fine-tuned model name reuses it.
job_id = fine_tuning_job.id
job_status = openai.fine_tuning.jobs.retrieve(job_id)

print(f"📊 Job Status: {job_status.status}")
print(f"📈 Training File: {job_status.training_file}")
print(f"📈 Validation File: {job_status.validation_file}")
print(f"🤖 Model: {job_status.model}")

# Get recent events
# At most 10 events are requested; each is printed with its creation timestamp and message.
events = openai.fine_tuning.jobs.list_events(fine_tuning_job_id=job_id, limit=10)
print(f"\n📋 Recent Events:")
for event in events.data:
    print(f"   {event.created_at}: {event.message}")


## Test Fine-tuned Model


In [None]:
# Wait for fine-tuning to complete and get the model name
# Note: In practice, you would wait for the job to complete
try:
    fine_tuned_model_name = openai.fine_tuning.jobs.retrieve(job_id).fine_tuned_model
except Exception as e:
    # The lookup itself failed (for example an earlier cell was not run, or
    # the API call was rejected); report the cause instead of hiding it.
    print(f"❌ Could not retrieve fine-tuning job: {e}")
    fine_tuned_model_name = None

# A job that has not finished yields no model name without raising, so
# readiness is decided by the value, not by the absence of an exception.
if fine_tuned_model_name:
    print(f"✅ Fine-tuned model ready: {fine_tuned_model_name}")
else:
    print("⏳ Fine-tuning still in progress...")
    print("💡 Please wait for completion and re-run this cell")


In [None]:
# Test the fine-tuned model (if ready)
if fine_tuned_model_name:
    def gpt_fine_tuned(item):
        """Ask the fine-tuned model for a price estimate of `item` and parse the reply."""
        response = openai.chat.completions.create(
            model=fine_tuned_model_name,
            messages=messages_for(item),
            seed=42,
            max_tokens=7
        )
        reply = response.choices[0].message.content
        return get_price(reply)

    print("🧪 Testing fine-tuned model...")
    # Size the Tester to the data set, as the baseline evaluation does, so a
    # small test split does not make it index past the end of the list.
    tester = Tester(gpt_fine_tuned, test, size=len(test))
    tester.run()
else:
    print("⏳ Fine-tuned model not ready yet. Please wait and re-run the previous cell.")


## Advanced Fine-tuning with Enhanced Prompts


In [None]:
# Enhanced prompt function (based on gold standard)
def _brand_of(item):
    """Return the brand stored in the item's `details`, or "Unknown" when unavailable."""
    # Not every item class carries `details` (SimpleItem does not), so a
    # missing attribute is treated the same as an empty one.
    details = getattr(item, "details", None)
    if not details:
        return "Unknown"
    if isinstance(details, dict):
        parsed = details
    else:
        try:
            parsed = json.loads(details)
        except (TypeError, ValueError):
            # Not valid JSON (json.JSONDecodeError is a ValueError) or not text.
            return "Unknown"
    if not isinstance(parsed, dict):
        return "Unknown"
    return parsed.get("Brand", "Unknown")


def messages_v2(item, with_price=True):
    """
    Build the enhanced system/user/assistant prompt for `item`.

    The user message is the string form of a dictionary holding the query
    metadata, the item's category, its stripped test prompt and its brand.
    With `with_price=True` the assistant message contains the price with two
    decimals (training); otherwise it stops at "Price is $" (inference).
    """
    system_message = (
        "Role: You are a retail price estimator.\n"
        "Market: United States; Currency: USD.\n"
        "Scope: Predict the most likely new retail price. Ignore taxes, shipping, coupons, bundles, used/renewed.\n"
        "Output: Only a number with two decimals (e.g., 129.99). No $ sign. No words.\n"
        "Think silently; do not reveal reasoning."
    )

    user_prompt = item.test_prompt().replace(" to the nearest dollar", "").replace("\n\nPrice is $", "")

    # NOTE(review): str() yields a Python dict repr (single quotes), not JSON;
    # kept as-is so the prompt format stays identical for existing models.
    user_content = str({
        "query": "price_estimate",
        "locale": "en_US",
        "currency": "USD",
        "category": item.category,
        "description": user_prompt,
        "brand": _brand_of(item)
    })

    return [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_content},
        {"role": "assistant", "content": f"Price is ${item.price:.2f}" if with_price else "Price is $"}
    ]

print("✅ Enhanced prompt function created!")


In [None]:
# Create enhanced fine-tuning data
def make_jsonl_v2(items):
    """
    Serialise `items` as JSONL text using the enhanced prompt.

    Each line is a `{"messages": [...]}` object built from messages_v2();
    the result has no trailing newline and is "" for an empty input.
    """
    lines = [
        '{"messages": ' + json.dumps(messages_v2(item)) + '}'
        for item in items
    ]
    return "\n".join(lines)

def write_jsonl_v2(items, filename):
    """Write the JSONL produced by make_jsonl_v2(items) to `filename`, replacing any existing file."""
    # The file is opened before the JSONL is built, as in the original flow.
    with open(filename, "w") as handle:
        handle.write(make_jsonl_v2(items))

# Create enhanced fine-tuning files
write_jsonl_v2(items=fine_tune_train, filename="fine_tune_train_v2.jsonl")
write_jsonl_v2(items=fine_tune_validation, filename="fine_tune_validation_v2.jsonl")

# Report the two files that were just written, one per line.
print(
    "✅ Enhanced fine-tuning files created:",
    "   - fine_tune_train_v2.jsonl",
    "   - fine_tune_validation_v2.jsonl",
    sep="\n",
)


In [None]:
# Upload enhanced files and create second fine-tuning job
# Both v2 JSONL files are opened in binary mode and uploaded with purpose="fine-tune".
with open("fine_tune_train_v2.jsonl", "rb") as f:
    train_file_v2 = openai.files.create(file=f, purpose="fine-tune")

with open("fine_tune_validation_v2.jsonl", "rb") as f:
    validation_file_v2 = openai.files.create(file=f, purpose="fine-tune")

# Create second fine-tuning job with enhanced prompts
# Same base model, seed, epoch count and integration as the first job; only
# the uploaded files and the "pricer-v2" suffix differ. The job object is kept
# in `fine_tuning_job_v2`, whose `.id` the enhanced-model test cells read.
fine_tuning_job_v2 = openai.fine_tuning.jobs.create(
    training_file=train_file_v2.id,
    validation_file=validation_file_v2.id,
    model="gpt-4o-mini",
    seed=42,
    hyperparameters={"n_epochs": 1},
    integrations=[wandb_integration],
    suffix="pricer-v2"
)

print(f"🚀 Enhanced fine-tuning job created: {fine_tuning_job_v2.id}")
print("⏳ This will take some time to complete...")


## Model Comparison and Results


In [None]:
# Test enhanced model (if ready)
# Retrieval problems are reported with their cause; errors raised while
# evaluating are allowed to surface instead of being shown as "not ready".
try:
    enhanced_model_name = openai.fine_tuning.jobs.retrieve(fine_tuning_job_v2.id).fine_tuned_model
except NameError as e:
    print(f"⚠️  Enhanced fine-tuning job is not defined yet ({e}).")
    enhanced_model_name = None
except Exception as e:
    print(f"❌ Could not retrieve the enhanced fine-tuning job: {e}")
    enhanced_model_name = None

# The model name is empty until the job has produced a model, so only
# evaluate when one is actually available.
if enhanced_model_name:
    def gpt_enhanced_fine_tuned(item):
        """Ask the enhanced fine-tuned model for a price estimate of `item` and parse the reply."""
        response = openai.chat.completions.create(
            model=enhanced_model_name,
            messages=messages_v2(item, with_price=False),
            seed=42,
            temperature=1.0,
            max_tokens=7
        )
        reply = response.choices[0].message.content
        return get_price(reply)

    print("🧪 Testing enhanced fine-tuned model...")
    # Size the Tester to the data set, as the baseline evaluation does, so a
    # small test split does not make it index past the end of the list.
    tester = Tester(gpt_enhanced_fine_tuned, test, size=len(test))
    tester.run()
else:
    print("⏳ Enhanced fine-tuned model not ready yet.")
    print("💡 Please wait for completion and re-run this cell.")


## Summary and Next Steps


In [None]:
# Print the closing summary in one call; every entry becomes its own output line.
print(
    "🎉 Week 6 Product Pricer Challenge Complete!",
    "=" * 50,
    # What was done in this notebook
    "\n📊 What We Accomplished:",
    "✅ Loaded data using pickle files (our data loading hack)",
    "✅ Established baseline with GPT-4o",
    "✅ Implemented fine-tuning with OpenAI API",
    "✅ Created enhanced prompts for better performance",
    "✅ Set up comprehensive evaluation framework",
    # Suggested follow-up work
    "\n🚀 Next Steps:",
    "1. Wait for fine-tuning jobs to complete",
    "2. Compare performance of all models",
    "3. Experiment with different hyperparameters",
    "4. Try different base models (GPT-4.1, etc.)",
    "5. Implement ensemble methods",
    # Takeaways
    "\n💡 Key Learnings:",
    "• Fine-tuning can significantly improve model performance",
    "• Prompt engineering is crucial for good results",
    "• Data quality and quantity matter for fine-tuning",
    "• Evaluation metrics help track progress",
    "\n🎯 This implementation follows the gold standard approach",
    "   while incorporating our data loading improvements!",
    sep="\n",
)
