# The Product Pricer Continued

A model that can estimate how much something costs, from its description.

## Fine Tune GPT Model

In [46]:
import os 
import re
import math 
import json 
import random 
from dotenv import load_dotenv
from huggingface_hub import login 
import matplotlib.pyplot as plt 
import numpy as np 
import pickle 
from collections import Counter
### LLM models 
from openai import OpenAI
from anthropic import Anthropic

### Internal Classes
from items import Item 
from testing import Tester 

In [7]:
### Environment

# Load variables from a .env file into the process environment.
load_dotenv()

# Read the credentials used by the rest of the notebook (None when a variable is unset).
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")
HF_TOKEN = os.environ.get("HF_TOKEN")

# Log in to the Hugging Face Hub and also register the token as a git credential.
login(token=HF_TOKEN, add_to_git_credential=True)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.


In [8]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [9]:
# API clients. The keys read from the environment are passed explicitly; when a key
# is None each SDK falls back to its own environment variable, as the bare
# constructor does.
openai_client = OpenAI(api_key=OPENAI_API_KEY)
claude_client = Anthropic(api_key=ANTHROPIC_API_KEY)

In [15]:
### Load pickle files of data

def _load_pickle(path):
    """Open `path` in binary mode and return the object unpickled from it."""
    # NOTE: unpickling can execute arbitrary code; only load files you created or trust.
    with open(path, "rb") as handle:
        return pickle.load(handle)

train = _load_pickle("train.pkl")
test = _load_pickle("test.pkl")

In [16]:
### OpenAI recommends fine-tuning with populations of 50-100 examples.
### Because the examples used here are very small, go with 200 training examples
### (and 1 epoch), followed by the next 50 examples for validation.

FINE_TUNE_TRAIN_SIZE = 200
FINE_TUNE_VALIDATION_SIZE = 50

fine_tune_train = train[:FINE_TUNE_TRAIN_SIZE]
fine_tune_validation = train[FINE_TUNE_TRAIN_SIZE:FINE_TUNE_TRAIN_SIZE + FINE_TUNE_VALIDATION_SIZE]

In [17]:
def messages_for(item):
    system_message = "You estimate prices of items. Reply only with the price, no explanation"
    user_prompt = item.test_prompt().replace(" to the nearest dollar","").replace("\n\nPrice is $","")
    return [
        {"role": "system", "content": system_message},
        {"role": "user", "content": user_prompt},
        {"role": "assistant", "content": f"Price is ${item.price:.2f}"}
    ]

In [31]:
def make_jsonl(items):
    """Convert the items into a "jsonl" string: one JSON object per line.

    Each row wraps the messages built by `messages_for` in the form:
    {"messages" : [{"role": "system", "content": "You estimate prices...

    Rows are separated by a newline with no trailing newline; an empty
    `items` yields an empty string.
    """
    rows = (json.dumps({"messages": messages_for(entry)}) for entry in items)
    return "\n".join(rows)

In [42]:
### Write items to jsonl files

def write_jsonl(items, filename):
    """Serialize `items` with `make_jsonl` and write the result to `filename`.

    The JSONL text is built before the file is opened: opening with mode "w"
    empties the file immediately, so serializing first means a failure in
    `make_jsonl` cannot leave an empty file behind or wipe an existing one.

    Args:
        items: iterable of items accepted by `make_jsonl`.
        filename: path of the file to create or overwrite.
    """
    jsonl = make_jsonl(items)
    with open(filename, "w", encoding="utf-8") as f:
        f.write(jsonl)

In [49]:
# Write the fine-tuning training examples to disk.
write_jsonl(items=fine_tune_train, filename="fine_tune_train.jsonl")

In [50]:
# Write the fine-tuning validation examples to disk.
write_jsonl(items=fine_tune_validation, filename="fine_tune_validation.jsonl")

In [51]:
### Upload the train file to OpenAI

# The JSONL file is opened in binary mode and sent with purpose "fine-tune";
# the returned file object is kept in `train_file`.
with open("fine_tune_train.jsonl", "rb") as train_jsonl:
    train_file = openai_client.files.create(purpose="fine-tune", file=train_jsonl)

In [52]:
# Display the file object returned by the training-file upload.
train_file

FileObject(id='file-L6kdKB2naRZmbz6p5rfHPN', bytes=184390, created_at=1762937314, filename='fine_tune_train.jsonl', object='file', purpose='fine-tune', status='processed', expires_at=None, status_details=None)

In [53]:
### Upload the validation file to OpenAI

# The JSONL file is opened in binary mode and sent with purpose "fine-tune";
# the returned file object is kept in `validation_file`.
with open("fine_tune_validation.jsonl", "rb") as validation_jsonl:
    validation_file = openai_client.files.create(purpose="fine-tune", file=validation_jsonl)

In [54]:
# Display the file object returned by the validation-file upload.
validation_file

FileObject(id='file-QEwuP1z9gHdRGMteSgo1zg', bytes=45578, created_at=1762937421, filename='fine_tune_validation.jsonl', object='file', purpose='fine-tune', status='processed', expires_at=None, status_details=None)