In [None]:
# Date: 05.02.25
# Note: lots of mucking around as GPT has a different format to Gem.
# Edited and re-created code to make it work.

# Basically there are a number of functions, some of which are called in the same function that makes the 
# openai api call. Gem can't work like that, so took same functions, combined them, then made them accessible
# via single function, then when called outputs a text prompt (with all the same data as the original set up)
# but just in a different format, that is then passed to gem to get the output. 
# Works fine. Started it up with the tester class, but i am using the free teir (15 RPM), so can't 
# complete tester. Results were looking good before ran out of api calls.

# In summary, this takes a product and semantically searches a db for similar items. Those similar items
# are passed to the llm (kind of like multi-shot prompting) and used to determine a price for the item in 
# question. Kind of cool.

# The Agent stuff failed as that is still using Open AI

# Date: 08.02.25
# Note: Got agent working (canned Ed approach, and started fresh with creating the Class for the agent). 
# It now works, but has dependencies on parts of this code. I will note the required cells with -- AGENT --

# The Price is Right

Today we build a more complex solution for estimating prices of goods.

1. Day 2.0 notebook: create a RAG database with our 400,000 training data
2. Day 2.1 notebook: visualize in 2D
3. Day 2.2 notebook: visualize in 3D
4. Day 2.3 notebook: build and test a RAG pipeline with GPT-4o-mini
5. Day 2.4 notebook: (a) bring back our Random Forest pricer (b) Create a Ensemble pricer that allows contributions from all the pricers

Phew! That's a lot to get through in one day!

## PLEASE NOTE:

We already have a very powerful product estimator with our proprietary, fine-tuned LLM. Most people would be very satisfied with that! The main reason we're adding these extra steps is to deepen your expertise with RAG and with Agentic workflows.

## We will go fast today! Hold on to your hat..

In [None]:
# -- AGENT --
# imports

import os
import re
import math
import json
from tqdm import tqdm
import random
from dotenv import load_dotenv
from huggingface_hub import login
import matplotlib.pyplot as plt
import numpy as np
import pickle
from openai import OpenAI
from sentence_transformers import SentenceTransformer
from datasets import load_dataset
import chromadb
from items import Item
from testing import Tester

In [None]:
# environment

load_dotenv()
#os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')
os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')

In [11]:
# Initialize and constants

load_dotenv(override=True)
api_key = os.getenv('OPENAI_API_KEY')

if api_key and api_key.startswith('sk-proj-') and len(api_key)>10:
    print("API key looks good so far")
else:
    print("There might be a problem with your API key? Please visit the troubleshooting notebook!")
    
# MODEL = 'gpt-4o-mini'
# openai = OpenAI()

# ----- Replacing with Gem code -----
import os
import google.generativeai as genai

genai.configure(api_key= api_key)

message = "this is a test only"
def gem_llm(message):
  generation_config = {
    "temperature": 1,
    "top_p": 0.95,
    "top_k": 40,
    "max_output_tokens": 8192,
    "response_mime_type": "text/plain",
    #"response_mime_type": "application/json",
  }

  model = genai.GenerativeModel(model_name="gemini-1.5-flash",
    generation_config=generation_config,)

  chat_session = model.start_chat(history=[  ])
  response = chat_session.send_message(message)
  return response.text
print(gem_llm(message))

There might be a problem with your API key? Please visit the troubleshooting notebook!
Understood.  I await your instructions for the test.



In [None]:
# -- AGENT --
# Load in the test pickle file
# See the section "Back to the PKL files" in the day2.0 notebook
# for instructions on obtaining this test.pkl file

# with open('test.pkl', 'rb') as file:
#     test = pickle.load(file)

with open('test_lite.pkl', 'rb') as file:
    test = pickle.load(file)    

In [None]:
def make_context(similars, prices):
    message = "To provide some context, here are some other items that might be similar to the item you need to estimate.\n\n"
    for similar, price in zip(similars, prices):
        message += f"Potentially related product:\n{similar}\nPrice is ${price:.2f}\n\n"
    return message

In [None]:
# def messages_for(item, similars, prices):
#     system_message = "You estimate prices of items. Reply only with the price, no explanation"
#     user_prompt = make_context(similars, prices)
#     user_prompt += "And now the question for you:\n\n"
#     user_prompt += item.test_prompt().replace(" to the nearest dollar","").replace("\n\nPrice is $","")
#     return [
#         {"role": "system", "content": system_message},
#         {"role": "user", "content": user_prompt},
#         {"role": "assistant", "content": "Price is $"}
#     ]


# Need to modify for Gem
def messages_for(item, similars, prices):
    #system_message = "You estimate prices of items. Reply only with the price, no explanation"
    user_prompt = "You estimate prices of items. Reply only with the price, no explanation"
    user_prompt += make_context(similars, prices)
    user_prompt += "And now the question for you:\n\n"
    user_prompt += item.test_prompt().replace(" to the nearest dollar","").replace("\n\nPrice is $","")
    return [
        #{"role": "system", "content": system_message},
        {"role": "user", "content": user_prompt},
        {"role": "assistant", "content": "Price is $"}
    ]

In [None]:
# -- AGENT --
DB = "products_vectorstore"

In [None]:
# -- AGENT --
client = chromadb.PersistentClient(path=DB)
collection = client.get_or_create_collection('products')

[94mchromadb.telemetry.product.posthog:[0m Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.


In [None]:
def description(item):
    text = item.prompt.replace("How much does this cost to the nearest dollar?\n\n", "")
    return text.split("\n\nPrice is $")[0]

In [None]:
description(test[0])

In [None]:
modelembed = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

In [None]:
def vector(item):
    return modelembed.encode([description(item)])

In [None]:
def find_similars(item):
    results = collection.query(query_embeddings=vector(item).astype(float).tolist(), n_results=5)
    documents = results['documents'][0][:]
    prices = [m['price'] for m in results['metadatas'][0][:]]
    return documents, prices

In [None]:
#print(test[1].prompt)

In [None]:
# merged cell from below into this one
documents, prices = find_similars(test[1])
print(make_context(documents, prices))

In [None]:
# My code to replace existing
def get_price(s):
    s = s.replace('$','').replace(',','')
    match = re.search(r"[-+]?\d*\.\d+|\d+", s)
    return float(match.group()) if match else 0


def generate_message_for_item(item):
    # Find similar items and their prices
    results = collection.query(query_embeddings=vector(item).astype(float).tolist(), n_results=5)
    documents = results['documents'][0][:]
    prices = [m['price'] for m in results['metadatas'][0][:]]

    # Create the context message for similar items
    message = "To provide some context, here are some other items that might be similar to the item you need to estimate.\n\n"
    for similar, price in zip(documents, prices):
        message += f"Potentially related product:\n{similar}\nPrice is ${price:.2f}\n\n"
    
    # Construct the user prompt with the context and item-specific question
    user_prompt = "You estimate prices of items. Reply only with the price, no explanation\n"
    user_prompt += "Do NOT provide esitmates, ranages, or suggestions. Just provide your estimated price\n"
    user_prompt += "Try to be as accurate as possible, based on the examples and information you have"
    user_prompt += message
    user_prompt += "And now the question for you:\n\n"
    user_prompt += item.test_prompt().replace(" to the nearest dollar", "").replace("\n\nPrice is $", "")
    
    # Return the complete message structure
    # return [
    #     {"role": "user", "content": user_prompt},
    #     {"role": "assistant", "content": "Price is $"}
    # ]
    return user_prompt

print('-'*10, 'desc', '-'*10)
item = test[49]
print(generate_message_for_item(item))
a = generate_message_for_item(item)
response = chat_session.send_message(a)
b = get_price(response.text)
print('-'*10, 'price', '-'*10)
print(b)


In [None]:
def gpt_4o_mini_rag(item):
    a = generate_message_for_item(item)
    #response = chat_session.send_message(a)
    response = gem_llm(a)
    b = get_price(response)
    return b
print('-'*10, 'price', '-'*10)
gpt_4o_mini_rag(test[1])
print(b)

In [None]:
print(messages_for(test[1], documents, prices))

In [None]:
# Can delete - not part of working code
import google.generativeai as genai
def gemini_flash_rag(item):
    documents, prices = find_similars(item)
    
    # Construct the message content
    messages = messages_for(item, documents, prices)
    generate_content(messages)

    # Extract reply from Gemini's response
    reply = response.text if hasattr(response, 'text') else ""

    return get_price(reply)
gemini_flash_rag(test[1])


In [None]:
# Can delete - not part of working code
import google.generativeai as genai
def gemini_flash_rag(item):
    documents, prices = find_similars(item)
    
    # Construct the message content
    messages = messages_for(item, documents, prices)

    # Configure the Gemini model
    #model = genai.GenerativeModel("gemini-1.5-flash")
    #model = flash
    # response = model.generate_content(messages)

    # # Extract reply from Gemini's response
    # reply = response.text if hasattr(response, 'text') else ""
    reply = chat_session.send_message(messages)


    return get_price(reply)
gemini_flash_rag(test[1])


In [None]:
# gpt_4o_mini_rag(test[1])
print(len(test[0:25]))

In [None]:
test[1].price

In [None]:
Tester.test(gpt_4o_mini_rag, test)

## Optional Extra: Trying a DeepSeek API call instead of OpenAI

If you have a DeepSeek API key, we will use it here as an alternative implementation; otherwise skip to the next section..

In [None]:
# Connect to DeepSeek using the OpenAI client python library

deepseek_api_key = os.getenv("DEEPSEEK_API_KEY")
deepseek_via_openai_client = OpenAI(api_key=deepseek_api_key,base_url="https://api.deepseek.com")

In [None]:
# Added some retry logic here because DeepSeek is very oversubscribed and sometimes fails..

def deepseek_api_rag(item):
    documents, prices = find_similars(item)
    retries = 8
    done = False
    while not done and retries > 0:
        try:
            response = deepseek_via_openai_client.chat.completions.create(
                model="deepseek-chat", 
                messages=messages_for(item, documents, prices),
                seed=42,
                max_tokens=8
            )
            reply = response.choices[0].message.content
            done = True
        except Exception as e:
            print(f"Error: {e}")
            retries -= 1
    return get_price(reply)

In [None]:
deepseek_api_rag(test[1])

In [None]:
Tester.test(deepseek_api_rag, test)

## And now to wrap this in an "Agent" class

In [None]:
%reload_ext autoreload
%autoreload 2

In [None]:
from agents.fat import fat
#from agents.frontier_agentt1s2 import FrontierAgentt1s2
#from agents.frontier_agent import FrontierAgent
response = fat.gem_llm(message)  # Call the method on the instance
print(response)

In [None]:
# Let's print the logs so we can see what's going on

import logging
root = logging.getLogger()
root.setLevel(logging.INFO)

In [None]:
# ----- My agent code -----

In [7]:
# -- test agent (not full code, only part)
from agents.fat import fat
fat_instance = fat(collection)  # Create an instance of the class
message = "How much does this laptop cost?"
response = fat_instance.gem_llm(message)  # Call the method on the instance
print(response)


NameError: name 'collection' is not defined

In [18]:
# -- Force reload of agent (need when making change)
import importlib
from agents import fat

importlib.reload(fat)
from agents.fat import fat


In [19]:
# -- Call agent
from agents.fat import fat
item = test[42]
# Initialize an instance of the class
agent = fat(collection)

# Example usage (assuming you have an Item object)
#price_estimate = agent.gpt_4o_mini_rag(item)
price_estimate = agent.price(item)

print(price_estimate)

[94mfat:[0m Initializing Fat class
[94mfat:[0m Initializing Fat class with collection
[94msentence_transformers.SentenceTransformer:[0m Use pytorch device_name: cpu
[94msentence_transformers.SentenceTransformer:[0m Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2
[94mfat:[0m Extracted description from item


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[94mfat:[0m Generated message for item
[94mfat:[0m Received response from LLM
[94mfat:[0m Extracted price: 28.99
[94mfat:[0m Estimated price: 28.99


28.99


In [21]:
test[20]

<Dryer Heating Element, Replacement with OEM Part Number DC47-00019A and P13312, Dryer Repair Kit Replacement for Old or Broken Heating Element, Easy to Install = $15.39>

In [None]:
# ----- Below is ed code (plus some of my code)-----

In [None]:
agent5 = fat(collection)
#agent = FrontierAgentt1s2(collection)
#agent = FrontierAgent(collection)

In [None]:
#agent.price("Quadcast HyperX condenser mic for high quality podcasting")
agent5.price("Quadcast HyperX condenser mic for high quality podcasting")

In [None]:
from agents.specialist_agent import SpecialistAgent

In [None]:
agent2 = SpecialistAgent()

In [None]:
agent2.price("Quadcast HyperX condenser mic for high quality podcasting")

In [None]:
print('are you working?')

In [None]:
# testing my agent set up...

In [None]:
def get_price(s):
    s = s.replace('$','').replace(',','')
    match = re.search(r"[-+]?\d*\.\d+|\d+", s)
    return float(match.group()) if match else 0

def generate_message_for_item(item):
    # Find similar items and their prices
    results = collection.query(query_embeddings=vector(item).astype(float).tolist(), n_results=5)
    documents = results['documents'][0][:]
    prices = [m['price'] for m in results['metadatas'][0][:]]

    # Create the context message for similar items
    message = "To provide some context, here are some other items that might be similar to the item you need to estimate.\n\n"
    for similar, price in zip(documents, prices):
        message += f"Potentially related product:\n{similar}\nPrice is ${price:.2f}\n\n"
    
    # Construct the user prompt with the context and item-specific question
    user_prompt = "You estimate prices of items. Reply only with the price, no explanation\n"
    user_prompt += "Do NOT provide esitmates, ranages, or suggestions. Just provide your estimated price\n"
    user_prompt += "Try to be as accurate as possible, based on the examples and information you have"
    user_prompt += message
    user_prompt += "And now the question for you:\n\n"
    user_prompt += item.test_prompt().replace(" to the nearest dollar", "").replace("\n\nPrice is $", "")
    
    # Return the complete message structure
    # return [
    #     {"role": "user", "content": user_prompt},
    #     {"role": "assistant", "content": "Price is $"}
    # ]
    return user_prompt

def price(item: str) -> float:
    """
    Make a call to OpenAI or DeepSeek to estimate the price of the described product,
    by looking up 5 similar products and including them in the prompt to give context
    :param description: a description of the product
    :return: an estimate of the price
    """

    a = generate_message_for_item(item)
    response = chat_session.send_message(a)
    b = get_price(response.text)
    return b
item = test[1]
price(item)
print(price(item))

In [None]:
print('hello')