# Autonomous Deal Agent

An intelligent system that automatically discovers, evaluates, and surfaces the best deals from across the web using AI-powered price estimation and web scraping.

## Overview

This project builds an autonomous agent that monitors RSS feeds and online sources for deals, evaluates them using ensemble ML models, and delivers personalized deal notifications to users. The system combines multiple AI technologies including fine-tuned pricing models, GPT-4 for parsing, RAG for context, and intelligent web scraping.

## Key Features

### 🤖 AI-Powered Price Estimation
- **Fine-tuned Specialist Pricer Model**: Custom-trained model deployed on Modal for accurate price predictions
- **Ensemble Architecture**: Multiple specialist models work together to determine fair market value
- **GPT-4 Frontend with RAG**: Advanced context-aware pricing using Retrieval-Augmented Generation
- **Price Comparison**: Automatically compares deal prices against estimated market value to identify true bargains

### 🕷️ Intelligent Web Scraping
- **Multi-Source Aggregation**: Scrapes deals from RSS feeds and Reddit
- **AI-Powered Parser**: Uses frontier AI models (GPT-4) to intelligently parse and extract deal information from various website structures
- **Adaptive Scraping**: Handles different site formats and layouts automatically
- **Data Enrichment**: Extracts product details, pricing, features, and purchase links

### 💎 Deal Discovery & Analysis
- **Automated Deal Scanning**: Continuously monitors configured sources for new deals
- **Opportunity Detection**: Identifies deals where market price significantly exceeds offer price
- **Deal Scoring**: Ranks deals by discount percentage and estimated value
- **Memory System**: Tracks previously surfaced deals to avoid duplicates

### 🖥️ User Interface
- **Gradio Web UI**: Clean, intuitive interface for browsing discovered deals
- **Deal Details**: Displays product description, pricing, estimated value, and discount percentage
- **Direct Purchase Links**: One-click access to deal pages

### 📲 Push Notifications
- **Real-Time Alerts**: Instant notifications when high-value deals are discovered
- **Push Notification Integration**: Uses Pushover/similar service for mobile alerts
- **Customizable Thresholds**: Configure minimum discount percentage for notifications



### Project Imports

In [None]:
# Standard library imports
import os
import re
import math
import json
import random
import pickle

# Third-party imports
from dotenv import load_dotenv
from huggingface_hub import login
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from openai import OpenAI
from sentence_transformers import SentenceTransformer
from datasets import load_dataset
import chromadb
import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.manifold import TSNE
import plotly.graph_objects as go
import modal
import gradio as gr

# Local imports
from pricer_ephemeral import app, price
from items import Item
from testing import Tester
from agents.deals import ScrapedDeal, DealSelection, Opportunity, Deal
from agents.messaging_agent import MessagingAgent
from deal_agent_framework import DealAgentFramework
from agents.planning_agent import PlanningAgent

### Loading environment variables for OpenAI and Hugging Face

In [None]:
load_dotenv(override=True)
os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY', 'your-key-if-not-using-env')
os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN', 'your-key-if-not-using-env')
DB = "products_vectorstore"

In [None]:
hf_token = os.environ['HF_TOKEN']
login(hf_token, add_to_git_credential=True)

In [None]:
from items import Item

### Setting up Modal to deploy the pricer service

In [None]:
with modal.enable_output():
    with app.run():
        result=price.remote("Quadcast HyperX condenser mic, connects via usb-c to your computer for crystal clear audio")
result

In [None]:
!modal deploy -m pricer_service

### Setting up RAG to give GPT relevant price context and improve accuracy

In [None]:
with open('train.pkl', 'rb') as file:
    train = pickle.load(file)

In [None]:
client = chromadb.PersistentClient(path=DB)

In [None]:
collection_name = "products"

# Depending on the chromadb release, list_collections() yields either plain
# names or Collection objects; normalize both shapes to a list of names so the
# membership test below works either way.
existing_collection_names = [
    getattr(entry, "name", entry) for entry in client.list_collections()
]

# Start from a clean slate, but only delete when there is something to delete:
# an unconditional delete_collection() fails if the collection is missing
# (including when it was just removed by the branch below).
if collection_name in existing_collection_names:
    client.delete_collection(collection_name)
    print(f"Deleted existing collection: {collection_name}")
collection = client.create_collection(collection_name)

In [None]:
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

In [None]:
# Pass in a list of texts, get back a numpy array of vectors

vector = model.encode(["Well hi there"])[0]
vector

In [None]:
def description(item):
    """Return the product text embedded in ``item.prompt``.

    Every occurrence of the pricing question is removed, and anything from the
    first "Price is $" marker onwards is discarded.
    """
    question = "How much does this cost to the nearest dollar?\n\n"
    answer_marker = "\n\nPrice is $"
    without_question = item.prompt.replace(question, "")
    body, _, _ = without_question.partition(answer_marker)
    return body

In [None]:
description(train[0])

In [None]:
NUMBER_OF_DOCUMENTS = len(train)

# Index the whole training set into the Chroma collection, 1000 items per
# batch, with tqdm reporting progress per batch.
for i in tqdm(range(0, NUMBER_OF_DOCUMENTS, 1000)):
    # Text to embed and store for this batch (the final batch may be shorter).
    documents = [description(item) for item in train[i: i+1000]]
    # Encode the batch, then convert to nested lists of floats for collection.add.
    vectors = model.encode(documents).astype(float).tolist()
    # Category and price are kept as metadata alongside each document.
    metadatas = [{"category": item.category, "price": item.price} for item in train[i: i+1000]]
    # Ids follow each item's position in train: doc_0, doc_1, ...
    ids = [f"doc_{j}" for j in range(i, i+len(documents))]
    collection.add(
        ids=ids,
        documents=documents,
        embeddings=vectors,
        metadatas=metadatas
    )

In [None]:
MAXIMUM_DATAPOINTS = 30_000

In [None]:
DB = "products_vectorstore"
client = chromadb.PersistentClient(path=DB)

In [None]:
collection = client.get_or_create_collection('products')

In [None]:
CATEGORIES = ['Appliances', 'Automotive', 'Cell_Phones_and_Accessories', 'Electronics','Musical_Instruments', 'Office_Products', 'Tools_and_Home_Improvement', 'Toys_and_Games']
COLORS = ['red', 'blue', 'brown', 'orange', 'yellow', 'green' , 'purple', 'cyan']

In [None]:
# Prework
result = collection.get(include=['embeddings', 'documents', 'metadatas'], limit=MAXIMUM_DATAPOINTS)
vectors = np.array(result['embeddings'])
documents = result['documents']
categories = [metadata['category'] for metadata in result['metadatas']]
colors = [COLORS[CATEGORIES.index(c)] for c in categories]

In [None]:
MAXIMUM_DATAPOINTS = 20_000


In [None]:
DB = "products_vectorstore"
client = chromadb.PersistentClient(path=DB)

In [None]:
collection = client.get_or_create_collection('products')

In [None]:
CATEGORIES = ['Appliances', 'Automotive', 'Cell_Phones_and_Accessories', 'Electronics','Musical_Instruments', 'Office_Products', 'Tools_and_Home_Improvement', 'Toys_and_Games']
COLORS = ['red', 'blue', 'brown', 'orange', 'yellow', 'green' , 'purple', 'cyan']

In [None]:
# Prework
result = collection.get(include=['embeddings', 'documents', 'metadatas'], limit=MAXIMUM_DATAPOINTS)
vectors = np.array(result['embeddings'])
documents = result['documents']
categories = [metadata['category'] for metadata in result['metadatas']]
colors = [COLORS[CATEGORIES.index(c)] for c in categories]

In [None]:
# Let's try 3D!

tsne = TSNE(n_components=3, random_state=42, n_jobs=-1)
reduced_vectors = tsne.fit_transform(vectors)

In [None]:

# Create the 3D scatter plot
fig = go.Figure(data=[go.Scatter3d(
    x=reduced_vectors[:, 0],
    y=reduced_vectors[:, 1],
    z=reduced_vectors[:, 2],
    mode='markers',
    marker=dict(size=3, color=colors, opacity=0.7),
)])

fig.update_layout(
    title='3D Chroma Vector Store Visualization',
    scene=dict(xaxis_title='x', yaxis_title='y', zaxis_title='z'),
    width=1200,
    height=800,
    margin=dict(r=20, b=10, l=10, t=40)
)

fig.show()

In [None]:
openai = OpenAI()

In [None]:
with open('test.pkl', 'rb') as file:
    test = pickle.load(file)

In [None]:
def make_context(similars, prices):
    """Build the context preamble listing similar products with their prices.

    ``similars`` and ``prices`` are paired positionally; each price is rendered
    with two decimal places. With no pairs, only the introductory sentence is
    returned.
    """
    header = "To provide some context, here are some other items that might be similar to the item you need to estimate.\n\n"
    entries = [
        f"Potentially related product:\n{similar}\nPrice is ${price:.2f}\n\n"
        for similar, price in zip(similars, prices)
    ]
    return header + "".join(entries)

In [None]:
def messages_for(item, similars, prices):
    """Assemble the chat messages used to ask for a price estimate of ``item``.

    The user message is the similar-products context followed by the item's
    test prompt with the "to the nearest dollar" wording and the trailing
    "Price is $" stripped. The final assistant message pre-seeds the reply
    with "Price is $".
    """
    context = make_context(similars, prices)
    question = (
        item.test_prompt()
        .replace(" to the nearest dollar", "")
        .replace("\n\nPrice is $", "")
    )
    system_turn = {
        "role": "system",
        "content": "You estimate prices of items. Reply only with the price, no explanation",
    }
    user_turn = {
        "role": "user",
        "content": context + "And now the question for you:\n\n" + question,
    }
    assistant_turn = {"role": "assistant", "content": "Price is $"}
    return [system_turn, user_turn, assistant_turn]

In [None]:
DB = "products_vectorstore"

In [None]:
client = chromadb.PersistentClient(path=DB)
collection = client.get_or_create_collection('products')

In [None]:
def description(item):
    """Extract the bare product description from ``item.prompt``.

    Drops the pricing question wherever it appears and keeps only the text
    before the first "Price is $" marker.
    """
    stripped = item.prompt.replace(
        "How much does this cost to the nearest dollar?\n\n", ""
    )
    pieces = stripped.split("\n\nPrice is $")
    return pieces[0]

In [None]:
description(test[0])

In [None]:
model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

In [None]:
def vector(item):
    """Encode the item's description as a one-element batch with the sentence model."""
    texts = [description(item)]
    return model.encode(texts)

In [None]:
def find_similars(item):
    """Look up the 5 nearest stored products for ``item``.

    Returns a ``(documents, prices)`` pair: the matched document texts and the
    ``price`` value from each match's metadata, in the order the query
    returned them.
    """
    query_embeddings = vector(item).astype(float).tolist()
    results = collection.query(query_embeddings=query_embeddings, n_results=5)
    # Only one query vector was sent, so the hits sit at index 0.
    matched_documents = results['documents'][0][:]
    matched_prices = [metadata['price'] for metadata in results['metadatas'][0]]
    return matched_documents, matched_prices

In [None]:
# Fetch the nearest neighbours of a sample test item first: without this,
# `prices` is undefined here and `documents` would still hold the full
# collection dump loaded for the visualization cells.
documents, prices = find_similars(test[1])
print(make_context(documents, prices))

In [None]:
print(messages_for(test[1], documents, prices))

In [None]:
def get_price(s):
    """Parse the first number found in ``s`` after removing '$' and ','.

    Returns the number as a float, or 0 when the text contains no digits.
    """
    cleaned = s.replace('$', '').replace(',', '')
    found = re.search(r"[-+]?\d*\.\d+|\d+", cleaned)
    if not found:
        return 0
    return float(found.group())

### GPT-4o Mini + RAG

In [None]:
# The function for gpt-4o-mini

def gpt_4o_mini_rag(item):
    """Estimate the price of ``item`` with gpt-4o-mini plus retrieved context.

    Retrieves similar products, builds the chat messages from them, asks the
    model for a short completion (seed 42, at most 5 tokens) and parses the
    number out of the reply.
    """
    similars, similar_prices = find_similars(item)
    chat_messages = messages_for(item, similars, similar_prices)
    completion = openai.chat.completions.create(
        model="gpt-4o-mini",
        messages=chat_messages,
        seed=42,
        max_tokens=5,
    )
    return get_price(completion.choices[0].message.content)

In [None]:
gpt_4o_mini_rag(test[1])

In [None]:
test[1].price

In [None]:
Tester.test(gpt_4o_mini_rag, test)

In [None]:
from agents.frontier_agent import FrontierAgent

In [None]:
agent = FrontierAgent(collection)

In [None]:
client = chromadb.PersistentClient(path=DB)
collection = client.get_or_create_collection('products')

In [None]:
result = collection.get(include=['embeddings', 'documents', 'metadatas'])
vectors = np.array(result['embeddings'])
documents = result['documents']
prices = [metadata['price'] for metadata in result['metadatas']]

### Training a Random Forest Model for Product Price Prediction

In [None]:
rf_model = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
rf_model.fit(vectors, prices)

In [None]:
joblib.dump(rf_model, 'random_forest_model.pkl')

In [None]:
rf_model = joblib.load('random_forest_model.pkl')

### Agents for price prediction: Random Forest, Specialist Agent, and Frontier Agent + RAG

In [None]:
from agents.specialist_agent import SpecialistAgent
from agents.frontier_agent import FrontierAgent
from agents.random_forest_agent import RandomForestAgent

In [None]:
specialist = SpecialistAgent()
frontier = FrontierAgent(collection)
random_forest = RandomForestAgent()

In [None]:
def description(item):
    """Return the text between the pricing question and the "Price is $" marker.

    Raises IndexError when ``item.prompt`` does not contain the
    "to the nearest dollar?" question.
    """
    after_question = item.prompt.split("to the nearest dollar?\n\n")[1]
    before_answer = after_question.split("\n\nPrice is $")[0]
    return before_answer

In [None]:
def rf(item):
    """Price ``item`` with the random forest agent, using its description text."""
    text = description(item)
    return random_forest.price(text)

In [None]:
Tester.test(rf, test)

In [None]:
product = "Quadcast HyperX condenser mic for high quality audio for podcasting"

In [None]:
print(specialist.price(product))
print(frontier.price(product))
print(random_forest.price(product))

In [None]:
# Gather each agent's estimate, plus the true price, for test items 1000-1249.
# The four lists stay index-aligned: entry k of each refers to the same item.
specialists = []
frontiers = []
random_forests = []
prices = []
for item in tqdm(test[1000:1250]):
    text = description(item)
    specialists.append(specialist.price(text))
    frontiers.append(frontier.price(text))
    random_forests.append(random_forest.price(text))
    # Ground-truth price for the same item.
    prices.append(item.price)

In [None]:
# Per-item lowest and highest of the three agent estimates, used as extra features.
mins = [min(s,f,r) for s,f,r in zip(specialists, frontiers, random_forests)]
maxes = [max(s,f,r) for s,f,r in zip(specialists, frontiers, random_forests)]

# Feature matrix: one row per item, one column per agent plus the Min/Max columns.
X = pd.DataFrame({
    'Specialist': specialists,
    'Frontier': frontiers,
    'RandomForest': random_forests,
    'Min': mins,
    'Max': maxes,
})

# Convert y to a Series
y = pd.Series(prices)

In [None]:
# Train a Linear Regression
np.random.seed(42)

# Fit a linear model that maps the agent estimates (X) to the true prices (y).
lr = LinearRegression()
lr.fit(X, y)

feature_columns = X.columns.tolist()

# Report the learned weight for each feature column, then the intercept.
for feature, coef in zip(feature_columns, lr.coef_):
    print(f"{feature}: {coef:.2f}")
print(f"Intercept={lr.intercept_:.2f}")

In [None]:
joblib.dump(lr, 'ensemble_model.pkl')

In [None]:
from agents.ensemble_agent import EnsembleAgent
ensemble = EnsembleAgent(collection)

In [None]:
ensemble.price(product)

In [None]:
def ensemble_pricer(item):
    """Price ``item`` with the ensemble agent, never returning less than 0."""
    estimate = ensemble.price(description(item))
    return max(0, estimate)

In [None]:
Tester.test(ensemble_pricer, test)

### The Scraper Agent - Fetches deals from websites and formats them into a structured form

In [None]:
deals = ScrapedDeal.fetch(show_progress=True)

In [None]:
len(deals)

In [None]:
deals[0].describe()

In [None]:
system_prompt = """You identify and summarize the 5 most detailed deals from a list, by selecting deals that have the most detailed, high quality description and the most clear price.
Respond strictly in JSON with no explanation, using this format. You should provide the price as a number derived from the description. If the price of a deal isn't clear, do not include that deal in your response.
Most important is that you respond with the 5 deals that have the most detailed product description with price. It's not important to mention the terms of the deal; most important is a thorough description of the product.
Be careful with products that are described as "$XXX off" or "reduced by $XXX" - this isn't the actual price of the product. Only respond with products when you are highly confident about the price. 

{"deals": [
    {
        "product_description": "Your clearly expressed summary of the product in 4-5 sentences. Details of the item are much more important than why it's a good deal. Avoid mentioning discounts and coupons; focus on the item itself. There should be a paragpraph of text for each item you choose.",
        "price": 99.99,
        "url": "the url as provided"
    },
    ...
]}"""

In [None]:
user_prompt = """Respond with the most promising 5 deals from this list, selecting those which have the most detailed, high quality product description and a clear price.
Respond strictly in JSON, and only JSON. You should rephrase the description to be a summary of the product itself, not the terms of the deal.
Remember to respond with a paragraph of text in the product_description field for each of the 5 items that you select.
Be careful with products that are described as "$XXX off" or "reduced by $XXX" - this isn't the actual price of the product. Only respond with products when you are highly confident about the price. 

Deals:

"""
user_prompt += '\n\n'.join([deal.describe() for deal in deals])

In [None]:
print(user_prompt[:2000])

In [None]:
def get_recommendations():
    """Ask gpt-4o-mini to select deals and return the parsed ``DealSelection``.

    Sends the module-level ``system_prompt`` and ``user_prompt`` and requests a
    response parsed into ``DealSelection``.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    completion = openai.beta.chat.completions.parse(
        model="gpt-4o-mini",
        messages=conversation,
        response_format=DealSelection,
    )
    return completion.choices[0].message.parsed

In [None]:
result = get_recommendations()

In [None]:
len(result.deals)

In [None]:
result.deals[1]

In [None]:
from agents.scanner_agent import ScannerAgent

In [None]:
agent = ScannerAgent()
result = agent.scan()

In [None]:
result

In [None]:
DB = "products_vectorstore"

In [None]:
agent = MessagingAgent()

### Planning Agent 

In [None]:

DB = "products_vectorstore"
client = chromadb.PersistentClient(path=DB)
collection = client.get_or_create_collection('products')


In [None]:
planner = PlanningAgent(collection)

In [None]:
planner.plan()

### Gradio UI For Data Visualization

In [None]:
# Create the agent framework before building the UI; the row-select handler
# below reaches the messenger through agent_framework.planner.messenger.
agent_framework = DealAgentFramework()
agent_framework.init_agents_as_needed()

with gr.Blocks(title="The Price is Right", fill_width=True) as ui:

    # Placeholder opportunity used as the initial content of the UI state.
    initial_deal = Deal(product_description="Example description", price=100.0, url="https://cnn.com")
    initial_opportunity = Opportunity(deal=initial_deal, estimate=200.0, discount=100.0)
    opportunities = gr.State([initial_opportunity])

    def get_table(opps):
        """Flatten opportunities into rows matching the dataframe headers below."""
        return [[opp.deal.product_description, opp.deal.price, opp.estimate, opp.discount, opp.deal.url] for opp in opps]

    def do_select(opportunities, selected_index: gr.SelectData):
        """Send an alert for the opportunity in the row the user selected."""
        # index[0] is used as the row position into the opportunities list.
        row = selected_index.index[0]
        opportunity = opportunities[row]
        agent_framework.planner.messenger.alert(opportunity)

    with gr.Row():
        gr.Markdown('<div style="text-align: center;font-size:24px">"The Price is Right" - Deal Hunting Agentic AI</div>')
    with gr.Row():
        gr.Markdown('<div style="text-align: center;font-size:14px">Deals surfaced so far:</div>')
    with gr.Row():
        opportunities_dataframe = gr.Dataframe(
            headers=["Description", "Price", "Estimate", "Discount", "URL"],
            wrap=True,
            column_widths=[4, 1, 1, 1, 2],
            row_count=10,
            col_count=5,
            max_height=400,
        )

    # Fill the table from the state when the page loads, and alert on row selection.
    ui.load(get_table, inputs=[opportunities], outputs=[opportunities_dataframe])
    opportunities_dataframe.select(do_select, inputs=[opportunities], outputs=[])

ui.launch(inbrowser=True)

### Run the application

In [None]:
!python price_is_right_final.py