In [7]:
import requests
import json
import pandas as pd

# API base URL
BASE_URL = "http://127.0.0.1:8080"


def start_scraping(max_products=10, timeout=30):
    """Ask the API to start scraping new products.

    Sends ``POST {BASE_URL}/refresh`` with ``max_products`` as a query
    parameter and returns the decoded JSON body of the response.

    Args:
        max_products: Value sent as the ``max_products`` query parameter.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` waits indefinitely and an unresponsive
            server would block the notebook kernel.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    response = requests.post(
        f"{BASE_URL}/refresh",
        # Let requests build and URL-encode the query string.
        params={"max_products": max_products},
        timeout=timeout,
    )
    # Surface HTTP errors here instead of trying to parse an error page as JSON.
    response.raise_for_status()
    return response.json()

def check_health(timeout=10):
    """Check API health.

    Sends ``GET {BASE_URL}/health`` and returns the decoded JSON body.

    Args:
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests`` waits indefinitely and an unresponsive
            server would block the notebook kernel.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    response = requests.get(f"{BASE_URL}/health", timeout=timeout)
    # Surface HTTP errors here instead of trying to parse an error page as JSON.
    response.raise_for_status()
    return response.json()

In [8]:
# Fetch the API's /health status; the returned dict is shown as the cell output.
check_health()

{'status': 'healthy',
 'cached_products': 24,
 'scraping_active': False,
 'last_scraped': '2025-05-31T17:25:25.882339',
 'storage_file': 'products_database.json',
 'file_exists': True,
 'file_size_kb': 45.14}

In [9]:
# POST /refresh with max_products=10; the response dict is shown as the cell output.
start_scraping(max_products=10)

{'message': 'Started scraping 10 products',
 'status': 'processing',
 'current_total': 24}

In [13]:
def load_products_from_json(path='products_database.json'):
    """Load the product database directly from a JSON file.

    Args:
        path: File to read. Defaults to ``'products_database.json'``,
            resolved relative to the current working directory.

    Returns:
        The parsed JSON content, or ``None`` when the file does not exist
        (a message is printed in that case).

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    try:
        # Decode explicitly as UTF-8 so non-ASCII text (e.g. the curly quote
        # in product titles) does not depend on the platform's default encoding.
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        print("JSON file not found")
        return None


# Read the stored database; `data` is None when the file is missing.
data = load_products_from_json()
# Fall back to an empty list when nothing was loaded or the 'products' key is
# absent, so the cells below can rely on `products` always being defined.
products = data.get('products', []) if data else []

if products:
    df = pd.DataFrame(products)

    # Basic info
    print("=== DATASET OVERVIEW ===")
    print(f"Total products: {len(df)}")
    print(f"Columns: {list(df.columns)}")
    print("\nData types:")
    print(df.dtypes)

    # Preview title/price/rating for up to the first 50 products
    display(df[['title', 'price', 'rating']].head(50))
else:
    # Make the empty case visible instead of silently producing no output.
    print("No products loaded")

=== DATASET OVERVIEW ===
Total products: 15
Columns: ['title', 'price', 'description', 'url', 'rating', 'image_url', 'scraped_at', 'llm_summary', 'llm_highlights']

Data types:
title             object
price             object
description       object
url               object
rating            object
image_url         object
scraped_at        object
llm_summary       object
llm_highlights    object
dtype: object


Unnamed: 0,title,price,rating
0,"Sponsored HP - 15.6"" Touch-Screen Laptop - Int...",$529.99,
1,Sponsored ASUS ROG Strix G15 15.6” Gaming Lapt...,$1179.99,
2,"Sponsored Acer Aspire Go 15.6"" Laptop - Silver...",$399.99,(7 Reviews)
3,"Sponsored Dell - Inspiron 15.6"" 120Hz FHD 1920...",$519.99,
4,"Microsoft Surface Laptop 13"" Copilot+ PC Lapto...",$1499.99,(0 Reviews)
5,"Microsoft Surface Laptop 13"" Copilot+ PC Lapto...",$1499.99,(0 Reviews)
6,"Microsoft Surface Laptop 13"" Copilot+ PC Lapto...",$1499.99,(0 Reviews)
7,"Lenovo IdeaPad 1i 14"" Laptop - Abyss Blue (Int...",$199.99,(4 Reviews)
8,"Sponsored ASUS Zenbook DUO 14"" OLED Touchscree...",$1499.99,(54 Reviews)
9,"Sponsored ASUS Vivobook 14 Flip 14"" Touchscree...",$699.95,


In [14]:
# Show the LLM summary of the most recently listed product; yields None
# (no output) instead of an IndexError when no products were loaded.
products[-1]['llm_summary'] if products else None

"The ASUS Vivobook 16 laptop is designed to handle everyday tasks with ease, thanks to its Intel Core i5-13420H processor, 16GB of RAM, and 512GB solid-state drive (SSD). This means you can expect smooth performance when browsing the web, working on documents, and streaming videos. The laptop's 16-inch display provides a comfortable viewing experience, and with Windows 11, you'll have access to the latest features and security updates. Overall, this laptop is suitable for students, professionals, and anyone looking for a reliable device for general use."

In [4]:
def examine_product(index, product_list=None):
    """Print the details of a single product.

    Args:
        index: Zero-based position of the product. The heading is printed
            one-based (``index + 1``).
        product_list: Optional list of product dicts to read from. When
            omitted, the notebook-level ``products`` list is used.

    Prints the title, price, rating, URL, LLM summary and a numbered list of
    LLM highlights. If ``index`` is outside ``0 .. len(list) - 1`` a
    "not found" message is printed instead. Returns ``None``.
    """
    items = products if product_list is None else product_list

    # Reject negative indices too: Python would silently wrap them around to
    # the end of the list and the heading would then show the wrong number.
    if not 0 <= index < len(items):
        print(f"Product {index + 1} not found")
        return

    product = items[index]
    print(f"=== PRODUCT {index + 1} ===")
    print(f"Title: {product['title']}")
    print(f"Price: {product['price']}")
    print(f"Rating: {product['rating']}")
    print(f"URL: {product['url']}")
    print("\nLLM Summary:")
    print(f"{product['llm_summary']}")
    print("\nLLM Highlights:")
    # Scraped fields can be None (rating is, in the recorded output); treat
    # missing highlights as an empty list rather than crashing in enumerate().
    for i, highlight in enumerate(product['llm_highlights'] or [], 1):
        print(f"  {i}. {highlight}")
    print("="*60)

# Walk through at most the first ten loaded products.
preview_count = min(10, len(products))
for position in range(preview_count):
    examine_product(position)

=== PRODUCT 1 ===
Title: Sponsored HP - 15.6" Touch-Screen Laptop - Intel Core 6-Core i3 -1215u Up to 4.4GHz - 16GB Memory - 256GB SSD - Natural Silver, Intel UHD Graphics, Windows 11 Home, HDMI 1,4b, WIFI 5 Top Deal $529.99 
Price: $529.99
Rating: None
URL: https://www.bestbuy.ca//b.us5.us.criteo.com/rm?dest=https%3A%2F%2Fwww.bestbuy.ca%2Fen-ca%2Fproduct%2Fhp-15-6-touch-screen-laptop-intel-core-6-core-i3-1215u-up-to-4-4ghz-16gb-memory-256gb-ssd-natural-silver-intel-uhd-graphics-windows-11-home-hdmi-1-4b-wifi-5%2F17667138%3Fsource%3Dcategory%26adSlot%3D1%26slotPos%3D1&sig=1-qR7JOOqzlg3yWwVr16W4vrNJvy7X4PdwOSXCHUk29dU&rm_e=AtSrHgleyx7EJ5J0YRgdbDTrYltxRroRJp0XDMjUn86LG0yvCO3IfcnEnrZtBi-1O3j6u5rhkFWrJk3dSpBvFycW_r2We1Ila0jwqaf9U4Y5Xaed7iet-Cn-SxB2T2SdqK9v2CLU0HUbXyOo_DQQCEXlM3cIscSc-WqyQTdmTVuZTNFbU9k5Uut6HMHPqIZx9pKQtqSBEu9pPrNDQ2m8KQNBUi90dHJUhErA9UbvNJ6WJmCZkMjM-1PKtBeIjywVJojDwHwzO4mrIwZQqiljvC-HHuz2Ci8gBgwutJkRyJF_whzyr1LB2n1vHafVRbo5J3pL16efsz5YTZS3VRul8umc5LYS1Yx1AkJRkxXQnvd18CfrfX

In [27]:
# POST /refresh again with max_products=10.
start_scraping(max_products=10)

ConnectionError: ('Connection aborted.', ConnectionResetError(54, 'Connection reset by peer'))

In [26]:
# Fetch /health again; the returned dict is shown as the cell output.
check_health()

{'status': 'healthy',
 'cached_products': 24,
 'scraping_active': False,
 'last_scraped': '2025-05-31T17:25:25.882339',
 'storage_file': 'products_database.json',
 'file_exists': True,
 'file_size_kb': 45.14}