## Milestone 4: Cross-Platform Integration & Notification System Deployment





In [2]:
import requests
import pandas as pd
import time
import re
import json
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer, util
from pathlib import Path

# --- STEP 1: INITIAL CONFIGURATION ---
# Your unique Booksrun API key for price queries
API_KEY = "6nszyk9boeq6yad0hqh4"

# Initializing the LLM Model (Sentence-Transformers)
# We use this to compare book titles mathematically. This ensures that 'The Odyssey'
# on one site matches 'Homer: The Odyssey' on another by using "Semantic Similarity."
METADATA_MODEL = SentenceTransformer('all-MiniLM-L6-v2')

# Define and create the output directory for storing CSV/JSON results
OUTPUT_PATH = Path("output")
OUTPUT_PATH.mkdir(exist_ok=True)

class AIPricingAgent:
    """
    Final AI Agent designed to automate the lifecycle of competitive pricing:
    1. Scrapes source data.
    2. Validates identity via AI.
    3. Benchmarks against competitors.
    4. Calculates optimized sale prices.
    """

    def __init__(self, api_key):
        self.api_key = api_key
        self.results = []

    def get_book_metadata(self, title):
        """
        STEP 2: SEMANTIC BRIDGE (AI IDENTITY VALIDATION)
        Maps a text-based title from the source to a standardized ISBN using Google Books.
        """
        # Convert the source title into a numerical vector (Embedding)
        title_vector = METADATA_MODEL.encode(title)

        # Search for the title in the global Google Books database
        url = f"https://www.googleapis.com/books/v1/volumes?q=intitle:{title}"
        try:
            response = requests.get(url, timeout=10).json()
            if "items" in response:
                # Select the top match from the database
                volume = response["items"][0]["volumeInfo"]
                db_title = volume.get("title", "")

                # Calculate 'Cosine Similarity' between our title and the database title
                db_title_vector = METADATA_MODEL.encode(db_title)
                similarity_score = util.cos_sim(title_vector, db_title_vector).item()

                # STEP 2.1: VERIFICATION
                # Only proceed if the AI is 70% confident that it's the same book.
                if similarity_score > 0.70:
                    ids = volume.get("industryIdentifiers", [])
                    # Extract ISBN-10 and ISBN-13 formats
                    isbn10 = next((i['identifier'] for i in ids if i['type'] == 'ISBN_10'), "N/A")
                    isbn13 = next((i['identifier'] for i in ids if i['type'] == 'ISBN_13'), "N/A")
                    authors = ", ".join(volume.get("authors", ["Unknown Author"]))
                    return isbn10, isbn13, db_title, authors
        except Exception as e:
            # Handle potential connection or parsing errors silently
            pass
        return None, None, None, None

    def fetch_competitor_price(self, isbn):
        """
        STEP 3: MARKET INTELLIGENCE (COMPETITOR BENCHMARKING)
        Uses the Booksrun API to find the lowest current market price for a specific ISBN.
        """
        url = f"https://booksrun.com/api/v3/price/buy/{isbn}?key={self.api_key}"
        try:
            r = requests.get(url, timeout=12).json()
            offers = r.get("result", {}).get("offers", {})

            # Logic: Check direct store stock FIRST
            price = offers.get("booksrun", {}).get("used", {}).get("price")

            # Logic: If no direct stock, find the lowest third-party seller (Marketplace)
            if not price or price == "none":
                market = offers.get("marketplace", [])
                prices = [float(item['used']['price']) for item in market
                          if isinstance(item, dict) and item.get('used', {}).get('price') != 'none']
                price = min(prices) if prices else 0

            return float(price) if price and price != "none" else 0.0
        except:
            return 0.0

    def start_full_analysis(self, target_count=15):
        """
        STEP 4: ORCHESTRATION (THE FULL SCRAPE-AND-PRICE LOOP)
        Starts the browser-less scraping and coordinates the AI logic for each book found.
        """
        print(f"ðŸš€ AI Agent starting full market analysis for {target_count} books...")
        found = 0
        page = 1

        while found < target_count:
            # Connect to the source website (Books to Scrape)
            url = f"http://books.toscrape.com/catalogue/page-{page}.html"
            response = requests.get(url)
            if response.status_code != 200: break

            # Parse the HTML structure of the catalog page
            soup = BeautifulSoup(response.content, "html.parser")
            books = soup.find_all('article', class_='product_pod')

            for book in books:
                if found >= target_count: break

                # STEP 4.1: DATA INGESTION
                # Visit the specific book detail page to get the UPC and Source Price
                detail_rel_link = book.find('h3').find('a')['href']
                detail_url = "http://books.toscrape.com/catalogue/" + detail_rel_link.replace("../../../", "")
                d_soup = BeautifulSoup(requests.get(detail_url).content, "html.parser")

                source_title = d_soup.find('h1').text
                # Convert price text (e.g., 'Â£51.77') into a clean floating-point number
                source_price_raw = d_soup.find('p', class_='price_color').text
                source_price = float(re.sub(r'[^\d.]', '', source_price_raw))
                upc = d_soup.find('th', string='UPC').find_next_sibling('td').text

                # STEP 4.2: AI IDENTITY VALIDATION
                # Convert UPC/Title to standardized ISBN via Step 2
                isbn10, isbn13, verified_title, author = self.get_book_metadata(source_title)
                if not isbn13: continue # Skip if no matching book found in global database

                # STEP 4.3: COMPETITOR BENCHMARKING
                # Get the live market price via Step 3
                competitor_price = self.fetch_competitor_price(isbn13)

                if competitor_price > 0:
                    # STEP 5: DYNAMIC PRICING LOGIC (AI STRATEGY)
                    # We apply an aggressive pricing strategy:
                    # - For High-Value items (>Â£30), we apply a 15% discount to gain market share.
                    # - For Standard items, we apply a 10% discount.
                    discount_rate = 0.15 if competitor_price > 30 else 0.10
                    final_price = round(competitor_price * (1 - discount_rate), 2)

                    # Store the final structured data
                    self.results.append({
                        "upc": upc,
                        "isbn_10": isbn10,
                        "isbn_13": isbn13,
                        "book_name": verified_title,
                        "author": author,
                        "source_price": f"Â£{source_price}",
                        "competitor_price": f"Â£{competitor_price}",
                        "discount_applied": f"{int(discount_rate*100)}%",
                        "final_price": f"Â£{final_price}",
                        "price_status": "found",
                        "match_identifier": isbn13
                    })

                    print(f"âœ… [{found+1}/{target_count}] AI Logic Processed: {verified_title}")
                    found += 1
                    # Small sleep to ensure we don't overwhelm the APIs
                    time.sleep(0.5)

            # Increment page count if more books are needed
            page += 1

        # STEP 6: DATA EXPORT
        # Convert results list to a DataFrame for easy export and viewing
        df = pd.DataFrame(self.results)
        # Export as CSV (for Excel) and JSON (for databases/web apps)
        df.to_csv(OUTPUT_PATH / "final_market_analysis.csv", index=False)
        df.to_json(OUTPUT_PATH / "final_market_analysis.json", orient="records", indent=4)
        return df

# --- STEP 7: EXECUTION TRIGGER ---
# Initialize the agent with your API Key
AGENT = AIPricingAgent(API_KEY)

# Execute the full analysis for 15 books
results_df = AGENT.start_full_analysis(target_count=15)

# STEP 8: FINAL REPORT DISPLAY
# Formats the terminal output to ensure all columns are visible
print("\n" + "="*160)
print("ðŸ“Š FINAL AI PRICING REPORT: CROSS-PLATFORM COMPARISON")
print("="*160)
print(results_df.to_string(index=False))

ðŸš€ AI Agent starting full market analysis for 15 books...
âœ… [1/15] AI Logic Processed: The Dirty Little Secrets of Getting Your Dream Job
âœ… [2/15] AI Logic Processed: The Nameless City
âœ… [3/15] AI Logic Processed: orange: The Complete Collection 1
âœ… [4/15] AI Logic Processed: Lumberjanes Vol. 1
âœ… [5/15] AI Logic Processed: Thomas Jefferson and the Tripoli Pirates
âœ… [6/15] AI Logic Processed: The Regional Office is Under Attack!
âœ… [7/15] AI Logic Processed: The Murder of Roger Ackroyd
âœ… [8/15] AI Logic Processed: The 10% Entrepreneur
âœ… [9/15] AI Logic Processed: Wild Swans
âœ… [10/15] AI Logic Processed: The Star-Touched Queen
âœ… [11/15] AI Logic Processed: The Marriage of Opposites
âœ… [12/15] AI Logic Processed: The Immortal Life of Henrietta Lacks
âœ… [13/15] AI Logic Processed: The Bane Chronicles
âœ… [14/15] AI Logic Processed: Steve Jobs
âœ… [15/15] AI Logic Processed: City of Ashes

ðŸ“Š FINAL AI PRICING REPORT: CROSS-PLATFORM COMPARISON
             upc    i

## AI Agent successfully navigated two different web environments, matched 15 distinct books using semantic intelligence, and generated a competitive pricing strategy.



---

###  Observations Of Output

1. **Massive Price Gaps Identified**:
* Look at *Thomas Jefferson and the Tripoli Pirates*. The source price is **Â£59.64**, but the competitor is selling it for only **Â£4.02**.
* **Observation:** Without this AI, you would have listed the book at a price 14 times higher than the market, making it impossible to sell.


2. **Smart Discounting in Action**:
* The AI applied a **10% discount** across your list because all the competitor prices were below the Â£30 threshold defined in your logic.
* For *Steve Jobs*, the competitor price was **Â£6.52**, and your AI correctly calculated the final price as **Â£5.87**.


3. **Successful Cross-Platform Mapping**:
* The `upc` (from the source) and `isbn_13` (for the competitor) look completely different.
* **Observation:** The agent successfully "bridged" these two different worlds to find the exact same book on both websites.


4. **Data Cleanliness**:
* Notice that the `author` and `book_name` columns are professional and complete. The AI didn't just scrape text; it enriched the data using a global database.







 **Semantic Embedding** (via the `SentenceTransformer` model)

#### **1. Understanding Meaning, Not Just Letters**

Traditional computers look for exact matches. If one site says "Steve Jobs" and another says "Steve Jobs: A Biography," a normal computer might say "These are different."

* **The LLM way:** The model turns the titles into a long list of numbers (a **Vector**). It "understands" that the meaning behind both titles is almost identical.

#### **2. Similarity Scoring (The "Brain" Check)**

When your code ran `util.cos_sim`, it was asking the LLM: *"How close are these two ideas?"*

* If the score was above **0.70**, the AI "decided" they were the same book.
* This is why your output is so accurate; the LLM acted as a digital librarian verifying every single match.

#### **3. Overcoming "Messy" Web Data**

Websites often have typos or extra words (like "Vol 1" or "Complete Collection").

* **The LLM advantage:** Because the LLM understands the **context** of book titles, it wasn't confused by the extra words. It successfully matched *orange: The Complete Collection 1* by recognizing the core identity of the book.

