In [2]:
import requests
import pandas as pd
import time
import re
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer, util
from pathlib import Path

# ================= BASIC SETUP =================
BOOKSRUN_KEY = "wrsqfc9aahe5lkby82sn"

EXPORT_DIR = Path("output_different_books")
EXPORT_DIR.mkdir(exist_ok=True)

TEXT_MODEL = SentenceTransformer("all-MiniLM-L6-v2")

BASE_URL = "http://books.toscrape.com/catalogue/page-{}.html"
DETAIL_BASE = "http://books.toscrape.com/catalogue/"

# ================= AGENT DEFINITION =================
class IntelligentPricingSystemDifferentBooks:

    def __init__(self, api_key):
        self.api_key = api_key
        self.records = []

    # -------- GOOGLE BOOKS AI MATCH --------
    def resolve_isbn(self, raw_title):
        base_vector = TEXT_MODEL.encode(raw_title)
        url = f"https://www.googleapis.com/books/v1/volumes?q=intitle:{raw_title}"

        try:
            data = requests.get(url, timeout=10).json()
            if "items" not in data:
                return None, None, None, None

            info = data["items"][0]["volumeInfo"]
            candidate_title = info.get("title", "")

            compare_vector = TEXT_MODEL.encode(candidate_title)
            confidence = util.cos_sim(base_vector, compare_vector).item()

            if confidence < 0.70:
                return None, None, None, None

            identifiers = info.get("industryIdentifiers", [])
            isbn10 = next((x["identifier"] for x in identifiers if x["type"] == "ISBN_10"), None)
            isbn13 = next((x["identifier"] for x in identifiers if x["type"] == "ISBN_13"), None)
            author = ", ".join(info.get("authors", ["Unknown"]))

            return isbn10, isbn13, candidate_title, author
        except:
            return None, None, None, None

    # -------- BOOKSRUN PRICE --------
    def competitor_lookup(self, isbn):
        endpoint = f"https://booksrun.com/api/v3/price/buy/{isbn}?key={self.api_key}"
        try:
            res = requests.get(endpoint, timeout=10).json()
            offers = res.get("result", {}).get("offers", {})

            store_price = offers.get("booksrun", {}).get("used", {}).get("price")
            if store_price and store_price != "none":
                return float(store_price)

            market = offers.get("marketplace", [])
            prices = [
                float(m["used"]["price"])
                for m in market
                if m.get("used", {}).get("price") not in (None, "none")
            ]
            return min(prices) if prices else 0.0
        except:
            return 0.0

    # -------- MAIN EXECUTION (FIXED) --------
    def execute(self, count=15, start_page=5, skip_per_page=3):
        collected = 0
        page_index = start_page

        print(f"\nðŸš€ AI Pricing Started (Different Books | Page {start_page}+)\n")

        while collected < count:
            page = requests.get(BASE_URL.format(page_index))
            if page.status_code != 200:
                break

            soup = BeautifulSoup(page.content, "html.parser")
            items = soup.select("article.product_pod")[skip_per_page:]

            for item in items:
                if collected >= count:
                    break

                rel_link = item.find("h3").find("a")["href"].replace("../../../", "")
                detail_url = DETAIL_BASE + rel_link
                detail = BeautifulSoup(requests.get(detail_url).content, "html.parser")

                title = detail.find("h1").text
                price_txt = detail.find("p", class_="price_color").text
                our_price = float(re.sub(r"[^\d.]", "", price_txt))
                upc = detail.find("th", string="UPC").find_next("td").text

                isbn10, isbn13, clean_title, author = self.resolve_isbn(title)
                if not isbn13:
                    continue

                market_price = self.competitor_lookup(isbn13)
                if market_price <= 0:
                    continue

                # -------- SUCCESS STATUS --------
                status = "found"
                identifier = isbn13

                discount = 0.15 if market_price > 30 else 0.10
                final_price = round(market_price * (1 - discount), 2)

                self.records.append({
                    "Book Title": clean_title,
                    "UPC": upc,
                    "ISBN-10": isbn10,
                    "ISBN-13": isbn13,
                    "Our Price (Â£)": our_price,
                    "Competitor Price (Â£)": market_price,
                    "Discount": f"{int(discount*100)}%",
                    "Final Price (Â£)": final_price,
                    "status": status,
                    "match_identifier": identifier
                })

                collected += 1
                print(f"âœ… [{collected}/{count}] {clean_title}")
                time.sleep(0.4)

            page_index += 1

        df = pd.DataFrame(self.records)
        df.to_csv(EXPORT_DIR / "ai_pricing_different_books.csv", index=False)
        df.to_json(EXPORT_DIR / "ai_pricing_different_books.json", indent=4, orient="records")

        print("\nðŸŽ‰ AI PRICING COMPLETED (DIFFERENT BOOK SET)\n")
        return df


# ================= RUN =================
engine = IntelligentPricingSystemDifferentBooks(BOOKSRUN_KEY)
final_df = engine.execute(count=15, start_page=5, skip_per_page=3)

print("=" * 160)
print("ðŸ“Š FINAL AI PRICING REPORT â€“ DIFFERENT BOOKS")
print("=" * 160)
print(final_df.to_string(index=False))


ðŸš€ AI Pricing Started (Different Books | Page 5+)

âœ… [1/15] Lumberjanes Vol. 1
âœ… [2/15] Throne of Glass
âœ… [3/15] Thomas Jefferson and the Tripoli Pirates
âœ… [4/15] The Murder of Roger Ackroyd
âœ… [5/15] The Matchmaker's Playbook
âœ… [6/15] The 10% Entrepreneur
âœ… [7/15] Redeeming Love
âœ… [8/15] Wild Swans
âœ… [9/15] The Star-Touched Queen
âœ… [10/15] The Immortal Life of Henrietta Lacks
âœ… [11/15] The Bane Chronicles
âœ… [12/15] Steve Jobs
âœ… [13/15] City of Ashes
âœ… [14/15] Brain on Fire
âœ… [15/15] The Star-Touched Queen

ðŸŽ‰ AI PRICING COMPLETED (DIFFERENT BOOK SET)

ðŸ“Š FINAL AI PRICING REPORT â€“ DIFFERENT BOOKS
                              Book Title              UPC    ISBN-10       ISBN-13  Our Price (Â£)  Competitor Price (Â£) Discount  Final Price (Â£) status match_identifier
                      Lumberjanes Vol. 1 b6ee99bcf06fc91f 1608866874 9781608866878          45.61                  2.98      10%             2.68  found    9781608866878
               