In [5]:
!pip install playwright nest_asyncio
!playwright install chromium
!apt-get install libatk1.0-0 libatk-bridge2.0-0 libatspi2.0-0 libxcomposite1


Collecting playwright
  Downloading playwright-1.57.0-py3-none-manylinux1_x86_64.whl.metadata (3.5 kB)
Collecting pyee<14,>=13 (from playwright)
  Downloading pyee-13.0.0-py3-none-any.whl.metadata (2.9 kB)
Downloading playwright-1.57.0-py3-none-manylinux1_x86_64.whl (46.0 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m46.0/46.0 MB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyee-13.0.0-py3-none-any.whl (15 kB)
Installing collected packages: pyee, playwright
Successfully installed playwright-1.57.0 pyee-13.0.0
Downloading Chromium 143.0.7499.4 (playwright build v1200)[2m from https://cdn.playwright.dev/dbazure/download/playwright/builds/chromium/1200/chromium-linux.zip[22m
[1G164.7 MiB [] 0% 0.0s[0K[1G164.7 MiB [] 0% 12.0s[0K[1G164.7 MiB [] 0% 4.7s[0K[1G164.7 MiB [] 1% 3.3s[0K[1G164.7 MiB [] 2% 2.8s[0K[1G164.7 MiB [] 3% 2.3s[0K[1G164.7 MiB [] 4% 2.3s[0K[1

In [6]:
import nest_asyncio
nest_asyncio.apply()

import asyncio, json, csv, re
from pathlib import Path
from urllib.parse import urljoin
from playwright.async_api import async_playwright

# Root of the catalogue; the scraper starts here and resolves relative links against it.
BASE = "https://books.toscrape.com/"

# Number word (as it appears in the star-rating CSS class) -> numeric rating 1..5.
RATING_MAP = dict(zip(("One", "Two", "Three", "Four", "Five"), range(1, 6)))

def parse_price(s):
    """Extract the first decimal number found in a price string.

    Returns the number as a float, or None when `s` is empty/None or
    contains no digits.
    """
    found = re.search(r"([0-9]+(?:\.[0-9]+)?)", s) if s else None
    if found is None:
        return None
    return float(found.group(1))

def parse_stock(s):
    """Return the first run of digits in an availability string as an int.

    Falls back to 0 when `s` is empty/None or holds no digits.
    """
    if s:
        digits = re.search(r"(\d+)", s)
        if digits:
            return int(digits.group(1))
    return 0

async def _first_text(scope, selector):
    """Return the stripped text of the first match for `selector` under `scope`, or "" if none."""
    el = await scope.query_selector(selector)
    if el is None:
        return ""
    return ((await el.text_content()) or "").strip()


async def _scrape_detail(ctx, product_url):
    """Load one product page in a temporary tab and pull out its detail fields.

    Returns a ``(upc, description, image_url, prod_info)`` tuple, where
    ``prod_info`` maps each header cell of the product table to its value.
    Best-effort: if anything fails, the error is printed (not silently
    dropped) and ``(None, "", None, {})`` is returned so the listing-level
    data for the book is still kept.
    """
    detail = await ctx.new_page()
    try:
        await detail.goto(product_url, timeout=60000)

        prod_info = {}
        for tr in await detail.query_selector_all("table.table.table-striped tr"):
            th = await tr.query_selector("th")
            td = await tr.query_selector("td")
            if th and td:
                key = ((await th.text_content()) or "").strip()
                val = ((await td.text_content()) or "").strip()
                prod_info[key] = val

        # The description is the paragraph immediately after the #product_description heading.
        description = await _first_text(detail, "#product_description + p")

        img = await detail.query_selector(".carousel img")
        img_src = await img.get_attribute("src") if img else None
        image_url = urljoin(product_url, img_src) if img_src else None

        return prod_info.get("UPC"), description, image_url, prod_info
    except Exception as exc:
        print(f"   ! Detail scrape failed for {product_url}: {exc!r}")
        return None, "", None, {}
    finally:
        # Always release the tab, whether the scrape succeeded or not.
        await detail.close()


async def scrape_all_books():
    """Crawl every listing page starting at BASE and return one dict per book.

    For each product card the listing fields (title, URL, price, rating,
    stock) are read from the card, then the product page is visited for the
    UPC, description, image URL and the product-information table, whose
    rows are merged into the record under their own header names.
    Pagination follows the "next" link until there is none.

    Returns:
        list[dict]: one record per book, in crawl order. Records whose
        detail page could not be scraped lack the product-table keys.
    """
    rows = []
    book_counter = 0
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            ctx = await browser.new_context()
            page = await ctx.new_page()

            current_url = BASE
            page_num = 1

            print("üî• Starting scraper‚Ä¶ going to homepage...\n")
            await page.goto(BASE, timeout=60000)

            while True:
                await page.wait_for_selector("article.product_pod", timeout=30000)

                print(f"üìÑ Scraping page {page_num} ...")

                cards = await page.query_selector_all("article.product_pod")

                for card in cards:
                    a = await card.query_selector("h3 > a")
                    rel_url = await a.get_attribute("href") if a else None
                    if not rel_url:
                        # Without a link there is no product page to visit; skip instead of crashing.
                        print(f"   ! Skipping a card without a product link on page {page_num}")
                        continue
                    title = await a.get_attribute("title")
                    # hrefs are relative to the listing page they appear on.
                    product_url = urljoin(current_url, rel_url)

                    price_text = await _first_text(card, "p.price_color")
                    price = parse_price(price_text)
                    avail_text = await _first_text(card, "p.instock.availability")
                    stock = parse_stock(avail_text)

                    rating_el = await card.query_selector(".star-rating")
                    rating_class = (await rating_el.get_attribute("class")) if rating_el else None
                    # The rating is encoded as a number word inside the class attribute; 0 if absent.
                    rating = next(
                        (value for word, value in RATING_MAP.items() if word in (rating_class or "")),
                        0,
                    )

                    upc, description, image_url, prod_info = await _scrape_detail(ctx, product_url)

                    rows.append({
                        "title": title,
                        "product_url": product_url,
                        "price": price,
                        "price_text": price_text,
                        "rating": rating,
                        "stock": stock,
                        "upc": upc,
                        "description": description,
                        "image_url": image_url,
                        **prod_info
                    })
                    book_counter += 1

                    # Print every 20 books for reassurance
                    if book_counter % 20 == 0:
                        print(f"   ‚û§ Scraped {book_counter} books so far‚Ä¶ still going strong")

                print(f"‚úÖ Completed page {page_num}\n")

                # Next page
                next_a = await page.query_selector("li.next > a")
                if not next_a:
                    break
                next_href = await next_a.get_attribute("href")
                next_url = urljoin(current_url, next_href)
                current_url = next_url
                page_num += 1

                await page.goto(next_url, timeout=60000)
                # Short pause between listing pages.
                await asyncio.sleep(0.4)
        finally:
            # Close the browser even if a page load or selector wait raised.
            await browser.close()

    print(f"\nüéâ All done! Total books scraped = {book_counter}")
    return rows


# Run the scraper
# Run the scraper (nest_asyncio lets run_until_complete work inside the notebook's running loop).
data = asyncio.get_event_loop().run_until_complete(scrape_all_books())

print(f"\nüì¶ Final: collected {len(data)} books!\n")

# Save output
Path("output").mkdir(exist_ok=True)
csv_path = Path("output/books_all.csv")
json_path = Path("output/books_all.json")

if data:
    # Rows can have different keys (a failed detail page contributes no
    # product-table fields), so build the header from the union of all keys
    # in first-seen order instead of trusting the first row alone.
    fieldnames = list(dict.fromkeys(key for row in data for key in row))
    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)

with open(json_path, "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent=2)

# Only claim the CSV was saved when it was actually written.
if data:
    print(f"üíæ Saved CSV ‚Üí {csv_path}")
print(f"üíæ Saved JSON ‚Üí {json_path}")


üî• Starting scraper‚Ä¶ going to homepage...

üìÑ Scraping page 1 ...
   ‚û§ Scraped 20 books so far‚Ä¶ still going strong
‚úÖ Completed page 1

üìÑ Scraping page 2 ...
   ‚û§ Scraped 40 books so far‚Ä¶ still going strong
‚úÖ Completed page 2

üìÑ Scraping page 3 ...
   ‚û§ Scraped 60 books so far‚Ä¶ still going strong
‚úÖ Completed page 3

üìÑ Scraping page 4 ...
   ‚û§ Scraped 80 books so far‚Ä¶ still going strong
‚úÖ Completed page 4

üìÑ Scraping page 5 ...
   ‚û§ Scraped 100 books so far‚Ä¶ still going strong
‚úÖ Completed page 5

üìÑ Scraping page 6 ...
   ‚û§ Scraped 120 books so far‚Ä¶ still going strong
‚úÖ Completed page 6

üìÑ Scraping page 7 ...
   ‚û§ Scraped 140 books so far‚Ä¶ still going strong
‚úÖ Completed page 7

üìÑ Scraping page 8 ...
   ‚û§ Scraped 160 books so far‚Ä¶ still going strong
‚úÖ Completed page 8

üìÑ Scraping page 9 ...
   ‚û§ Scraped 180 books so far‚Ä¶ still going strong
‚úÖ Completed page 9

üìÑ Scraping page 10 ...
   ‚û§ Scraped 200 boo

In [7]:
 import pandas as pd
import numpy as np
import re

df = pd.read_json("output/books_all.json")

# Coerce the numeric columns: price stays float (NaN on bad values),
# rating and stock become ints with bad/missing values mapped to 0.
df['price'] = pd.to_numeric(df['price'], errors='coerce')
for count_col in ('rating', 'stock'):
    df[count_col] = pd.to_numeric(df[count_col], errors='coerce').fillna(0).astype(int)

# No historical sales are available, so simulate a monthly baseline:
# cheaper and better-rated books sell more. Replace with real sales when available.
np.random.seed(42)
demand_signal = (100 / (df['price'] + 1)) * (df['rating'] / 3.0)
# Multiplicative noise drawn uniformly from [0.6, 1.4).
noise = 0.6 + 0.8 * np.random.rand(len(df))
df['baseline_monthly_sales'] = (np.clip(demand_signal, 0.1, 500) * noise).round(1)


def compute_discount(row):
    """Rule-based discount fraction for a single book row.

    Sums three independent components and caps the total at 0.5:
      * rating tier  - 0.25 for rating <= 1, 0.15 for 2, 0.08 for 3, else 0
      * stock tier   - 0.10 for stock >= 50, 0.05 for stock >= 20, else 0
      * price bump   - 0.05 when the price is above 50

    `row` only needs item access for 'rating', 'stock' and 'price'.
    """
    rating, stock, price = row['rating'], row['stock'], row['price']

    # Lower rating => bigger markdown; 4+ stars (or anything unmatched) earns none.
    if rating <= 1:
        rating_part = 0.25
    elif rating == 2:
        rating_part = 0.15
    elif rating == 3:
        rating_part = 0.08
    else:
        rating_part = 0.0

    # Excess stock earns an extra markdown.
    if stock >= 50:
        stock_part = 0.10
    elif stock >= 20:
        stock_part = 0.05
    else:
        stock_part = 0.0

    # Expensive items get a small cut to lower the purchase barrier.
    price_part = 0.05 if (price and price > 50) else 0.0

    # Cap protects margins.
    return min(rating_part + stock_part + price_part, 0.5)

# Apply the rule-based discount to every book and derive the discounted price.
df['discount_pct'] = df.apply(compute_discount, axis=1)
df['new_price'] = df['price'].mul(1 - df['discount_pct']).round(2)

# Simple elasticity model for the sales response:
# percentage change in quantity = elasticity * percentage change in price.
# -1.2 is an example value for the price elasticity of demand.
elasticity = -1.2
df['pct_price_change'] = df['new_price'].sub(df['price']).div(df['price'])
df['expected_pct_sales_change'] = elasticity * df['pct_price_change']
sales_multiplier = 1 + df['expected_pct_sales_change']
df['expected_new_monthly_sales'] = (df['baseline_monthly_sales'] * sales_multiplier).clip(lower=0).round(1)

# Revenue simulation; unit cost is assumed to be a fixed share of list price (e.g. 60%).
cost_factor = 0.6
df['cost_per_unit'] = df['price'].mul(cost_factor).round(2)
df['baseline_monthly_revenue'] = df['baseline_monthly_sales'].mul(df['price']).round(2)
df['new_monthly_revenue'] = df['expected_new_monthly_sales'].mul(df['new_price']).round(2)
df['delta_revenue'] = df['new_monthly_revenue'].sub(df['baseline_monthly_revenue']).round(2)

# Ten biggest gainers and ten biggest losers by revenue change.
top_gain = df.sort_values('delta_revenue', ascending=False).head(10)
top_loss = df.sort_values('delta_revenue').head(10)

report_cols = ['title','price','new_price','discount_pct','baseline_monthly_sales','expected_new_monthly_sales','baseline_monthly_revenue','new_monthly_revenue','delta_revenue']
for report in (top_gain, top_loss):
    display(report[report_cols])

# Save result
df.to_csv("output/books_pricing_simulation.csv", index=False)
print("Saved pricing simulation to output/books_pricing_simulation.csv")


Unnamed: 0,title,price,new_price,discount_pct,baseline_monthly_sales,expected_new_monthly_sales,baseline_monthly_revenue,new_monthly_revenue,delta_revenue
464,Out of Print: City Lights Spotlight No. 14,53.64,50.96,0.05,4.2,4.5,225.29,229.32,4.03
122,"A Piece of Sky, a Grain of Rice: A Memoir in F...",56.76,53.92,0.05,2.5,2.7,141.9,145.58,3.68
273,"Quarter Life Poetry: Poems for the Young, Brok...",50.89,48.35,0.05,4.3,4.6,218.83,222.41,3.58
337,Aristotle and Dante Discover the Secrets of th...,58.14,55.23,0.05,2.6,2.8,151.16,154.64,3.48
322,City of Glass (The Mortal Instruments #3),56.02,53.22,0.05,2.6,2.8,145.65,149.02,3.37
213,Amatus,50.54,48.01,0.05,2.5,2.7,126.35,129.63,3.28
549,The Man Who Mistook His Wife for a Hat and Oth...,59.45,56.48,0.05,2.7,2.9,160.52,163.79,3.27
823,Into the Wild,56.7,53.86,0.05,2.7,2.9,153.09,156.19,3.1
267,Shtum,55.84,53.05,0.05,2.7,2.9,150.77,153.84,3.07
584,What If?: Serious Scientific Answers to Absurd...,53.68,51.0,0.05,2.7,2.9,144.94,147.9,2.96


Unnamed: 0,title,price,new_price,discount_pct,baseline_monthly_sales,expected_new_monthly_sales,baseline_monthly_revenue,new_monthly_revenue,delta_revenue
704,"Unstuffed: Decluttering Your Home, Mind, and Soul",58.09,40.66,0.3,0.4,0.5,23.24,20.33,-2.91
57,The Pioneer Woman Cooks: Dinnertime: Comfort C...,56.41,39.49,0.3,0.4,0.5,22.56,19.74,-2.82
855,Darkfever (Fever #1),56.02,39.21,0.3,0.4,0.5,22.41,19.6,-2.81
56,The Secret of Dreadwillow Carse,56.13,39.29,0.3,0.4,0.5,22.45,19.64,-2.81
995,Alice in Wonderland (Alice's Adventures in Won...,55.53,38.87,0.3,0.4,0.5,22.21,19.43,-2.78
239,The Rosie Project (Don Tillman #1),54.04,37.83,0.3,0.4,0.5,21.62,18.92,-2.7
222,"We Are Robin, Vol. 1: The Vigilante Business (...",53.9,37.73,0.3,0.4,0.5,21.56,18.86,-2.7
899,Girl Online On Tour (Girl Online #2),53.47,37.43,0.3,0.4,0.5,21.39,18.72,-2.67
86,orange: The Complete Collection 1 (orange: The...,48.41,36.31,0.25,0.8,1.0,38.73,36.31,-2.42
766,The Case for Christ (Cases for Christianity),47.84,35.88,0.25,0.8,1.0,38.27,35.88,-2.39


Saved pricing simulation to output/books_pricing_simulation.csv


In [1]:
# === Cell: install libs (run once) ===
!pip install vaderSentiment sentence-transformers requests newsapi-python


Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Collecting newsapi-python
  Downloading newsapi_python-0.2.7-py2.py3-none-any.whl.metadata (1.2 kB)
Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl (125 kB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m126.0/126.0 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading newsapi_python-0.2.7-py2.py3-none-any.whl (7.9 kB)
Installing collected packages: vaderSentiment, newsapi-python
Successfully installed newsapi-python-0.2.7 vaderSentiment-3.3.2


In [2]:

import os
from getpass import getpass

# SECURITY: never hard-code API keys in a notebook -- anything written here is
# readable by everyone the notebook is shared with.
# NOTE(review): the key that was previously committed on this line should be
# treated as leaked and revoked.
# Reuse a key already present in the environment; otherwise prompt for one
# without echoing it. A blank answer leaves the key empty, which makes
# fetch_top_headlines take its Google News RSS fallback.
if not os.environ.get('NEWSAPI_KEY'):
    os.environ['NEWSAPI_KEY'] = getpass('NewsAPI key (blank = use RSS fallback): ').strip()


In [3]:
# === Cell: imports & helpers ===
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from sentence_transformers import SentenceTransformer, util
from newsapi import NewsApiClient
import requests, datetime, math, os

# initialize
# Module-level singletons used by the helper functions defined in this cell
# and by later cells, so the models are constructed once per kernel.
vader = SentimentIntensityAnalyzer()
# NOTE(review): presumably downloads the model files on first use -- confirm
# the runtime has network access.
embedder = SentenceTransformer('all-MiniLM-L6-v2')   # compact + fast

# Helper: sentiment (VADER)
def get_sentiment_vader(text):
    """Score `text` with the shared VADER analyzer.

    Returns the analyzer's polarity dict ('compound', 'neg', 'neu', 'pos').
    Empty or None text short-circuits to a fully neutral score without
    calling the analyzer.
    """
    if text:
        return vader.polarity_scores(text)
    return {'compound': 0.0, 'neg':0.0, 'neu':1.0, 'pos':0.0}

# Helper: fetch headlines via NewsAPI (requires KEY). Fallback to Google News RSS if no key.
def fetch_top_headlines(api_key=None, query=None, language='en', page_size=50):
    """Fetch current news headlines as a list of dicts.

    Args:
        api_key: NewsAPI key. When truthy, NewsAPI top headlines are used;
            otherwise the Google News RSS feed is used as a fallback.
        query: optional search phrase. None/empty means general top stories.
        language: language code passed to NewsAPI (not used by the RSS path).
        page_size: maximum number of headlines to return.

    Returns:
        list[dict]: each item has 'title' (str), 'desc' (str) and 'source'.
        'title' and 'desc' are never None, so callers can concatenate them.
        The RSS path returns an empty list on a non-200 response.
    """
    if api_key:
        newsapi = NewsApiClient(api_key=api_key)
        top = newsapi.get_top_headlines(q=query, language=language, page_size=page_size)
        return [
            {
                'title': a.get('title') or '',
                'desc': a.get('description') or '',
                # 'source' may be present but null; guard before reading its name.
                'source': (a.get('source') or {}).get('name'),
            }
            for a in top.get('articles', [])
        ]

    # RSS fallback (simple): Google News RSS for a query or general top stories.
    import xml.etree.ElementTree as ET

    if query:
        # Let requests build the query string so characters such as '&', '='
        # and '#' inside the phrase are percent-encoded instead of corrupting it.
        r = requests.get('https://news.google.com/rss/search', params={'q': query}, timeout=20)
    else:
        r = requests.get('https://news.google.com/rss', timeout=20)

    if r.status_code != 200:
        print(f"Headline RSS request failed with HTTP {r.status_code}; returning no headlines")
        return []

    # NOTE(review): xml.etree is not hardened against maliciously constructed
    # XML; acceptable for this fixed, trusted feed URL, but do not reuse for
    # arbitrary feeds.
    root = ET.fromstring(r.content)
    headlines = []
    for item in root.findall('.//item')[:page_size]:
        headlines.append({
            # findtext gives None for a missing element; normalise to ''.
            'title': item.findtext('title') or '',
            'desc': item.findtext('description') or '',
            'source': 'rss',
        })
    return headlines

# Helper: compute similarity between one text and many headlines (returns max similarity and best headline)
def max_headline_similarity(text, headlines, encode_cache=None):
    """Find the headline most similar to `text` by embedding cosine similarity.

    Args:
        text: the text to compare (e.g. a product description).
        headlines: list of dicts with optional 'title' and 'desc' entries.
        encode_cache: accepted for backward compatibility; currently unused.

    Returns:
        (best_score, best_headline): the highest cosine similarity as a float
        and the matching headline dict, or (0.0, None) when `text` or
        `headlines` is empty.
    """
    if not text or not headlines:
        return 0.0, None
    # encode product text
    v_text = embedder.encode(text, convert_to_tensor=True)
    # `or ''` (rather than a .get default) also covers keys that are present
    # but set to None, which would otherwise break the concatenation.
    headlines_text = [((h.get('title') or '') + ' ' + (h.get('desc') or '')) for h in headlines]
    h_emb = embedder.encode(headlines_text, convert_to_tensor=True)
    sims = util.pytorch_cos_sim(v_text, h_emb)[0]   # similarities of text vs. each headline
    best_idx = int(sims.argmax())
    return float(sims[best_idx]), headlines[best_idx]


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [8]:
# === Cell: policy parameters & integration with your df ===
# Configure
# Policy knobs for the news/sentiment-aware pricing cells below.
NEWS_API_KEY = os.getenv('NEWSAPI_KEY')  # set in Colab env, or None to use RSS fallback
MAX_NEWS_BOOST = 0.20   # maximum additional discount from news (20%)
NEWS_SIM_THRESH = 0.55  # threshold to consider relevant
# Keyword lists are matched as plain substrings of the lower-cased product text
# in compute_signals_for_df.
# NOTE(review): substring matching means e.g. 'exam' also hits "example" and
# 'trip' hits "strip" -- confirm this is intended.
EXAM_KEYWORDS = ['exam','textbook','revision','study','guide','solved','practice','curriculum']
TRAVEL_KEYWORDS = ['travel','vacation','flight','tourism','trip','holiday','visa']

# final_pricing applies MIN_MARGIN as a markup over unit cost:
# the price floor is cost_per_unit * (1 + MIN_MARGIN).
MIN_MARGIN = 0.05      # don't go below 5% margin
DISCOUNT_CAP = 0.5     # never exceed 50% discount

# fetch recent headlines (global)
# No query is passed, so this requests general top headlines.
headlines = fetch_top_headlines(api_key=NEWS_API_KEY, page_size=50)
print(f"Fetched {len(headlines)} headlines (sample):", headlines[:3])

# compute sentiment + news relevance for each product
def compute_signals_for_df(df):
    """Attach sentiment, news-relevance and seasonal discount signals to `df`.

    For every row the description is scored with VADER, compared against the
    module-level `headlines` by embedding cosine similarity, and scanned for
    exam/travel keywords. The results are returned as extra columns:
    'vader_compound', 'best_headline', 'best_headline_sim', 'news_discount',
    'sent_adj' and 'seasonal_discount'.

    The input frame is not modified. Calling this on a frame that already
    carries the signal columns replaces them rather than duplicating them,
    so the cell can safely be re-run.

    Args:
        df: DataFrame with a 'description' column; 'product_type' and
            'category' are used for keyword matching when present.

    Returns:
        A new DataFrame: `df` plus the six signal columns, same index.
    """
    signal_cols = ['vader_compound', 'best_headline', 'best_headline_sim',
                   'news_discount', 'sent_adj', 'seasonal_discount']

    # One canonical description string per row (missing -> ''), used for both
    # the embedding and the sentiment so the two signals see the same text.
    descs = df['description'].fillna('').astype(str).tolist()
    h_texts = [((h.get('title') or '') + ' ' + (h.get('desc') or '')) for h in headlines]

    # Full description-vs-headline similarity matrix, computed once.
    sim_matrix = None
    if descs and h_texts:
        desc_embs = embedder.encode(descs, convert_to_tensor=True)
        h_embs = embedder.encode(h_texts, convert_to_tensor=True)
        sim_matrix = util.pytorch_cos_sim(desc_embs, h_embs)

    news_global_intensity = 1.0  # could be derived from trending score or volume

    results = []
    # `pos` is the row's position, which is what indexes descs / sim_matrix;
    # the index label from iterrows() is not a position unless the index
    # happens to be 0..n-1.
    for pos, (_, row) in enumerate(df.iterrows()):
        desc = descs[pos]
        compound = get_sentiment_vader(desc)['compound']

        if sim_matrix is not None:
            sim_scores = sim_matrix[pos]
            best_idx = int(sim_scores.argmax())
            best_sim = float(sim_scores[best_idx])
            best_headline = headlines[best_idx]
        else:
            best_sim, best_headline = 0.0, None

        # News contributes only when the best match clears the relevance threshold.
        news_discount = 0.0
        if best_sim >= NEWS_SIM_THRESH:
            news_discount = news_global_intensity * best_sim * MAX_NEWS_BOOST

        # exam/travel keyword boost from description or category (substring match)
        text_lower = (desc + ' ' + str(row.get('product_type', '')) + ' ' + str(row.get('category', ''))).lower()
        exam_boost = any(k in text_lower for k in EXAM_KEYWORDS)
        travel_boost = any(k in text_lower for k in TRAVEL_KEYWORDS)

        # Sentiment mapping: negative copy -> more discount, positive -> less.
        if compound <= -0.4:
            sent_adj = 0.15
        elif compound <= -0.15:
            sent_adj = 0.08
        elif compound >= 0.5:
            sent_adj = -0.06
        elif compound >= 0.25:
            sent_adj = -0.03
        else:
            sent_adj = 0.0

        # Seasonal boosts are additive.
        seasonal_discount = 0.0
        if exam_boost:
            seasonal_discount += 0.12
        if travel_boost:
            seasonal_discount += 0.10

        results.append({
            'vader_compound': compound,
            'best_headline': best_headline,
            'best_headline_sim': best_sim,
            'news_discount': round(news_discount, 4),
            'sent_adj': sent_adj,
            'seasonal_discount': seasonal_discount
        })

    signals_df = pd.DataFrame(results, index=df.index, columns=signal_cols)
    # Drop stale signal columns from a previous run before attaching fresh ones.
    base = df.drop(columns=signal_cols, errors='ignore')
    return pd.concat([base, signals_df], axis=1)

# Rebind df to the frame returned by compute_signals_for_df (original columns plus the signal columns).
df = compute_signals_for_df(df)


Fetched 30 headlines (sample): [{'title': 'Stock futures rise as big week of economic data kicks off: Live updates - CNBC', 'desc': 'The moves come after the S&P 500 and Nasdaq Composite fell last week, as Oracle and Broadcom led a rotation away from artificial intelligence stocks.', 'source': 'CNBC'}, {'title': 'Morocco floods: Dozens killed in port city of Safi after heavy rains - BBC', 'desc': "At least 70 homes have been inundated in Safi's old city centre, officials say.", 'source': 'BBC News'}, {'title': 'Houses floated away in this Alaska Native village. Now residents want to move - NPR', 'desc': 'Kwigillingok, Alaska, has long grappled with erosion and flooding. Residents want to move to higher ground, further inland, especially after the remnants of Typhoon Halong damaged nearly every house.', 'source': 'NPR'}]


In [10]:
# === Cell: combine discounts & protect margins, recalc revenues (uses your elasticity code) ===
def final_pricing(df, elasticity=-1.2, cost_factor=0.6, min_margin=None,
                  discount_cap=None, max_price_increase=0.3):
    """Combine all discount signals, enforce a margin floor and re-simulate revenue.

    Args:
        df: DataFrame with 'price', 'baseline_monthly_sales', 'news_discount',
            'sent_adj' and 'seasonal_discount'; 'discount_pct' is used as the
            base discount when present (otherwise 0).
        elasticity: price elasticity of demand used for the sales response.
        cost_factor: unit cost as a fraction of list price.
        min_margin: minimum markup over unit cost; defaults to MIN_MARGIN.
        discount_cap: largest allowed combined discount; defaults to DISCOUNT_CAP.
        max_price_increase: largest allowed negative discount (price rise).

    Returns:
        A copy of `df` with 'base_discount', 'combined_discount',
        'proposed_new_price', 'cost_per_unit', 'effective_discount' and the
        recomputed sales/revenue columns. The input frame is not modified.
    """
    # Resolve the module-level policy defaults at call time.
    if min_margin is None:
        min_margin = MIN_MARGIN
    if discount_cap is None:
        discount_cap = DISCOUNT_CAP

    df = df.copy()

    # Start from the rule-based discount (0 if that step was not run) and add the signals.
    df['base_discount'] = df.get('discount_pct', 0.0)
    df['combined_discount'] = (
        df['base_discount']
        + df['news_discount'].fillna(0)
        + df['sent_adj'].fillna(0)
        + df['seasonal_discount'].fillna(0)
    )
    # A negative discount is a price increase; bound both directions.
    df['combined_discount'] = df['combined_discount'].clip(lower=-max_price_increase, upper=discount_cap)

    # Proposed price, then lift anything that would fall below cost * (1 + min_margin).
    df['proposed_new_price'] = (df['price'] * (1 - df['combined_discount'])).round(2)
    df['cost_per_unit'] = (df['price'] * cost_factor).round(2)
    min_allowed_price = (df['cost_per_unit'] * (1 + min_margin)).round(2)
    price_violations = df['proposed_new_price'] < min_allowed_price
    df.loc[price_violations, 'proposed_new_price'] = min_allowed_price[price_violations]
    # The discount actually granted, after any margin-floor adjustment.
    df['effective_discount'] = 1 - (df['proposed_new_price'] / df['price'])

    # Elasticity simulation: pct change in quantity = elasticity * pct change in price.
    df['pct_price_change'] = (df['proposed_new_price'] - df['price']) / df['price']
    df['expected_pct_sales_change'] = elasticity * df['pct_price_change']
    df['expected_new_monthly_sales'] = (df['baseline_monthly_sales'] * (1 + df['expected_pct_sales_change'])).clip(lower=0).round(1)
    df['new_monthly_revenue'] = (df['expected_new_monthly_sales'] * df['proposed_new_price']).round(2)
    df['baseline_monthly_revenue'] = (df['baseline_monthly_sales'] * df['price']).round(2)
    df['delta_revenue'] = (df['new_monthly_revenue'] - df['baseline_monthly_revenue']).round(2)

    return df

df = final_pricing(df)

# Inspect the ten biggest winners, then the ten biggest losers, by revenue change.
inspect_cols = ['title','price','proposed_new_price','effective_discount','vader_compound','best_headline_sim','news_discount','sent_adj','seasonal_discount','delta_revenue']
for ascending in (False, True):
    display(df.sort_values('delta_revenue', ascending=ascending).head(10)[inspect_cols])

# Save updated simulation
df.to_csv('output/books_pricing_with_news_sentiment.csv', index=False)
print("Saved pricing with news+sentiment to output/books_pricing_with_news_sentiment.csv")


Unnamed: 0,title,price,proposed_new_price,effective_discount,vader_compound,best_headline_sim,news_discount,sent_adj,seasonal_discount,delta_revenue
286,Luis Paints the World,53.95,50.17,0.070065,0.967,0.165523,0.0,-0.06,0.0,3.23
481,Future Shock (Future Shock #1),55.65,50.64,0.090027,0.9717,0.198165,0.0,-0.06,0.1,3.22
441,Art and Fear: Observations on the Perils (and ...,48.63,45.71,0.060045,0.7872,0.191863,0.0,-0.06,0.12,2.91
27,Aladdin and His Wonderful Lamp,53.13,49.41,0.070017,0.9864,0.080211,0.0,-0.06,0.0,2.81
275,"Overload: How to Unplug, Unwind, and Unleash Y...",52.15,48.5,0.06999,0.9873,0.188911,0.0,-0.06,0.0,2.77
807,Me Talk Pretty One Day,57.6,49.54,0.139931,0.9632,0.125382,0.0,-0.06,0.0,2.77
648,The Perfect Play (Play by Play #1),59.99,55.79,0.070012,0.9956,0.173418,0.0,-0.06,0.0,2.76
529,How to Cook Everything Vegetarian: Simple Meat...,46.01,43.25,0.059987,0.9967,0.10094,0.0,-0.06,0.12,2.76
165,Suddenly in Love (Lake Haven #1),55.99,48.15,0.140025,0.991,0.260303,0.0,-0.06,0.0,2.69
543,A Short History of Nearly Everything,52.4,47.68,0.090076,0.9802,0.250914,0.0,-0.06,0.1,2.55


Unnamed: 0,title,price,proposed_new_price,effective_discount,vader_compound,best_headline_sim,news_discount,sent_adj,seasonal_discount,delta_revenue
70,The Art Forger,40.76,26.49,0.350098,-0.7008,0.15201,0.0,0.15,0.12,-9.59
758,The Demon-Haunted World: Science as a Candle i...,52.25,35.53,0.32,-0.9089,0.307288,0.0,0.15,0.12,-8.98
467,Man's Search for Meaning,29.48,19.16,0.350068,-0.8603,0.191196,0.0,0.15,0.12,-8.86
921,"The Sandman, Vol. 1: Preludes and Nocturnes (T...",54.12,34.09,0.370103,-0.8535,0.12677,0.0,0.15,0.1,-8.79
322,City of Glass (The Mortal Instruments #3),56.02,39.21,0.300071,-0.9551,0.234702,0.0,0.15,0.1,-8.41
88,On a Midnight Clear,14.07,9.43,0.32978,-0.9719,0.202603,0.0,0.15,0.1,-8.31
489,"A Murder Over a Girl: Justice, Gender, Junior ...",13.2,8.84,0.330303,-0.988,0.454897,0.0,0.15,0.1,-8.29
226,"Unbroken: A World War II Story of Survival, Re...",45.95,28.95,0.369967,-0.975,0.280812,0.0,0.15,0.1,-7.94
964,Myriad (Prentor #1),58.75,39.95,0.32,-0.9804,0.174344,0.0,0.15,0.12,-7.75
811,"Life, the Universe and Everything (Hitchhiker'...",33.26,20.96,0.369814,-0.9724,0.236396,0.0,0.15,0.1,-7.69


Saved pricing with news+sentiment to output/books_pricing_with_news_sentiment.csv


In [11]:
print(f"Total headlines fetched: {len(headlines)}\n")

# One numbered entry per headline: title, optional description, then source.
for i, h in enumerate(headlines, start=1):
    entry = [f"{i}. {h.get('title')}"]
    if h.get('desc'):
        entry.append(f"   ‚Ü≥ {h.get('desc')}")
    entry.append(f"   Source: {h.get('source')}\n")
    print("\n".join(entry))


Total headlines fetched: 30

1. Stock futures rise as big week of economic data kicks off: Live updates - CNBC
   ‚Ü≥ The moves come after the S&P 500 and Nasdaq Composite fell last week, as Oracle and Broadcom led a rotation away from artificial intelligence stocks.
   Source: CNBC

2. Morocco floods: Dozens killed in port city of Safi after heavy rains - BBC
   ‚Ü≥ At least 70 homes have been inundated in Safi's old city centre, officials say.
   Source: BBC News

3. Houses floated away in this Alaska Native village. Now residents want to move - NPR
   ‚Ü≥ Kwigillingok, Alaska, has long grappled with erosion and flooding. Residents want to move to higher ground, further inland, especially after the remnants of Typhoon Halong damaged nearly every house.
   Source: NPR

4. What's behind the wellness claims for the synthetic dye methylene blue? - NPR
   ‚Ü≥ Taking drops of a medical dye is trending in wellness and biohacking circles. Some influencers claim it boosts longevity and brain 