In [10]:
# ✅ Install gradio and other dependencies if not already
# !pip install gradio selenium beautifulsoup4 transformers emoji pandas

import gradio as gr
import pandas as pd
import emoji
from transformers import pipeline
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
from urllib.parse import urlparse, parse_qs
import time

# ✅ Helper: Get Flipkart review page
def get_review_url(product_url):
    parsed_url = urlparse(product_url)
    pid = parse_qs(parsed_url.query).get("pid", [None])[0]
    path_parts = parsed_url.path.split("/")
    product_slug = path_parts[path_parts.index("p") - 1] if "p" in path_parts else "product"
    return f"https://www.flipkart.com/{product_slug}/product-reviews/{pid}?pid={pid}"

# ✅ Main logic
def scrape_and_summarize(product_url):
    review_url = get_review_url(product_url)
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    driver = webdriver.Chrome(options=options)
    wait = WebDriverWait(driver, 15)
    driver.get(review_url)

    # Sort by "Most Recent"
    try:
        sort_dropdown = wait.until(EC.presence_of_element_located((By.NAME, "sortFilter")))
        Select(sort_dropdown).select_by_value("MOST_RECENT")
        time.sleep(2)
    except:
        pass

    # Fetch product image
    try:
        soup = BeautifulSoup(driver.page_source, 'html.parser')
        img_tag = soup.find('img', class_='DByuf4')
        product_img_url = img_tag['src'] if img_tag else None
    except:
        product_img_url = None

    # Scrape reviews
    reviews_data = []
    seen_reviews = set()
    while len(reviews_data) < 200:
        try:
            wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'ZmyHeo')))
            soup = BeautifulSoup(driver.page_source, 'html.parser')
            review_containers = soup.find_all('div', class_='col')

            for container in review_containers:
                try:
                    review = container.find('div', class_='ZmyHeo')
                    title = container.find('p', class_='z9E0IG')
                    rating = container.find('div', class_='XQDdHH')
                    if review and (title.text, review.text) not in seen_reviews:
                        reviews_data.append({
                            "Title": title.text.strip(),
                            "Review": emoji.replace_emoji(review.text.strip().replace("READ MORE", ""), replace=''),
                            "Rating": int(rating.text.strip()),
                        })
                        seen_reviews.add((title.text, review.text))
                    if len(reviews_data) >= 200:
                        break
                except:
                    continue

            # Click next
            next_btn = driver.find_elements(By.XPATH, "//span[text()='Next']")
            if next_btn:
                driver.execute_script("arguments[0].click();", next_btn[0])
                time.sleep(2)
            else:
                break
        except:
            break

    driver.quit()
    df = pd.DataFrame(reviews_data)

    # ➕ Split by sentiment
    pos_df = df[df["Rating"] >= 4]
    neg_df = df[df["Rating"] <= 2]
    neu_df = df[df["Rating"] == 3]

    total_reviews = len(df)
    pos_count = len(pos_df)
    neg_count = len(neg_df)
    neu_count = len(neu_df)
    avg_rating = round(df["Rating"].mean(), 2) if not df.empty else 0

    # ➕ Summarize
    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

    def summarize_text(df):
        text = ". ".join(df["Review"].tolist())
        words = text.split()
        chunks = [" ".join(words[i:i + 500]) for i in range(0, len(words), 500)]
        summaries = [summarizer(chunk, max_length=130, min_length=60, do_sample=False)[0]['summary_text'] for chunk in chunks]
        return " ".join(summaries)

    pos_sum = summarize_text(pos_df) if not pos_df.empty else "No positive reviews."
    neg_sum = summarize_text(neg_df) if not neg_df.empty else "No negative reviews."
    neu_sum = summarize_text(neu_df) if not neu_df.empty else "No neutral reviews."

    # ➕ Final summary
    final_input = f"Positive: {pos_sum} Negative: {neg_sum} Neutral: {neu_sum}"
    final_summary = summarizer(final_input, max_length=180, min_length=60, do_sample=False)[0]['summary_text']

    image_html = f"<img src='{product_img_url}' width='200'>" if product_img_url else "Image not available"
    return (
        image_html,
        pos_sum,
        neg_sum,
        neu_sum,
        final_summary,
        f"{total_reviews}",
        f"{pos_count}",
        f"{neg_count}",
        f"{neu_count}",
        f"{avg_rating}"
    )


# Gradioo Portion

with gr.Blocks(css="""
body {
    background-color: #f8fafc;
    font-family: 'Segoe UI', sans-serif;
}
#main-frame {
    max-width: 1200px;
    margin: auto;
    background-color: #ffffff;
    padding: 25px;
    border-radius: 14px;
    box-shadow: 0 0 15px rgba(0,0,0,0.08);
}
.gr-textbox label, .gr-markdown, .gr-button, .gr-html {
    color: #0a154f;
}
.stat-card input {
    font-size: 1.6em !important;
    font-weight: bold !important;
    border: none !important;
    background-color: transparent !important;
    text-align: center;
}
#pos-box input { color: #16a34a !important; }   /* Green */
#neg-box input { color: #dc2626 !important; }   /* Red */
#neu-box input { color: #f59e0b !important; }   /* Amber */
#total-box input, #avg-box input { color: #2563eb !important; } /* Blue */
#verdict-box {
    border: 1px solid #cbd5e1;
    padding: 12px;
    background-color: #f1f5f9;
    border-radius: 10px;
    font-size: 1.05em;
}
""") as demo:

    with gr.Column(elem_id="main-frame"):
        gr.Markdown("""
            <h1 style='color: #2e7d32; text-align:center;'>Review Pulse: 
            A DL-Powered Review Tracker for Smarter E-Commerce Choices</h1>
        """)

        url_input = gr.Textbox(label="🔗 Enter the product link here")
        submit_btn = gr.Button("Analyze Reviews")

        # ➤ Stat Cards
        with gr.Row():
            total_out = gr.Textbox(label="Total reviews considered", interactive=False, elem_classes=["stat-card"], elem_id="total-box")
            pos_count_out = gr.Textbox(label="# Positive Reviews", interactive=False, elem_classes=["stat-card"], elem_id="pos-box")
            neg_count_out = gr.Textbox(label="# Negative Reviews", interactive=False, elem_classes=["stat-card"], elem_id="neg-box")
            neu_count_out = gr.Textbox(label="# Neutral Reviews", interactive=False, elem_classes=["stat-card"], elem_id="neu-box")
            avg_rating_out = gr.Textbox(label="Avg Rating", interactive=False, elem_classes=["stat-card"], elem_id="avg-box")

        # ➤ Main Content: Image & Summaries
        with gr.Row():
            with gr.Column(scale=1):
                img_output = gr.HTML(label="Product Image")
            with gr.Column(scale=2):
                with gr.Tabs():
                    with gr.Tab("🟢 Positive"):
                        pos_box = gr.Textbox(label="Summary of Positive Reviews", lines=7, interactive=False)
                    with gr.Tab("🔴 Negative"):
                        neg_box = gr.Textbox(label="Summary of Negative Reviews", lines=7, interactive=False)
                    with gr.Tab("🟡 Neutral"):
                        neu_box = gr.Textbox(label="Summary of Neutral Reviews", lines=7, interactive=False)

        # ➤ Final Verdict
        gr.Markdown("<h4>🧠 Final Verdict</h4>")
        final_output = gr.Textbox(lines=4, interactive=False, elem_id="verdict-box")

        # ➤ Hook Up Logic
        submit_btn.click(
            fn=scrape_and_summarize,
            inputs=url_input,
            outputs=[
                img_output, pos_box, neg_box, neu_box, final_output,
                total_out, pos_count_out, neg_count_out, neu_count_out, avg_rating_out
            ]
        )

# ✅ Launch
demo.launch(inline=True)


Running on local URL:  http://127.0.0.1:7868

To create a public link, set `share=True` in `launch()`.




Device set to use cpu
Your max_length is set to 130, but your input_length is only 81. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=40)
Device set to use cpu
Your max_length is set to 130, but your input_length is only 92. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=46)
Your max_length is set to 130, but your input_length is only 16. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=8)
