In [None]:

# HanuAi Assignment - Web Scraping & Sentiment Analysis

# Required Libraries:
# pip install selenium beautifulsoup4 pandas nltk

import re
import time
import uuid

import nltk
import pandas as pd
from bs4 import BeautifulSoup
from nltk.sentiment import SentimentIntensityAnalyzer
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By

# Fetch the VADER lexicon only when it is not already installed locally, so
# repeat (or offline) runs do not have to contact the NLTK download server.
try:
    nltk.data.find("sentiment/vader_lexicon.zip")
except LookupError:
    nltk.download("vader_lexicon")

# Step 1: Open Product Page
# Safety cap: a "Show More" button that never leaves the DOM (for example one
# that is merely disabled once everything is loaded) must not be able to keep
# the pagination loop spinning forever.
MAX_SHOW_MORE_CLICKS = 1000

product_url = "PASTE_PRODUCT_URL_HERE"
driver = webdriver.Chrome()
try:
    driver.get(product_url)
    # Fixed pause after navigation (presumably to let dynamic content load).
    time.sleep(5)

    # Step 2: Navigate to Reviews Section
    try:
        reviews_tab = driver.find_element(
            By.XPATH, "//a[contains(text(),'Reviews')]"
        )
        reviews_tab.click()
        time.sleep(3)
    except WebDriverException:
        # Best effort: carry on and scrape whatever is on the current page.
        # Only Selenium errors are swallowed, so Ctrl-C still interrupts.
        print("Reviews tab not found")

    # Step 3: Pagination Handling
    # Keep clicking "Show More" until Selenium can no longer find or click it.
    for _ in range(MAX_SHOW_MORE_CLICKS):
        try:
            show_more = driver.find_element(
                By.XPATH, "//button[contains(text(),'Show More')]"
            )
            # Click through JavaScript rather than WebElement.click().
            driver.execute_script("arguments[0].click();", show_more)
        except WebDriverException:
            break
        time.sleep(2)
    else:
        print(
            f"Stopped after {MAX_SHOW_MORE_CLICKS} 'Show More' clicks; "
            "some reviews may be missing"
        )

    # Snapshot the rendered HTML while the browser session is still alive.
    page_html = driver.page_source
finally:
    # Always release the browser, even if navigation or scraping failed.
    driver.quit()

# Step 4: Extract Data
soup = BeautifulSoup(page_html, "html.parser")
reviews = soup.find_all("div", class_="review-item")

data = []
for review in reviews:
    # Look each tag up once; a missing tag yields an empty string.
    title_tag = review.find("h3")
    body_tag = review.find("p")
    data.append({
        "ID": str(uuid.uuid4()),  # random per-run identifier
        "Title": title_tag.text.strip() if title_tag is not None else "",
        "Review": body_tag.text.strip() if body_tag is not None else "",
        "Source": "BestBuy Canada",
    })

# Name the columns explicitly so the frame keeps its schema (including
# "Review") even when no reviews were scraped; a frame built from an empty
# list has no columns at all, and a later df["Review"] lookup raises KeyError.
df = pd.DataFrame(data, columns=["ID", "Title", "Review", "Source"])

# Step 5: Sentiment Analysis
sia = SentimentIntensityAnalyzer()

def get_sentiment(text, threshold: float = 0.05) -> str:
    """Classify *text* as "Positive", "Negative" or "Neutral" with VADER.

    The label is derived from the ``compound`` score returned by the
    module-level ``sia`` analyzer. The cut-offs are inclusive, following the
    convention documented by the VADER authors: ``compound >= threshold`` is
    positive and ``compound <= -threshold`` is negative.

    Args:
        text: The review text to score. Non-string values (e.g. ``None`` or
            a NaN from a missing cell) are treated as having no sentiment.
        threshold: Absolute compound score at which a text stops being
            neutral. Defaults to 0.05.

    Returns:
        One of "Positive", "Negative" or "Neutral".
    """
    if not isinstance(text, str):
        # Nothing to analyse; avoid handing a non-string to the analyzer.
        return "Neutral"

    score = sia.polarity_scores(text)["compound"]
    if score >= threshold:
        return "Positive"
    if score <= -threshold:
        return "Negative"
    return "Neutral"

# Label every review with its sentiment, then write the table to CSV
# (index=False omits the DataFrame index column from the file).
review_texts = df["Review"]
df["Sentiment"] = review_texts.map(get_sentiment)

output_path = "bestbuy_reviews_output.csv"
df.to_csv(output_path, index=False)
