In [42]:
!pip install selenium beautifulsoup4 textblob pandas matplotlib undetected-chromedriver setuptools
!python -m textblob.download_corpora

Finished.


[nltk_data] Downloading package brown to
[nltk_data]     C:\Users\jayth\AppData\Roaming\nltk_data...
[nltk_data]   Package brown is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\jayth\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\jayth\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     C:\Users\jayth\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package conll2000 to
[nltk_data]     C:\Users\jayth\AppData\Roaming\nltk_data...
[nltk_data]   Package conll2000 is already up-to-date!
[nltk_data] Downloading package movie_reviews to
[nltk_data]     C:\Users\jayth\AppData\Roaming\nltk_data...
[nltk_data]   Package movie_reviews is alr

In [43]:
import time
import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from textblob import TextBlob
import matplotlib.pyplot as plt


In [44]:

def scrape_reviews(asin, pages=1):
    """
    Scrapes Amazon product reviews using standard Selenium with stealth settings.

    Parameters:
        asin (str): The ASIN of the Amazon product.
        pages (int): Number of review pages to scrape.

    Returns:
        DataFrame: Contains review rating, title, and body text.
    """
    print(f"Collecting reviews for ASIN: {asin}")

    # Configure Chrome browser options
    options = Options()
    options.add_argument("--headless")  # Run without opening a window
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--disable-blink-features=AutomationControlled")
    options.add_argument(
        "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
    )

    # Start WebDriver (make sure chromedriver is installed and in PATH)
    driver = webdriver.Chrome(options=options)

    all_reviews = []

    for page in range(1, pages + 1):
        url = f"https://www.amazon.com/product-reviews/{asin}/?pageNumber={page}"
        driver.get(url)
        time.sleep(3 + page)  # Add a delay to reduce chance of blocking

        soup = BeautifulSoup(driver.page_source, "html.parser")

        # Detect login redirection (a sign of bot detection)
        title = soup.title.string if soup.title else ""
        if "Sign-In" in title:
            print("Blocked or redirected to login. Stopping scrape.")
            driver.quit()
            return pd.DataFrame()

        reviews = soup.select('div[data-hook="review"]')

        if not reviews:
            print(f"No reviews found on page {page}")
            continue

        for r in reviews:
            try:
                rating = r.select_one('i[data-hook="review-star-rating"]').text.split()[0]
                title = r.select_one('a[data-hook="review-title"]').text.strip()
                body = r.select_one('span[data-hook="review-body"]').text.strip()
                all_reviews.append({
                    "rating": float(rating),
                    "title": title,
                    "body": body
                })
            except:
                continue

        print(f"Page {page} scraped")

    driver.quit()

    if not all_reviews:
        print("No reviews were extracted.")
        return pd.DataFrame()

    return pd.DataFrame(all_reviews)

In [45]:
def analyze_sentiment(df):
    """
    Applies sentiment analysis to each review using TextBlob.

    Parameters:
        df (DataFrame): DataFrame containing the review 'body' text

    Returns:
        DataFrame: The same DataFrame with added polarity and sentiment columns
    """
    print("analysing sentment...")
    
    # Calculate polarity score using TextBlob
    df['polarity'] = df['body'].apply(lambda x: TextBlob(x).sentiment.polarity)

    # Categorize sentiment based on polarity score
    df['sentiment'] = df['polarity'].apply(
        lambda p: 'positive' if p > 0 else ('negative' if p < 0 else 'neutral')
    )
    
    return df


In [46]:
def plot_sentiment(df):
    """
    Plots a bar chart showing the count of positive, neutral, and negative reviews.

    Parameters:
        df (DataFrame): DataFrame with a 'sentiment' column
    """
    sentiment_counts = df['sentiment'].value_counts()

    # Bar chart with custom colors for clarity
    sentiment_counts.plot(kind='bar', title='Sentiment Distribution', color=['green', 'gray', 'red'])
    plt.xlabel('Sentiment')
    plt.ylabel('Review Count')
    plt.grid(True)
    plt.tight_layout()
    plt.show()


In [47]:
# target ASIN and number of pages to scrape
asin = "B0B92Y18GT"
pages = 2

# Scrape reviews
df_reviews = scrape_reviews(asin, pages)

# Preview first few reviews
df_reviews.head()

# Analyze sentiment
df_analyzed = analyze_sentiment(df_reviews)

#  Save the result to a CSV file
df_analyzed.to_csv(f"amazon_reviews_{asin}.csv", index=False)

# Show a preview
df_analyzed.head()

# 📊 Plot sentiment distribution
plot_sentiment(df_analyzed)


Collecting reviews for ASIN: B0B92Y18GT
No reviews found on page 1
No reviews found on page 2
No reviews were extracted.
analysing sentment...


KeyError: 'body'