In [60]:
# Import packages
import requests
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime

In [61]:
# Header to set the requests as a browser requests
headers = {
    'authority': 'www.amazon.com',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'accept-language': 'en-US,en;q=0.9,bn;q=0.8',
    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="102", "Google Chrome";v="102"',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.0.0 Safari/537.36'
}

In [62]:
# URL of The amazon Review page
reviews_url = 'https://www.amazon.com/Legendary-Whitetails-Journeyman-Jacket-Tarmac/product-reviews/B013KW38RQ/'

In [63]:
# Define Page No
len_page = 4

In [64]:
# Extra Data as Html object from amazon Review page
def reviewsHtml(url, len_page):
    
    # Empty List define to store all pages html data
    soups = []
    
    # Loop for gather all 3000 reviews from all pages via range
    for page_no in range(1, len_page + 1):
        
        # parameter set as page no to the requests body
        params = {
            'ie': 'UTF8',
            'reviewerType': 'all_reviews',
            'filterByStar': 'critical',
            'pageNumber': page_no,
        }
        
        # Request make for each page
        response = requests.get(url, headers=headers)
        
        # Save Html object by using BeautifulSoup4 and lxml parser
        soup = BeautifulSoup(response.text, 'lxml')
        
        # Add single Html page data in master soups list
        soups.append(soup)
        
    return soups

In [65]:
# Grab Reviews name, description, date, stars, title from HTML
def getReviews(html_data):

    # Create Empty list to Hold all data
    data_dicts = []
    
    # Select all Reviews BOX html using css selector
    boxes = html_data.select('div[data-hook="review"]')
    
    # Iterate all Reviews BOX 
    for box in boxes:
        
        # Select Name using css selector and cleaning text using strip()
        # If Value is empty define value with 'N/A' for all.
        try:
            name = box.select_one('[class="a-profile-name"]').text.strip()
        except Exception as e:
            name = 'N/A'

        try:
            stars = box.select_one('[data-hook="review-star-rating"]').text.strip().split(' out')[0]
        except Exception as e:
            stars = 'N/A'   

        try:
            title = box.select_one('[data-hook="review-title"]').text.strip().split('\n')[-1]
        except Exception as e:
            title = 'N/A'

        try:
            # Convert date str to dd/mm/yyy format
            datetime_str = box.select_one('[data-hook="review-date"]').text.strip().split(' on ')[-1]
            date = datetime.strptime(datetime_str, '%B %d, %Y').strftime("%d/%m/%Y")
        except Exception as e:
            date = 'N/A'

        try:
            description = box.select_one('[data-hook="review-body"]').text.strip()
        except Exception as e:
            description = 'N/A'

        # create Dictionary with al review data 
        data_dict = {
            'Name' : name,
            'Stars' : stars,
            'Title' : title,
            'Date' : date,
            'Description' : description
        }

        # Add Dictionary in master empty List
        data_dicts.append(data_dict)
    
    return data_dicts

### <font color="red">Data Process</font>

In [66]:
# Grab all HTML
html_datas = reviewsHtml(reviews_url, len_page)

In [67]:
# Empty List to Hold all reviews data
reviews = []

In [68]:
# Iterate all Html page 
for html_data in html_datas:
    
    # Grab review data
    review = getReviews(html_data)
    
    # add review data in reviews empty list
    reviews += review

In [69]:
# Create a dataframe with reviews Data
df_reviews = pd.DataFrame(reviews)

In [70]:
df_reviews

Unnamed: 0,Name,Stars,Title,Date,Description
0,Melissa Sugar-Gold,5.0,"Durable, Attractive, Multi-Functional Shirt/Ja...",01/08/2022,I purchased various Legendary Whitetails Men's...
1,Kyle,5.0,Very comfortable and looks as advertised,09/07/2024,"Fits well, made from comfortable and durable m..."
2,Nick,5.0,"Bought twice, would buy again",24/02/2024,I bought this jacket first five years ago and ...
3,Jeff,4.0,"Rugged, good look and nice fit, but should you...",02/10/2018,The short answer to if you should go down from...
4,#1NDfan,5.0,Functional & durable,28/04/2024,Bought this for my farmer husband (Christmas 2...
5,Bartek,5.0,Rugged Comfort Meets Style: The Ultimate Shirt...,19/03/2024,The Legendary Whitetails Journeyman Shirt Jack...
6,Loki,3.0,Looks aren't everything,16/04/2024,"Pros: comfortable, stylishCons: when I first p..."
7,Review man,5.0,Looks great,27/08/2024,"Fits great, comfortable and stylish.Lots of co..."
8,Scott,4.0,More Polyester than cotton?,02/09/2024,Just ordered this and very impressed with the ...
9,Aldo,4.0,Nice light jacket.,13/08/2024,Looks great. I wasn’t expecting to be a nice p...


In [71]:
# Save data
df_reviews.to_csv('reviews.csv', index=False)

In [72]:
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer


# Initialize VADER sentiment analyzer
nltk.download('vader_lexicon')
sid = SentimentIntensityAnalyzer()

# Define a function to calculate sentiment scores
def analyze_sentiment(text):
    sentiment_scores = sid.polarity_scores(text)
    return sentiment_scores['compound']

# Apply sentiment analysis to the review column
df_reviews['sentiment_score'] = df_reviews['Description'].apply(analyze_sentiment)

# Classify sentiment based on compound score
df_reviews['sentiment'] = df_reviews['sentiment_score'].apply(lambda score: 'positive' if score > 0 else 'negative' if score < 0 else 'neutral')

# Display the DataFrame with sentiment analysis
df_reviews.head()


[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\rites\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


Unnamed: 0,Name,Stars,Title,Date,Description,sentiment_score,sentiment
0,Melissa Sugar-Gold,5.0,"Durable, Attractive, Multi-Functional Shirt/Ja...",01/08/2022,I purchased various Legendary Whitetails Men's...,0.994,positive
1,Kyle,5.0,Very comfortable and looks as advertised,09/07/2024,"Fits well, made from comfortable and durable m...",0.886,positive
2,Nick,5.0,"Bought twice, would buy again",24/02/2024,I bought this jacket first five years ago and ...,0.9941,positive
3,Jeff,4.0,"Rugged, good look and nice fit, but should you...",02/10/2018,The short answer to if you should go down from...,0.9942,positive
4,#1NDfan,5.0,Functional & durable,28/04/2024,Bought this for my farmer husband (Christmas 2...,0.8922,positive


In [73]:
import openpyxl

# Export the DataFrame to an Excel file
output_file_path = 'review_sentiments.xlsx'
df_reviews.to_excel(output_file_path, index=False)

print(f"Sentiment analysis results saved to {output_file_path}")


Sentiment analysis results saved to review_sentiments.xlsx
