In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
from datetime import datetime

In [2]:
headers = {
    'authority' : 'www.amazon.com',
    'accept' : 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,/;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'accpet-language' : 'en-US,en;q=0.9',
    'sec-ch-ua' : '"Google Chrome";v="125", "Chromium";v="125", "Not.A/Brand";v="24"',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36' , 'Accept-Language':'en-US, en;q=0.5'
}

In [3]:
reviews_URL = 'https://www.amazon.com/i3-12100F-processor-featuring-support-required/product-reviews/B09NPJX7PV/ref=cm_cr_getr_d_paging_btm_next_3?ie=UTF8&pageNumber=1&reviewerType=all_reviews&filterByStar=five_star#reviews-filter-bar'

In [4]:
len_page = 15

In [5]:
# Extra Data as Html object from amazon Review page
def reviewsHtml(url, len_page):
    
    # Empty List define to store all pages html data
    soups = []
    
    # Loop for gather all 3000 reviews from 300 pages via range
    for page_no in range(2, len_page + 1):
        
        # parameter set as page no to the requests body
        params = {
            'ie': 'UTF8',
            'reviewerType': 'all_reviews',
            'filterByStar': 'critical',
            'pageNumber': page_no,
        }
        
        # Request make for each page
        response = requests.get(url, headers=headers , params=params)
        
        # Save Html object by using BeautifulSoup4 and lxml parser
        soup = BeautifulSoup(response.text, 'lxml')
          # Add single Html page data in master soups list
        soups.append(soup)
        
    return soups

In [6]:
def getReviews(html_data):

    # Create Empty list to Hold all data
    data_dicts = []
    
    # Select all Reviews BOX html using css selector
    boxes = html_data.select('div[data-hook="review"]')
    
    # Iterate all Reviews BOX 
    for box in boxes:
        
        # Select Name using css selector and cleaning text using strip()
        # If Value is empty define value with 'N/A' for all.
        try:
            name = box.select_one('[class="a-profile-name"]').text.strip()
        except Exception as e:
            name = 'N/A'

        try:
            stars = box.select_one('[data-hook="review-star-rating"]').text.strip().split(' out')[0]
        except Exception as e:
            stars = 'N/A'   

        try:
            title = box.select_one('[data-hook="review-title"]').text.strip()
        except Exception as e:
            title = 'N/A'
        try:
            # Convert date str to dd/mm/yyy format
            datetime_str = box.select_one('[data-hook="review-date"]').text.strip().split(' on ')[-1]
            date = datetime.strptime(datetime_str, '%B %d, %Y').strftime("%d/%m/%Y")
        except Exception as e:
            date = 'N/A'

        try:
            description = box.select_one('[data-hook="review-body"]').text.strip()
        except Exception as e:
            description = 'N/A'

        # create Dictionary with al review data 
        data_dict = {
            'Name' : name,
            'Stars' : stars,
            'Title' : title,
            'Date' : date,
            'Description' : description
        }

        # Add Dictionary in master empty List
        data_dicts.append(data_dict)
    
    return data_dicts    

In [7]:
html_datas = reviewsHtml(reviews_URL, len_page)

In [8]:
reviews = []

In [9]:
for html_data in html_datas:
    
    # Grab review data
    review = getReviews(html_data)
    
    # add review data in reviews empty list
    reviews += review

In [10]:
df_reviews = pd.DataFrame(reviews)

In [11]:
df_reviews

Unnamed: 0,Name,Stars,Title,Date,Description
0,Amazon Customer,5.0,5.0 out of 5 stars\nIts a damn good budget CPU,23/03/2024,Its a damn good budget CPU. Great for 1080p -1...
1,Charles Schroen,5.0,5.0 out of 5 stars\nNice upgrade,24/03/2024,This cpu is part of my upgrade from socket 120...
2,Nate,5.0,5.0 out of 5 stars\nIts Really Good and Underr...,21/04/2024,Really good for budget builds
3,Derek Zoolander,5.0,5.0 out of 5 stars\nWhat a great budget CPU,22/05/2024,Getting over 100fps and even 150fps in some ca...
4,Brian Russell Jones,5.0,5.0 out of 5 stars\nMust use a GPU with this C...,30/06/2024,Works like a charm. For all my Steam Games it ...
5,Andrés Aquino,5.0,5.0 out of 5 stars\nRecomendación basada en el...,11/07/2024,El procesador Intel Core i3-12100F ha superado...
6,WAndY,5.0,5.0 out of 5 stars\nGreat performance for the ...,24/11/2023,I loved the i3-12100f because it is very cheap...
7,Single Fluffy Braincell,5.0,5.0 out of 5 stars\nIt Sure Is A Processor!,16/05/2024,We only bought this to update BIOS on a new mo...
8,Ryan Montgomery,5.0,5.0 out of 5 stars\nBudget Gaming King,13/04/2024,I used this to build a pc for my wife. The per...
9,Hiro,5.0,5.0 out of 5 stars\nDoes the job,07/05/2024,Bought as a test CPU to see if motherboard was...


In [12]:
df_reviews.to_csv('reviews_4star_1_i3.csv', index=False)