# Amazon Web Scrape: Data Science Apparel 

In [1]:
# Import dependencies.

from bs4 import BeautifulSoup
import requests
import time
import datetime
import smtplib


In [2]:
# Connecting to website.

url = 'https://www.amazon.com/Funny-Data-Systems-Business-Analyst/dp/B07FNW9FGJ?customId=B0752XJYNL&customizationToken=MC_Assembly_1%23B0752XJYNL&th=1'

# Upper bound (in seconds) on how long the request may wait for the server.
REQUEST_TIMEOUT_SECONDS = 30

# Sending custom headers did not work in this case, so the request below relies on the requests defaults.
# headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0", "Accept-Encoding":"gzip, deflate", "Accept":"text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "DNT":"1","Connection":"close", "Upgrade-Insecure-Requests":"1"} 

# Without a timeout, requests.get can block indefinitely on a stalled connection.
page = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)

# Fail here on a 4xx/5xx response instead of parsing an error page and
# hitting confusing "NoneType" errors in the extraction cells.
page.raise_for_status()

# Parse the raw response bytes with Python's built-in HTML parser.
soup = BeautifulSoup(page.content, 'html.parser')

In [3]:
# Uncomment the line below to pretty-print the parsed HTML and confirm that the URL was retrieved properly.

# print(soup.prettify())

In [4]:
# Extracting the product title.

# Look up the element whose id is 'productTitle', then keep its text without
# the surrounding whitespace.
title_tag = soup.find(id='productTitle')
title = title_tag.get_text().strip()
print(title)

Funny Got Data MIS Data Systems Business Analyst T-Shirt


In [5]:
# Extracting Amazon's pledge.

# soup.find returns None when no element carries this id, so guard before
# reading the text; a missing badge yields None (the same convention the
# review fields use) instead of raising AttributeError.
pledge_element = soup.find(id='climatePledgeFriendlyBadge')
pledge = pledge_element.get_text().strip() if pledge_element else None
print(pledge)

Climate Pledge Friendly


In [6]:
# Extracting the product price.

# The price text is read from the 'span.a-offscreen' nested inside the first
# 'span.a-price'. Either lookup can return None, so each step is guarded and
# a missing price yields None instead of raising AttributeError.
price_container = soup.select_one('span.a-price')
price_element = price_container.select_one('span.a-offscreen') if price_container else None
price = price_element.text if price_element else None
print(price)

$19.99


In [7]:
# Extracting the different product sizes.

# Number of 'fit_type_<n>' ids to probe (fit_type_0 .. fit_type_4).
FIT_TYPE_COUNT = 5

# Create an empty list to store the sizes.

fit_sizes = []

# Use a For Loop to iterate through the size ids and extract the texts.

for i in range(FIT_TYPE_COUNT):
    fit_id = f"fit_type_{i}"
    fit_tag = soup.find(id=fit_id)

    # Check the element itself before touching .text: soup.find returns None
    # for an unknown id, and calling .text on None would raise AttributeError
    # before the "not found" message below could ever be printed.
    fit_element = fit_tag.text.strip() if fit_tag else ""

    if fit_element:
        fit_size = fit_element
        fit_sizes.append(fit_size)
    else:
        # Reached for a missing element as well as for one with empty text.
        print(f"No fit size found with ID {fit_id}")

In [8]:
# Extracting the rating of the product.

# The rating is read from the 'title' attribute of the 'acrPopover' element.
# Both the element and the attribute may be absent, so each step is guarded;
# a missing rating yields None instead of raising AttributeError.
rating_element = soup.find(id='acrPopover')
rating_title = rating_element.attrs.get('title') if rating_element else None

# Drop the 'out of 5 stars' suffix; whitespace around the number is kept as-is.
rating_text = rating_title.replace('out of 5 stars', "") if rating_title is not None else None
print(rating_text)

4.3 


In [9]:
# Extracting customer reviews.

def _text_of(node, remove=None):
    """Return the stripped text of `node` with `remove` deleted, or None if `node` is missing."""
    if not node:
        return None
    stripped = node.text.strip()
    if remove is None:
        return stripped
    return stripped.replace(remove, "")


def _parse_review(review):
    """Build the author/rating/title/content/date/verified dict for one review element."""
    title_link = review.select_one('a.review-title')
    # The title is taken from the class-less <span> inside the title link.
    title_span = title_link.select_one('span:not([class])') if title_link else None

    return {
        'author': _text_of(review.select_one('span.a-profile-name')),
        'rating': _text_of(review.select_one('i.review-rating'), remove="out of 5 stars"),
        'title': _text_of(title_span),
        'content': _text_of(review.select_one('span.review-text'), remove='\nRead more'),
        'date': _text_of(review.select_one('span.review-date')),
        'verified': _text_of(review.select_one('span.a-size-mini')),
    }


# Every 'div.review' element is treated as one customer review.
review_elements = soup.select('div.review')

scraped_reviews = [_parse_review(review) for review in review_elements]


In [10]:
# Print the scraped reviews.

# (label, dict key) pairs in the order they are printed for each review.
REVIEW_FIELDS = (
    ("Author:", 'author'),
    ("Rating:", 'rating'),
    ("Title:", 'title'),
    ("Content:", 'content'),
    ("Date:", 'date'),
    ("Verified:", 'verified'),
)

for count, review in enumerate(scraped_reviews, start=1):
    for label, key in REVIEW_FIELDS:
        print(label, review[key])
    # Separator line followed by the 1-based review number.
    print('_____________________________', count)


Author: SB
Rating: 5.0 
Title: Very soft T
Content: This shirt is so comfortable.
Date: Reviewed in the United States on October 31, 2021
Verified: Verified Purchase
_____________________________ 1
Author: Liana Doyle
Rating: 5.0 
Title: Great gift
Content: Soft and fits true to size !
Date: Reviewed in the United States on January 8, 2019
Verified: Verified Purchase
_____________________________ 2
Author: MSA
Rating: 1.0 
Title: The printing is horrible and the t-shirt was dusty when it came to me.
Content: The printing is really bad the cotton in the T-shirt was really bad. I got this as a gift and I was embarrassed to give it to my friend.
Date: Reviewed in the United States on April 15, 2023
Verified: Verified Purchase
_____________________________ 3
