In [9]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np

In [10]:
# Function to extract the title of the product as a string value
def get_title(soup):
    """Return the product title, or "" when it cannot be read.

    Looks up the first <span id="productTitle"> in ``soup`` and returns its
    ``.text`` with leading and trailing whitespace stripped.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``.

    Returns:
        The stripped title string, or "" if an AttributeError occurs while
        reading it (for example when ``find`` returns None).
    """
    try:
        return soup.find("span", attrs={"id": "productTitle"}).text.strip()
    except AttributeError:
        # No matching span was found, or the result has no usable text.
        return ""

# Function to extract the price of the product
def get_price(soup):
    """Return the product price string, or "" when it cannot be read.

    Reads the ``.string`` of the first <span class="a-offscreen"> in ``soup``
    and strips surrounding whitespace.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``.

    Returns:
        The stripped price text (e.g. "$149.99"), or "" when the span is
        missing or its ``.string`` is None (both surface as AttributeError).
    """
    try:
        return soup.find("span", attrs={"class": "a-offscreen"}).string.strip()
    except AttributeError:
        # A single attempt is enough: the old fallback repeated this exact
        # lookup, so it could never succeed where the first attempt failed.
        return ""

# Function to extract the savings percentage
def get_savings_percentage(soup):
    """Return the savings percentage text, or "-0%" when it cannot be read.

    Searches ``soup`` for the first <span> whose class attribute equals the
    savings-badge class string below and returns its stripped ``.string``.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``.

    Returns:
        The stripped savings text (e.g. "-29%"), or "-0%" if an
        AttributeError occurs while reading it.
    """
    savings_class = "a-size-large a-color-price savingPriceOverride aok-align-center reinventPriceSavingsPercentageMargin savingsPercentage"
    try:
        badge = soup.find("span", attrs={"class": savings_class})
        return badge.string.strip()
    except AttributeError:
        # No badge on the page (or no plain string inside it): report no discount.
        return "-0%"


# Function to extract the average rating of the product
def get_average_rating(soup):
    """Return the average rating text, or "" when it cannot be read.

    Two lookups are tried in order:

    1. the first <i> whose class is "a-icon a-icon-star a-star-4-5";
    2. the first <span class="a-icon-alt">.

    The stripped ``.string`` of the first lookup that succeeds is returned.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``.

    Returns:
        The stripped rating text (e.g. "4.4 out of 5 stars"), or "" when
        both lookups fail with AttributeError.

    Raises:
        Any exception other than AttributeError propagates to the caller.
        The fallback used to be guarded by a bare ``except:``, which also
        swallowed KeyboardInterrupt and hid real errors.
    """
    try:
        return soup.find("i", attrs={"class": "a-icon a-icon-star a-star-4-5"}).string.strip()
    except AttributeError:
        # This icon class is specific to one star variant; fall through to
        # the generic alt-text span used below.
        pass

    try:
        return soup.find("span", attrs={"class": "a-icon-alt"}).string.strip()
    except AttributeError:
        return ""

# Function to extract the number of user reviews on the product
def get_review_count(soup):
    """Return the review-count text, or "" when it cannot be read.

    Reads the ``.string`` of the first <span id="acrCustomerReviewText"> in
    ``soup`` and strips surrounding whitespace.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``.

    Returns:
        The stripped text (e.g. "497 ratings"), or "" if an AttributeError
        occurs while reading it.
    """
    try:
        counter = soup.find("span", attrs={"id": "acrCustomerReviewText"})
        return counter.string.strip()
    except AttributeError:
        # Span missing, or it holds no plain string.
        return ""

# Function to extract the availability of the product
def get_availability(soup):
    """Return the availability text, or "Not Available" when it cannot be read.

    Finds the first <div id="availability"> in ``soup``, then the first
    <span> inside it, and returns that span's stripped ``.string``.

    Args:
        soup: Parsed page object exposing ``find(name, attrs=...)``.

    Returns:
        The stripped availability text (e.g. "In Stock."), or
        "Not Available" if an AttributeError occurs at any step.
    """
    try:
        container = soup.find("div", attrs={"id": "availability"})
        return container.find("span").string.strip()
    except AttributeError:
        # Either the container, its span, or the span's string is missing.
        return "Not Available"

In [11]:
# Connecting to the website
if __name__ == "__main__":
    # Scrape an Amazon search-results page: collect the product links, visit
    # each product page, extract its details and save them to amazon_data.csv.

    # Webpage URL (using 'monitors' search term on Amazon as an example)
    URL = "https://www.amazon.com/s?k=monitors&crid=3LOMTPK6Z691W&sprefix=monitors%2Caps%2C170&ref=nb_sb_noss_1"

    # Request headers sent with every request (input own User-Agent here)
    HEADERS = ({'User-Agent' : '', 'Accept-Language' : 'en-US, en; q = 0.5'})

    # Seconds to wait for each response. requests applies no timeout by
    # default, so without this a stalled connection would hang the cell.
    REQUEST_TIMEOUT = 30

    # HTTP request for the search-results page
    webpage = requests.get(URL, headers = HEADERS, timeout = REQUEST_TIMEOUT)

    # Parsed search-results page
    soup = BeautifulSoup(webpage.content, "html.parser")

    # Anchor tags pointing at the individual product pages
    links = soup.find_all("a", attrs = {"class" : "a-link-normal s-no-outline"})

    # Keep only anchors that actually carry an href: link.get returns None
    # otherwise, which would raise TypeError when building the product URL.
    links_list = []
    for link in links:
        href = link.get("href")
        if href:
            links_list.append(href)

    # One list per output column; every product appends one value to each
    productDictionary = {"Title" : [], "Price" : [], "Savings" : [], "Rating" : [], "Reviews" : [], "Availability" : []}

    # Loop for extracting product details from each link
    for link in links_list:
        try:
            new_webpage = requests.get("https://www.amazon.com" + link, headers = HEADERS, timeout = REQUEST_TIMEOUT)
        except requests.RequestException:
            # Best effort: a product page that fails or times out is skipped
            # so one bad request does not discard the rows gathered so far.
            continue

        new_soup = BeautifulSoup(new_webpage.content, "html.parser")

        # Extract every field for this product
        productDictionary['Title'].append(get_title(new_soup))
        productDictionary['Price'].append(get_price(new_soup))
        productDictionary['Savings'].append(get_savings_percentage(new_soup))
        productDictionary['Rating'].append(get_average_rating(new_soup))
        productDictionary['Reviews'].append(get_review_count(new_soup))
        productDictionary['Availability'].append(get_availability(new_soup))

    # Pandas DataFrame (from_dict)
    amazon_data = pd.DataFrame.from_dict(productDictionary)
    # Mark empty titles as missing. The result is assigned back to the column:
    # calling replace(..., inplace=True) on a column selection is deprecated
    # chained assignment and leaves the frame untouched under Copy-on-Write.
    amazon_data['Title'] = amazon_data['Title'].replace('', np.nan)
    # Drop products whose title could not be extracted
    amazon_data = amazon_data.dropna(subset = ['Title'])
    # Converting the amazon data aggregator to .csv file format
    amazon_data.to_csv("amazon_data.csv", header = True, index = False)

In [12]:
# Display the scraped product table as this cell's output
amazon_data

Unnamed: 0,Title,Price,Savings,Rating,Reviews,Availability
0,Z-Edge UG24 24-inch Curved Gaming Monitor 180H...,$149.99,-29%,4.4 out of 5 stars,497 ratings,In Stock.
1,KS Doublesight Displays 2 HD 24” Monitors (192...,$329.99,-0%,4.0 out of 5 stars,1 rating,In Stock.
2,"Sceptre 24"" Professional Thin 75Hz 1080p LED M...",$99.98,-20%,4.6 out of 5 stars,"25,156 ratings",In Stock.
3,Acer 21.5 Inch Full HD (1920 x 1080) IPS Ultra...,$89.99,-10%,4.6 out of 5 stars,"40,708 ratings",In Stock.
4,"Sceptre Curved 27"" Gaming Monitor 75Hz HDMI x2...",$129.97,-43%,4.6 out of 5 stars,663 ratings,In Stock.
5,KOORUI 24 Inch Computer Monitor Full HD 1920 x...,$88.50,-27%,4.3 out of 5 stars,"1,317 ratings",In Stock.
6,Z-Edge UG24 24-inch Curved Gaming Monitor 180H...,$149.99,-29%,4.4 out of 5 stars,497 ratings,In Stock.
7,"KOORUI 24 Inch Computer Monitor, 3-Sided Frame...",$95.99,-20%,4.5 out of 5 stars,348 ratings,In Stock.
8,"GTEK 24 Inch 75Hz Frameless Computer Monitor, ...",$96.87,-0%,4.6 out of 5 stars,132 ratings,In Stock.
9,"Z-Edge 27-inch Curved Gaming Monitor, Full HD ...",$179.99,-20%,4.3 out of 5 stars,630 ratings,In Stock.
