In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import time

In [2]:
# Function to extract Product Title
def get_title(soup):
    """Return the product title text from a parsed product page.

    Looks up the ``<span id="productTitle">`` element via ``soup.find`` and
    returns its ``.text`` with surrounding whitespace stripped.

    Returns an empty string when the lookup chain raises ``AttributeError``
    (for example, when ``find`` returns ``None`` because the span is absent).
    """
    try:
        # Tag lookup -> text content -> trimmed string, all in one guarded chain.
        return soup.find("span", attrs={"id": 'productTitle'}).text.strip()
    except AttributeError:
        # No title element on this page.
        return ""

# Function to extract Product Price
def get_price(soup):
    """Return the product price text from a parsed product page.

    Candidate ``<span>`` elements are tried in order:

    1. ``class="aok-offscreen"``
    2. ``id="priceblock_dealprice"`` (deal price)

    The first candidate whose ``.string`` strips to a non-empty value wins.
    A candidate is skipped when it is missing (``find`` returns ``None``),
    when it has no single string child (``.string`` is ``None``), or when its
    text is empty/whitespace-only.

    Returns an empty string when no candidate yields a price.
    """
    candidate_attrs = (
        {'class': 'aok-offscreen'},
        {'id': 'priceblock_dealprice'},
    )

    for attrs in candidate_attrs:
        try:
            price = soup.find("span", attrs=attrs).string.strip()
        except AttributeError:
            # Element absent or without a direct string: try the next selector.
            # Only AttributeError is caught so that unrelated failures (and
            # KeyboardInterrupt) are not silently swallowed.
            continue

        # An empty match must not block the fallback selector.
        if price:
            return price

    return ""

# Function to extract Product Rating
def get_rating(soup):
    """Return the product rating text from a parsed product page.

    Candidate elements are tried in order:

    1. ``<i class="a-icon a-icon-star a-star-4-5">`` (matches only that exact
       class string)
    2. ``<span class="a-icon-alt">``

    The first candidate whose ``.string`` strips to a non-empty value wins.
    A candidate is skipped when it is missing (``find`` returns ``None``),
    when ``.string`` is ``None``, or when its text is empty/whitespace-only.

    Returns an empty string when no candidate yields a rating.
    """
    candidates = (
        ("i", {'class': 'a-icon a-icon-star a-star-4-5'}),
        ("span", {'class': 'a-icon-alt'}),
    )

    for tag_name, attrs in candidates:
        try:
            rating = soup.find(tag_name, attrs=attrs).string.strip()
        except AttributeError:
            # Element absent or without a direct string: try the next selector.
            # Only AttributeError is caught so that unrelated failures (and
            # KeyboardInterrupt) are not silently swallowed.
            continue

        # An empty match must not block the fallback selector.
        if rating:
            return rating

    return ""

# Function to extract Number of User Reviews
def get_review_count(soup):
    """Return the review-count text from a parsed product page.

    Reads ``.string`` of the ``<span id="acrCustomerReviewText">`` element and
    strips surrounding whitespace.

    Returns an empty string when the lookup chain raises ``AttributeError``
    (span not found, or ``.string`` is ``None``).
    """
    try:
        count_tag = soup.find("span", attrs={'id': 'acrCustomerReviewText'})
        count_text = count_tag.string
        return count_text.strip()
    except AttributeError:
        # No review-count element, or it has no direct string.
        return ""

# Function to extract Availability Status
def get_availability(soup):
    """Return the "bought" social-proof text from a parsed product page.

    Finds ``<span id="social-proofing-faceout-title-tk_bought">``, then the
    first ``<span>`` nested inside it, and returns that inner span's
    ``.string`` stripped of surrounding whitespace.

    Note: despite the function name, the element read is the social-proofing
    "bought" span, not a stock indicator, so the ``"Not Available"`` fallback
    only means this element could not be read.

    Returns ``"Not Available"`` when the lookup chain raises
    ``AttributeError`` (outer or inner span missing, or ``.string`` is
    ``None``).
    """
    try:
        faceout = soup.find("span", attrs={'id': 'social-proofing-faceout-title-tk_bought'})
        inner_span = faceout.find("span")
        return inner_span.string.strip()
    except AttributeError:
        # Social-proof element absent or without a direct string.
        return "Not Available"

In [3]:
if __name__ == '__main__':
    # Scrape several Amazon search-result pages for `search_query`, visit each
    # product link found on them, collect title/price/rating/reviews/
    # availability, and write the rows that have a title to a CSV file.

    # Add your user agent
    HEADERS = ({
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36',
        'Accept-Language': 'en-US, en;q=0.5'
    })

    search_query = "yamaha keyboard"
    num_pages_to_scrape = 5

    # Column-oriented accumulator: one list per output column, appended in lockstep.
    d = {"title": [], "price": [], "rating": [], "reviews": [], "availability": []}

    for page_number in range(1, num_pages_to_scrape + 1):
        # Construct the URL for the current search page
        URL = f"https://www.amazon.com/s?k={search_query}&page={page_number}&rh=n%3A11970031&ref=nb_sb_noss"

        # HTTP Request. A timeout or connection error on one page must not
        # abort the whole run and discard the rows collected so far.
        try:
            webpage = requests.get(URL, headers=HEADERS, timeout=5)
        except requests.exceptions.RequestException as exc:
            print(f"Failed to retrieve page {page_number}. Error: {exc}")
            continue

        # Check if the page was successfully retrieved
        if webpage.status_code != 200:
            print(f"Failed to retrieve page {page_number}. Status code: {webpage.status_code}")
            continue

        soup = BeautifulSoup(webpage.content, "html.parser")

        # Fetch links as List of Tag Objects
        links = soup.find_all("a", attrs={'class': 'a-link-normal s-no-outline'})

        # Store the links. `.get('href')` returns None for anchors without an
        # href; those are dropped so `startswith` below cannot fail on None.
        links_list = [href for href in (anchor.get('href') for anchor in links) if href]

        # Loop for extracting product details from each link
        for link in links_list:
            # Check if the link is relative (starts with "/")
            if link.startswith("/"):
                full_url = "https://www.amazon.com" + link
            else:
                # Handle cases where the link is already a full URL
                full_url = link

            # Skip a product whose page cannot be fetched instead of crashing.
            try:
                new_webpage = requests.get(full_url, headers=HEADERS, timeout=5)
            except requests.exceptions.RequestException as exc:
                print(f"Failed to retrieve product page {full_url}. Error: {exc}")
                continue

            new_soup = BeautifulSoup(new_webpage.content, "html.parser")

            # Append one value per column so all lists stay the same length
            d['title'].append(get_title(new_soup))
            d['price'].append(get_price(new_soup))
            d['rating'].append(get_rating(new_soup))
            d['reviews'].append(get_review_count(new_soup))
            d['availability'].append(get_availability(new_soup))

        # Introduce a delay (rate limiting) between search pages
        time.sleep(5)  # Adjust the value as needed

    amazon_df = pd.DataFrame.from_dict(d)
    # Assign the result back: an in-place replace on a column selection acts on
    # a temporary under pandas Copy-on-Write and would leave the frame
    # unchanged, so blank titles would survive the dropna below.
    amazon_df['title'] = amazon_df['title'].replace('', np.nan)
    amazon_df = amazon_df.dropna(subset=['title'])
    amazon_df.to_csv("amazon_pages1.csv", header=True, index=False)

In [4]:
amazon_df

Unnamed: 0,title,price,rating,reviews,availability
0,JIKADA 61 Key Portable Electronic Keyboard Pia...,,4.5 out of 5 stars,365 ratings,300+ bought in past month
1,RockJam Compact 61 Key Keyboard with Sheet Mus...,,4.4 out of 5 stars,"31,655 ratings",1K+ bought in past month
2,"Korg, 88-Key Digital Pianos-Home (L1MRED)",$329.99,4.4 out of 5 stars,34 ratings,Not Available
3,Yamaha YPT270 61-Key Portable Keyboard With Po...,$159.99,4.8 out of 5 stars,"1,544 ratings",700+ bought in past month
4,Yamaha PSREW310 76-Key Touch Sensitive Portabl...,,4.7 out of 5 stars,786 ratings,200+ bought in past month
...,...,...,...,...,...
197,MUSTAR Digital Piano 88 Weighted Keys Hammer A...,$337.99,4.6 out of 5 stars,43 ratings,Not Available
198,"MUSTAR 88 Key Weighted Keyboard Piano, Full Si...",$259.99,5.0 out of 5 stars,1 rating,Not Available
199,MUSTAR Digital Piano 88 Weighted Keys with Sta...,,4.3 out of 5 stars,110 ratings,100+ bought in past month
200,"Casio, 61-Key Portable Keyboard (CT-S1BK)",$219.00,4.8 out of 5 stars,803 ratings,100+ bought in past month


In [5]:
amazon_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 202 entries, 0 to 201
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   title         202 non-null    object
 1   price         202 non-null    object
 2   rating        202 non-null    object
 3   reviews       202 non-null    object
 4   availability  202 non-null    object
dtypes: object(5)
memory usage: 8.0+ KB


In [6]:
amazon_df.head(25)

Unnamed: 0,title,price,rating,reviews,availability
0,JIKADA 61 Key Portable Electronic Keyboard Pia...,,4.5 out of 5 stars,365 ratings,300+ bought in past month
1,RockJam Compact 61 Key Keyboard with Sheet Mus...,,4.4 out of 5 stars,"31,655 ratings",1K+ bought in past month
2,"Korg, 88-Key Digital Pianos-Home (L1MRED)",$329.99,4.4 out of 5 stars,34 ratings,Not Available
3,Yamaha YPT270 61-Key Portable Keyboard With Po...,$159.99,4.8 out of 5 stars,"1,544 ratings",700+ bought in past month
4,Yamaha PSREW310 76-Key Touch Sensitive Portabl...,,4.7 out of 5 stars,786 ratings,200+ bought in past month
5,Yamaha PSRE373 61-Key Touch Sensitive Portable...,$219.99,4.6 out of 5 stars,56 ratings,300+ bought in past month
6,"Donner Keyboard Piano 61 Key, Electric Keyboar...",,4.5 out of 5 stars,215 ratings,100+ bought in past month
7,Roland GO:KEYS 61-key Music Creation Piano Key...,$349.99,4.5 out of 5 stars,"1,146 ratings",100+ bought in past month
8,"MUSTAR Piano Keyboard, 61 Key Learning Keyboar...",$111.99 with 20 percent savings,4.4 out of 5 stars,538 ratings,400+ bought in past month
9,JIKADA 61 Key Portable Electronic Keyboard Pia...,$159.99,4.5 out of 5 stars,365 ratings,300+ bought in past month
