In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import numpy as np
import time

In [2]:
# Function to extract Product Name
def get_name(soup):
    """Return the product title found on a product page.

    Looks up the ``<span id="productTitle">`` element via ``soup.find`` and
    returns its text with surrounding whitespace removed.

    Parameters
    ----------
    soup : object exposing ``find(name, attrs=...)`` (e.g. a BeautifulSoup tree)

    Returns
    -------
    str
        The stripped title, or "" when the lookup raises ``AttributeError``
        (for instance because ``find`` returned ``None``).
    """
    try:
        title_tag = soup.find("span", attrs={"id": 'productTitle'})
        # A missing element comes back as None, so `.text` raises AttributeError.
        return title_tag.text.strip()
    except AttributeError:
        return ""

# Function to extract Product Price
def get_price(soup):
    """Return the product price text found on a product page.

    Two lookups are tried in order: ``<span class="a-price">`` and then
    ``<span class="a-offscreen">``. The first one whose ``.string`` can be
    stripped wins.

    Parameters
    ----------
    soup : object exposing ``find(name, attrs=...)`` (e.g. a BeautifulSoup tree)

    Returns
    -------
    str
        The stripped price text, or "" when neither lookup yields a string.

    Notes
    -----
    Only ``AttributeError`` is treated as "not found" (a missing element or a
    ``.string`` of ``None``). Anything else, including ``KeyboardInterrupt``,
    propagates instead of being silently swallowed.
    """
    for css_class in ('a-price', 'a-offscreen'):
        try:
            # `.string` is None when the tag does not wrap a single string,
            # in which case `.strip()` raises AttributeError and we fall through.
            return soup.find("span", attrs={'class': css_class}).string.strip()
        except AttributeError:
            continue

    return ""

# Function to extract Product Rating
def get_rating(soup):
    """Return the product rating text found on a product page.

    Two lookups are tried in order: an ``<i>`` element whose class is
    ``"a-icon a-icon-star a-star-4-5"`` and then ``<span class="a-icon-alt">``.
    The first one whose ``.string`` can be stripped wins.

    Parameters
    ----------
    soup : object exposing ``find(name, attrs=...)`` (e.g. a BeautifulSoup tree)

    Returns
    -------
    str
        The stripped rating text (e.g. "4.4 out of 5 stars"), or "" when
        neither lookup yields a string.

    Notes
    -----
    Only ``AttributeError`` is treated as "not found". Anything else,
    including ``KeyboardInterrupt``, propagates instead of being swallowed.
    """
    # NOTE(review): the first selector names the 'a-star-4-5' class explicitly,
    # so it presumably only matches one specific star icon; every other page
    # relies on the 'a-icon-alt' fallback. Confirm before removing the fallback.
    lookups = (
        ("i", 'a-icon a-icon-star a-star-4-5'),
        ("span", 'a-icon-alt'),
    )
    for tag_name, css_class in lookups:
        try:
            return soup.find(tag_name, attrs={'class': css_class}).string.strip()
        except AttributeError:
            continue

    return ""

# Function to extract Number of User Reviews
def get_review_count(soup):
    """Return the review-count text found on a product page.

    Looks up ``<span id="acrCustomerReviewText">`` and returns its ``.string``
    with surrounding whitespace removed (e.g. "4,570 ratings").

    Parameters
    ----------
    soup : object exposing ``find(name, attrs=...)`` (e.g. a BeautifulSoup tree)

    Returns
    -------
    str
        The stripped text, or "" when the lookup raises ``AttributeError``
        (missing element, or an element whose ``.string`` is ``None``).
    """
    try:
        count_tag = soup.find("span", attrs={'id': 'acrCustomerReviewText'})
        return count_tag.string.strip()
    except AttributeError:
        return ""


#Function to extract Description
def get_description(soup):
    """Return the bullet-list description found on a product page.

    Looks up the ``<ul>`` whose class is
    ``"a-unordered-list a-vertical a-spacing-mini"`` and returns its text with
    surrounding whitespace removed.

    Parameters
    ----------
    soup : object exposing ``find(name, attrs=...)`` (e.g. a BeautifulSoup tree)

    Returns
    -------
    str
        The stripped text, or "" when the lookup raises ``AttributeError``
        (for instance because ``find`` returned ``None``).
    """
    bullet_list_class = "a-unordered-list a-vertical a-spacing-mini"
    try:
        bullet_list = soup.find("ul", attrs={"class": bullet_list_class})
        return bullet_list.text.strip()
    except AttributeError:
        return ""

#Function to extract Product Details
def get_details(soup):
    """Return the "detail bullets" of a product page as ``"label: value\\n"`` lines.

    Walks ``<div id="detailBullets_feature_div">`` -> every ``<li>`` -> every
    ``<span class="a-list-item">`` and, for each item, reads its nested
    ``<span>`` elements: the first is the label, the second is the value.

    Parameters
    ----------
    soup : object exposing ``find(name, attrs=...)`` (e.g. a BeautifulSoup tree)

    Returns
    -------
    str
        One ``"<label>: <value>\\n"`` line per usable item, concatenated in
        document order. "" when the container is missing (``AttributeError``)
        or no item is usable.

    Notes
    -----
    Items with fewer than two nested spans are skipped. Previously they raised
    an uncaught ``IndexError`` that aborted the caller.
    """
    try:
        details_div = soup.find("div", attrs={"id": "detailBullets_feature_div"})
        lines = []
        for list_entry in details_div.find_all('li'):
            for item in list_entry.find_all('span', attrs={'class': 'a-list-item'}):
                spans = item.find_all('span')
                if len(spans) < 2:
                    # Not a label/value pair; nothing to record.
                    continue
                # The label text carries separator padding after a double
                # space; keep only what precedes it.
                label = spans[0].text.split('  ')[0].strip()
                value = spans[1].text.strip()
                lines.append(label + ": " + value + "\n")
        details = "".join(lines)

    except AttributeError:
        # Container not found (find returned None) or soup is not a tree.
        details = ""

    return details


In [3]:
if __name__ == '__main__':
    # Scrape amazon.in search results for "bags": walk the result pages,
    # open every product link, extract the fields with the get_* helpers
    # and write the collected rows to AmazonProductData.csv.

    # user agent
    HEADERS = ({'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36', 'Accept-Language': 'en-US, en;q=0.5'})

    # The webpage URL (first page of search results)
    baseURL = "https://www.amazon.in/s?k=bags&crid=2M096C61O4MLT&qid=1653308124&sprefix=ba%2Caps%252+C283&ref=sr_pg_1"

    d = {"Name":[], "URL":[], "Price":[], "Rating":[], "Reviews":[],"Description":[], "ASIN":[], "Details":[],"Manufacturer":[]}

    # Tunables for the crawl.
    TARGET_LINKS = 200       # stop once at least this many product links were seen
    MAX_PAGES = 50           # hard cap so the loop always terminates
    MAX_PAGE_RETRIES = 5     # consecutive failures tolerated for one search page
    REQUEST_TIMEOUT = 30     # seconds; without it requests.get can block forever

    count = 0           # product links collected so far
    i = 0               # number of search pages fetched successfully
    page_failures = 0   # consecutive failed attempts on the current page

    # Loop for extracting 200 products. Whole result pages are processed, so
    # the final number of links may exceed TARGET_LINKS.
    while count < TARGET_LINKS and i < MAX_PAGES:

        # Page numbers are 1-based: baseURL is page 1, so the i-th follow-up
        # request must ask for page i + 1 (asking for page i would fetch
        # page 1 a second time and duplicate its products).
        if i == 0:
            page_url = baseURL
        else:
            new_baseURL = "https://www.amazon.in/s?k=bags&page={}&crid=2M096C61O4MLT&qid=1653308124&sprefix=ba%2Caps%252+C283&ref=sr_pg_{}".format(i + 1, i + 1)
            page_url = new_baseURL

        # HTTP Request
        try:
            webpage = requests.get(page_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
        except requests.exceptions.RequestException:
            page_failures += 1
            if page_failures >= MAX_PAGE_RETRIES:
                print("Giving up on search page {} after {} failed attempts.".format(i + 1, page_failures))
                break
            print("Connection Error! Sleeping for 5 sec ... ")
            time.sleep(5)
            continue
        page_failures = 0
        i += 1

        # Soup Object containing all data
        soup = BeautifulSoup(webpage.content, "html.parser")

        # Fetching links as List of Tag Objects
        links = soup.find_all("a", attrs={'class':'a-link-normal s-no-outline'})

        # Storing the links; anchors without an href cannot be followed.
        links_list = [link.get('href') for link in links if link.get('href')]

        if not links_list:
            # No product links on this page (end of results, or a page that
            # is not a result listing): further pages would not help.
            print("No product links found on search page {}; stopping.".format(i))
            break

        count += len(links_list)

        # Loop for extracting product details from each link
        for link in links_list:
            new_url = "https://www.amazon.in" + link
            try:
                new_webpage = requests.get(new_url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
            except requests.exceptions.RequestException:
                # Best effort: skip this product and carry on with the next.
                print("Connection Error! Sleeping for 5 seconds ...")
                time.sleep(5)
                continue
            new_soup = BeautifulSoup(new_webpage.content, "html.parser")

            details = get_details(new_soup)

            # get_details returns "label: value" lines. Index them by exact
            # label so that e.g. "Is Discontinued By Manufacturer" is not
            # mistaken for "Manufacturer"; partition keeps values that
            # themselves contain ':' intact. A repeated label keeps its last
            # value, as before.
            detail_fields = {}
            for detail in details.split("\n"):
                label, separator, value = detail.partition(":")
                if separator:
                    # Also drop stray left-to-right / right-to-left marks.
                    detail_fields[label.strip(" \t\u200e\u200f")] = value.strip()

            prod_asin = detail_fields.get("ASIN", "Not found")
            prod_manufacturer = detail_fields.get("Manufacturer", "Not found")

            d['Name'].append(get_name(new_soup))
            d['URL'].append(new_url)
            d['Price'].append(get_price(new_soup))
            d['Rating'].append(get_rating(new_soup))
            d['Reviews'].append(get_review_count(new_soup))
            d['Description'].append(get_description(new_soup))
            d['ASIN'].append(prod_asin)
            d['Details'].append(details)
            d['Manufacturer'].append(prod_manufacturer)

    amazon_df = pd.DataFrame.from_dict(d)
    # Rows without a product name are unusable (page not parsed). Assign the
    # result back instead of calling replace(..., inplace=True) on the column
    # selection, which is not guaranteed to modify the frame itself.
    amazon_df['Name'] = amazon_df['Name'].replace('', np.nan)
    amazon_df = amazon_df.dropna(subset=['Name'])
    amazon_df.to_csv("AmazonProductData.csv", header=True, index=False)

Connection Error! Sleeping for 5 seconds ...
Connection Error! Sleeping for 5 seconds ...
Connection Error! Sleeping for 5 seconds ...
Connection Error! Sleeping for 5 seconds ...
Connection Error! Sleeping for 5 seconds ...
Connection Error! Sleeping for 5 seconds ...
Connection Error! Sleeping for 5 seconds ...
Connection Error! Sleeping for 5 seconds ...
Connection Error! Sleeping for 5 seconds ...
Connection Error! Sleeping for 5 seconds ...


In [4]:
amazon_df

Unnamed: 0,Name,URL,Price,Rating,Reviews,Description,ASIN,Details,Manufacturer
1,"RASHKI Aero Overnighter Backpack 15.6"" Inch La...",https://www.amazon.in/sspa/click?ie=UTF8&spc=M...,"₹2,490",5.0 out of 5 stars,13 ratings,Closure: Zipper Women Backpack Women pre...,B0BW8T473B,Product Dimensions: 54 x 37 x 18 cm; 2 Kilogra...,Not found
2,Red Lemon Bange Hard Ironlook Briefcase Laptop...,https://www.amazon.in/sspa/click?ie=UTF8&spc=M...,"₹4,990",4.4 out of 5 stars,80 ratings,"? PREMIUM MATERIAL: Made from high quality, wa...",B09ZYQJHXR,Product Dimensions: 30 x 17 x 48 cm; 1.63 Kilo...,Encon Impex Pvt Ltd
3,Wesley Milestone 2.0 Casual Waterproof Laptop ...,https://www.amazon.in/Wesley-Milestone-Waterpr...,₹598.00,4.3 out of 5 stars,"10,969 ratings",30L Capacity: The Backpack has a padded laptop...,Not found,,Not found
4,American Tourister 32 Ltrs Black Casual Backpa...,https://www.amazon.in/American-Tourister-AMT-S...,"₹1,199.00",4.0 out of 5 stars,"54,253 ratings","Laptop Compatibility: No, Strap Type: Adjustab...",Not found,,Not found
5,Skybags Brat Black 46 Cms Casual Backpack,https://www.amazon.in/Skybags-Brat-Black-Casua...,₹630.00,4.1 out of 5 stars,"4,570 ratings",Combination of functional & safety features in...,B08Z1HHHTD,Product Dimensions: 10.01 x 19.99 x 11.99 cm; ...,VIP Industries Ltd
...,...,...,...,...,...,...,...,...,...
202,TRUE HUMAN® Emperor Anti-Theft Pocket backpack...,https://www.amazon.in/TRUE-Anti-Theft-backpack...,₹649,3.3 out of 5 stars,15 ratings,Care Instructions: Wipe with Dry Cloth * Ca...,B0BYBDX8VP,Product Dimensions: 10 x 20 x 30 cm; 450 Grams...,Rama luxury store
203,Half Moon 30L Waterproof 15.6 inch Laptop Offi...,https://www.amazon.in/Half-Moon-Waterproof-Lap...,₹589,3.7 out of 5 stars,358 ratings,SPACIOUS AND LIGHTWEIGHT: Light weight & spaci...,B09QM9N1B3,Product Dimensions: 45 x 31 x 22 cm; 420 Grams...,Half_Moon
204,BEAUTY GIRLS 1531 Polyester Waterproof 30 L Fl...,https://www.amazon.in/BEAUTY-GIRLS-1531-Pink-W...,₹849.00,4.2 out of 5 stars,"1,472 ratings",AT BEAUTY GIRLS bags are specially designed fo...,B08YR3JJ64,Product Dimensions: 30.5 x 18 x 43 cm; 350 Gra...,SHRI SALASAR ENTERPRISES
205,Red Lemon Bange Hard Ironlook Briefcase Laptop...,https://www.amazon.in/sspa/click?ie=UTF8&spc=M...,"₹4,990",4.4 out of 5 stars,80 ratings,"? PREMIUM MATERIAL: Made from high quality, wa...",B09ZYQJHXR,Product Dimensions: 30 x 17 x 48 cm; 1.63 Kilo...,Encon Impex Pvt Ltd


In [5]:
amazon_df.head()

Unnamed: 0,Name,URL,Price,Rating,Reviews,Description,ASIN,Details,Manufacturer
1,"RASHKI Aero Overnighter Backpack 15.6"" Inch La...",https://www.amazon.in/sspa/click?ie=UTF8&spc=M...,"₹2,490",5.0 out of 5 stars,13 ratings,Closure: Zipper Women Backpack Women pre...,B0BW8T473B,Product Dimensions: 54 x 37 x 18 cm; 2 Kilogra...,Not found
2,Red Lemon Bange Hard Ironlook Briefcase Laptop...,https://www.amazon.in/sspa/click?ie=UTF8&spc=M...,"₹4,990",4.4 out of 5 stars,80 ratings,"? PREMIUM MATERIAL: Made from high quality, wa...",B09ZYQJHXR,Product Dimensions: 30 x 17 x 48 cm; 1.63 Kilo...,Encon Impex Pvt Ltd
3,Wesley Milestone 2.0 Casual Waterproof Laptop ...,https://www.amazon.in/Wesley-Milestone-Waterpr...,₹598.00,4.3 out of 5 stars,"10,969 ratings",30L Capacity: The Backpack has a padded laptop...,Not found,,Not found
4,American Tourister 32 Ltrs Black Casual Backpa...,https://www.amazon.in/American-Tourister-AMT-S...,"₹1,199.00",4.0 out of 5 stars,"54,253 ratings","Laptop Compatibility: No, Strap Type: Adjustab...",Not found,,Not found
5,Skybags Brat Black 46 Cms Casual Backpack,https://www.amazon.in/Skybags-Brat-Black-Casua...,₹630.00,4.1 out of 5 stars,"4,570 ratings",Combination of functional & safety features in...,B08Z1HHHTD,Product Dimensions: 10.01 x 19.99 x 11.99 cm; ...,VIP Industries Ltd


In [6]:
amazon_df.iloc[0]

Name            RASHKI Aero Overnighter Backpack 15.6" Inch La...
URL             https://www.amazon.in/sspa/click?ie=UTF8&spc=M...
Price                                                      ₹2,490
Rating                                         5.0 out of 5 stars
Reviews                                                13 ratings
Description     Closure: Zipper    Women Backpack    Women pre...
ASIN                                                   B0BW8T473B
Details         Product Dimensions: 54 x 37 x 18 cm; 2 Kilogra...
Manufacturer                                            Not found
Name: 1, dtype: object

In [7]:
amazon_df.iloc[0].Description

'Closure: Zipper    Women Backpack    Women premium Backpack    Women Laptop bag    Unisex Work Backpack    Men Laptop Backpack'

In [8]:
amazon_df.iloc[0].Details

'Product Dimensions: 54 x 37 x 18 cm; 2 Kilograms\nDate First Available: 20 February 2023\nASIN: B0BW8T473B\nItem model number: SCARLET0521MTBR\nCountry of Origin: India\nDepartment: unisex-adult\nItem Weight: 2 kg\nItem Dimensions LxWxH: 54 x 37 x 18 Centimeters\nGeneric Name: Backpack\n'

In [9]:
amazon_df.iloc[13].Details

'Product Dimensions: 30 x 18 x 42 cm; 700 Grams\nDate First Available: 17 March 2022\nManufacturer: FUR JADEN\nASIN: B09VTDMRY7\nItem model number: BM83\nCountry of Origin: India\nDepartment: unisex-adult\nManufacturer: FUR JADEN, Fur Jaden, 3C Jai Hind Bld, Dr AM Road, Bhuleshwar, Mumbai 400002\nPacker: Fur Jaden, 3C Jai Hind Bld, Dr AM Road, Bhuleshwar, Mumbai 400002\nItem Weight: 700 g\nItem Dimensions LxWxH: 30 x 18 x 42 Centimeters\nNet Quantity: 1.00 Unit\nGeneric Name: Anti Theft Laptop Backpack\n'