In [88]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [89]:
url = 'https://www.amazon.in/gp/bestsellers/electronics/1389432031/ref=zg_bs_pg_1_electronics?ie=UTF8&pg=1'

In [90]:
header = {'User-Agent':'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36', 'Accept-Language':'en-US'}

In [91]:
webpage = requests.get(url, headers=header)

In [92]:
webpage

<Response [200]>

In [93]:
soup = BeautifulSoup(webpage.content, 'html.parser')

In [94]:
prod_ranks = soup.find_all('span', class_='zg-bdg-text')
product_rank = []
for rank in prod_ranks:
    product_rank.append(rank.text.strip('#'))

prod_links = soup.find_all('a', class_='a-link-normal aok-block')
link_to_product = []
for link in prod_links:
    link_to_product.append('https://amazon.in' + link.get('href'))

In [95]:
prod_ratings = soup.find_all('span', class_='a-icon-alt')
Product_ratings = []
for rating in prod_ratings:
    Product_ratings.append(rating.text)

In [96]:
def fetch_page(url, headers, max_retries=5):
    retries = 0
    while retries < max_retries:
        response = requests.get(url, headers=headers)
        if response.status_code == 503:
            retries += 1
            wait_time = 2 ** retries + random.uniform(0, 1)  # Exponential backoff with jitter
            print(f"503 Service Unavailable. Retrying in {wait_time:.2f} seconds...")
            time.sleep(wait_time)
        else:
            return response
    return None

In [97]:
def get_name(soup):
    name = soup.find('span', attrs={'class':'a-size-large product-title-word-break', 'id':'productTitle'})
    if name:
        return name.text.strip()
    else:
        return " "
    
def get_rating(soup):
    rating = soup.find('span', class_='a-icon-alt')
    if rating:
        return rating.text
    else:
        return " "
    
def get_rating_numbers(soup):
    rat = soup.find('span', attrs={'class':'a-size-base', 'id':'acrCustomerReviewText'})
    if rat:
        return rat.text.strip('ratings')
    else:
        return " "
    
def get_price(soup):
    price = soup.find('span', class_='a-price-whole')
    if price:
        return price.text
    else:
        return " "
    
def get_discount(soup):
    discount = soup.find('span', class_='a-size-large a-color-price savingPriceOverride aok-align-center reinventPriceSavingsPercentageMargin savingsPercentage')
    if discount:
        return discount.text[1:]
    else:
        return " "
    
def get_product_details(soup):
    prod_details = soup.find('table', class_='a-normal a-spacing-micro')
    if prod_details:
        brand = prod_details.find('tr', class_='a-spacing-small po-brand')
        if brand:
            brand = brand.find('span', class_='a-size-base po-break-word')
            if brand:
                brand = brand.text
            else:
                brand = ' '
        else:
            brand = " "
        operating_system = prod_details.find('tr', class_='a-spacing-small po-operating_system')
        if operating_system:
            operating_system = operating_system.find('span', class_='a-size-base po-break-word')
            if operating_system:
                operating_system = operating_system.text
            else:
                operating_system = ' '
        else:
            operating_system = ' '
        RAM_installed_size = prod_details.find('tr', class_='a-spacing-small po-ram_memory.installed_size')
        if RAM_installed_size:
            RAM_installed_size = RAM_installed_size.find('span', class_='a-size-base po-break-word')
            if RAM_installed_size:
                RAM_installed_size = RAM_installed_size.text
            else:
                RAM_installed_size = ' '
        else:
            RAM_installed_size = ' '
        Storage_cap = prod_details.find('tr', class_='a-spacing-small po-memory_storage_capacity')
        if Storage_cap:
            Storage_cap = Storage_cap.find('span', class_='a-size-base po-break-word')
            if Storage_cap:
                Storage_cap = Storage_cap.text
            else:
                Storage_cap = ' '
        else:
            Storage_cap = ' '
        return {'Brand':brand, 'Operating System':operating_system, 'RAM':RAM_installed_size, 'Storage capacity':Storage_cap}
    else:
        return " "
    
def get_reviews(soup):
    review = soup.find('div', class_='a-section a-spacing-medium brand-snapshot-flex-row brand-snapshot-flex-wrap')
    if review:
        customer_reviews = [] 
        review_span = review.find_all('span', class_='a-size-small brand-snapshot-chip-text')
        for rev in review_span:
            customer_reviews.append(rev.text.strip())
        return customer_reviews
    else:
        return "Can't find reviews"

In [98]:
amazon_dict = {'Product Name':[], 'Ratings':[], 'No. of ratings':[], 'Price':[], 'Discount':[], 'Brand':[], 'Operating System':[], 'RAM':[], 'Storage capacity':[], 'Customer reviews':[]}

In [99]:
for link in link_to_product:
    prod_webpage = fetch_page(link, header)
    
    if prod_webpage and prod_webpage.status_code == 200:
        new_soup = BeautifulSoup(prod_webpage.content, 'html.parser')
        
        amazon_dict['Product Name'].append(get_name(new_soup))
        amazon_dict['Ratings'].append(get_rating(soup))
        amazon_dict['No. of ratings'].append(get_rating_numbers(new_soup))
        amazon_dict['Price'].append(get_price(new_soup))
        amazon_dict['Discount'].append(get_discount(new_soup))
        amazon_dict['Brand'].append(get_product_details(new_soup)['Brand'])
        amazon_dict['Operating System'].append(get_product_details(new_soup)['Operating System'])
        amazon_dict['RAM'].append(get_product_details(new_soup)['RAM'])
        amazon_dict['Storage capacity'].append(get_product_details(new_soup)['Storage capacity'])
        amazon_dict['Customer reviews'].append(get_reviews(new_soup))
        
    else:
        print("Failed to retrieve the page after multiple attempts.")

In [110]:
df = pd.DataFrame.from_dict(amazon_dict)

In [111]:
df

Unnamed: 0,Product Name,Ratings,No. of ratings,Price,Discount,Brand,Operating System,RAM,Storage capacity,Customer reviews
0,"Samsung Galaxy M34 5G (Prism Silver,6GB,128GB)...",4.0 out of 5 stars,12600.0,12499.0,49%,Samsung,Android 13.0,6 GB,128 GB,"[good battery, good camera, fingerprint sensor]"
1,"Redmi 13C 5G (Startrail Green, 4GB RAM, 128GB ...",4.0 out of 5 stars,1313.0,10499.0,25%,Redmi,"MIUI 14, Android 13.0",4 GB,128 GB,"[value for money, good performance, looks good]"
2,"Redmi 13C 5G (Startrail Silver, 4GB RAM, 128GB...",4.0 out of 5 stars,941.0,10499.0,25%,Redmi,"MIUI 14, Android 13.0",4 GB,128 GB,"[value for money, good performance, looks good]"
3,"iQOO Z9x 5G (Tornado Green, 6GB RAM, 128GB Sto...",4.0 out of 5 stars,1040.0,14499.0,24%,iQOO,Funtouch OS 14 based on Android 14,6 GB,128 GB,"[good for gaming, fast charging, good performa..."
4,"iQOO Z9x 5G (Storm Grey, 6GB RAM, 128GB Storag...",4.0 out of 5 stars,1040.0,14499.0,24%,iQOO,Funtouch OS 14 based on Android 14,6 GB,128 GB,"[good for gaming, fast charging, good performa..."
5,Redmi 12 5G Jade Black 6GB RAM 128GB ROM,4.0 out of 5 stars,4742.0,12499.0,31%,Redmi,"MIUI 14, Android 13.0",6 GB,128 GB,"[value for money, good performance, looks good]"
6,"Samsung Galaxy M34 5G (Prism Silver,6GB,128GB)...",4.0 out of 5 stars,12600.0,12499.0,49%,Samsung,Android 13.0,6 GB,128 GB,"[good battery, good camera, fingerprint sensor]"
7,"Redmi 13C 5G (Starlight Black, 4GB RAM, 128GB ...",4.0 out of 5 stars,1074.0,10499.0,25%,Redmi,"MIUI 14, Android 13.0",4 GB,128 GB,"[value for money, good performance, looks good]"
8,"OnePlus Nord CE4 Lite 5G (Super Silver, 8GB RA...",4.0 out of 5 stars,,19999.0,5%,OnePlus,OxygenOS,8 GB,128 GB,"[fast charging, good speed, good camera]"
9,POCO M6 Pro 5G (128 GB) (6 GB RAM) (Power Black),4.0 out of 5 stars,3059.0,9999.0,41%,POCO,ANDROID,6 GB,6 GB,"[value for money, looks good, good performance]"


In [112]:
df['Rank'] = product_rank

In [115]:
df['Ratings'][0:8] = Product_ratings[0:8]

In [116]:
df['Ratings'][9:16] = Product_ratings[8:15]

In [121]:
df['Ratings'][17:30] = Product_ratings[15:28]

In [123]:
df['Ratings'][8] = ' '

In [124]:
df['Ratings'][16] = ' '

In [125]:
df

Unnamed: 0,Product Name,Ratings,No. of ratings,Price,Discount,Brand,Operating System,RAM,Storage capacity,Customer reviews,Rank
0,"Samsung Galaxy M34 5G (Prism Silver,6GB,128GB)...",4.0 out of 5 stars,12600.0,12499.0,49%,Samsung,Android 13.0,6 GB,128 GB,"[good battery, good camera, fingerprint sensor]",1
1,"Redmi 13C 5G (Startrail Green, 4GB RAM, 128GB ...",4.0 out of 5 stars,1313.0,10499.0,25%,Redmi,"MIUI 14, Android 13.0",4 GB,128 GB,"[value for money, good performance, looks good]",2
2,"Redmi 13C 5G (Startrail Silver, 4GB RAM, 128GB...",4.0 out of 5 stars,941.0,10499.0,25%,Redmi,"MIUI 14, Android 13.0",4 GB,128 GB,"[value for money, good performance, looks good]",3
3,"iQOO Z9x 5G (Tornado Green, 6GB RAM, 128GB Sto...",4.1 out of 5 stars,1040.0,14499.0,24%,iQOO,Funtouch OS 14 based on Android 14,6 GB,128 GB,"[good for gaming, fast charging, good performa...",4
4,"iQOO Z9x 5G (Storm Grey, 6GB RAM, 128GB Storag...",4.1 out of 5 stars,1040.0,14499.0,24%,iQOO,Funtouch OS 14 based on Android 14,6 GB,128 GB,"[good for gaming, fast charging, good performa...",5
5,Redmi 12 5G Jade Black 6GB RAM 128GB ROM,4.0 out of 5 stars,4742.0,12499.0,31%,Redmi,"MIUI 14, Android 13.0",6 GB,128 GB,"[value for money, good performance, looks good]",6
6,"Samsung Galaxy M34 5G (Prism Silver,6GB,128GB)...",4.0 out of 5 stars,12600.0,12499.0,49%,Samsung,Android 13.0,6 GB,128 GB,"[good battery, good camera, fingerprint sensor]",7
7,"Redmi 13C 5G (Starlight Black, 4GB RAM, 128GB ...",4.0 out of 5 stars,1074.0,10499.0,25%,Redmi,"MIUI 14, Android 13.0",4 GB,128 GB,"[value for money, good performance, looks good]",8
8,"OnePlus Nord CE4 Lite 5G (Super Silver, 8GB RA...",,,19999.0,5%,OnePlus,OxygenOS,8 GB,128 GB,"[fast charging, good speed, good camera]",9
9,POCO M6 Pro 5G (128 GB) (6 GB RAM) (Power Black),3.8 out of 5 stars,3059.0,9999.0,41%,POCO,ANDROID,6 GB,6 GB,"[value for money, looks good, good performance]",10


In [126]:
len(link_to_product)

30

In [128]:
df['Link to product'] = link_to_product

In [129]:
df.to_csv('Amazon_Bestseller_Mobiles.csv', index=False)