In [8]:
from urllib.parse import urljoin

import pandas as pd
import requests
from bs4 import BeautifulSoup

def _text_or_empty(node, tag, css_class):
    """Return the stripped text of the first ``tag`` under ``node`` with ``css_class``.

    Returns an empty string when no such element exists, so callers can fill
    a result field without their own error handling.
    """
    match = node.find(tag, {'class': css_class})
    return match.text.strip() if match is not None else ''


def scrape_product_data(soup):
    """Extract one record per search-result item that carries a link.

    Args:
        soup: Parsed search-results page; only ``find_all`` is called on it.

    Returns:
        A list of dicts with the keys ``url``, ``name``, ``price``,
        ``rating`` and ``reviews`` (in that order). A field whose element is
        missing is ``''``. Items without an ``a-link-normal`` anchor, or whose
        anchor has no ``href``, are skipped.
    """
    products = []
    for product_html in soup.find_all('div', {'class': 's-result-item'}):
        link = product_html.find('a', {'class': 'a-link-normal'})
        # .get() rather than ['href']: an anchor without an href attribute
        # must skip this item, not abort the whole page with a KeyError.
        url = link.get('href', '') if link is not None else ''
        if not url:
            continue
        products.append({
            'url': url,
            'name': _text_or_empty(
                product_html, 'span', 'a-size-medium a-color-base a-text-normal'),
            'price': _text_or_empty(product_html, 'span', 'a-offscreen'),
            'rating': _text_or_empty(product_html, 'span', 'a-icon-alt'),
            'reviews': _text_or_empty(product_html, 'div', 'a-section a-text-center'),
        })
    return products


def _detail_value(soup, label):
    """Return the text of the ``td`` that follows the cell labelled ``label``.

    Scans every ``th``/``td`` for one whose stripped text equals ``label``
    (case-insensitive, ignoring a trailing colon) and returns the stripped
    text of its next ``td`` sibling. Returns ``''`` when no such pair exists.
    """
    wanted = label.lower()
    for cell in soup.find_all(['th', 'td']):
        if cell.text.strip().rstrip(':').strip().lower() != wanted:
            continue
        value = cell.find_next_sibling('td')
        if value is not None:
            return value.text.strip()
    return ''


def scrape_product_details(url, timeout=10, headers=None):
    """Fetch a product page and pull out its descriptive fields.

    Args:
        url: Absolute URL of the product page.
        timeout: Seconds passed to ``requests.get``; without it a stalled
            connection would block the caller indefinitely.
        headers: Optional HTTP headers forwarded to ``requests.get``.

    Returns:
        A dict with the keys ``description``, ``asin``,
        ``product_description`` and ``manufacturer`` (in that order); a field
        that cannot be found is ``''``.

    Raises:
        requests.exceptions.RequestException: on connection failure or when
            ``timeout`` elapses. The HTTP status code is not checked.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    description = soup.find('div', {'id': 'productDescription'})
    product_description = soup.find('div', {'class': 'feature'})

    # ASIN and manufacturer used to be read from the very same selector, so
    # the two fields could never differ. Look each one up by its row label.
    # NOTE(review): assumes the page lists them as label/value table cells
    # ("ASIN", "Manufacturer") - confirm against the live markup.
    asin = _detail_value(soup, 'ASIN')
    if not asin:
        # Previous selector kept as a fallback for this one field only.
        legacy_cell = soup.find('td', {'class': 'a-size-medium a-color-secondary'})
        asin = legacy_cell.text.strip() if legacy_cell is not None else ''

    return {
        'description': description.text.strip() if description is not None else '',
        'asin': asin,
        'product_description': (
            product_description.text.strip() if product_description is not None else ''
        ),
        'manufacturer': _detail_value(soup, 'Manufacturer'),
    }

# Seconds to wait for each search-page response before giving up, so a
# stalled connection cannot hang the run.
REQUEST_TIMEOUT = 10

# Collect the listing records from search-result pages 1 through 20.
products_data = []
for page in range(1, 21):
    # Appending the page number to the end of the URL only altered the
    # trailing ``ref`` tag ("sr_pg_11", "sr_pg_12", ... "sr_pg_120") and never
    # set a page parameter. Send the number as ``page`` and keep ``ref`` in
    # step with it.
    # NOTE(review): ``page`` is assumed to be the parameter the site honours
    # for pagination - confirm against a live request.
    url = (
        "https://www.amazon.in/s?k=bags"
        "&page=" + str(page) +
        "&crid=2M096C61O4MLT&qid=1653308124&sprefix=ba%2Caps%2C283"
        "&ref=sr_pg_" + str(page)
    )
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(response.text, 'html.parser')
    products_data.extend(scrape_product_data(soup))

# Enrich the first 200 listings with the fields from their detail pages.
all_product_details = []
for product in products_data[:200]:
    # urljoin resolves site-relative hrefs against the host and leaves hrefs
    # that are already absolute untouched; plain concatenation would mangle
    # the latter.
    product_url = urljoin("https://www.amazon.in", product['url'])
    all_product_details.append({**product, **scrape_product_details(product_url)})

df = pd.DataFrame(all_product_details)
df.to_csv('/Users/saifpatel/Python/products_data.csv', index=False)

174
{'url': '/gp/help/customer/display.html?nodeId=200533820', 'name': '', 'price': '', 'rating': '', 'reviews': ''}
[{'url': '/b/ref=sxts_spks_0_0_d1b13407-0df6-43f6-a6ba-66a65c6b6cff?node=21102587031&pd_rd_w=vmdXu&content-id=amzn1.sym.d1b13407-0df6-43f6-a6ba-66a65c6b6cff:amzn1.sym.d1b13407-0df6-43f6-a6ba-66a65c6b6cff&pf_rd_p=d1b13407-0df6-43f6-a6ba-66a65c6b6cff&pf_rd_r=W9EX971H018G29KQQW9Q&pd_rd_wg=qMFvu&pd_rd_r=be7b448b-48f7-42fc-bd92-99183e6c3ad2&qid=1675407532', 'name': '', 'price': '', 'rating': '', 'reviews': '', 'description': '', 'asin': '', 'product_description': '', 'manufacturer': ''}, {'url': '/gp/bestsellers/luggage/2917436031/ref=sr_bs_0_2917436031_1', 'name': 'American Tourister 32 Ltrs Black Casual Backpack (AMT FIZZ SCH BAG 02 - BLACK)', 'price': '₹1,299', 'rating': '4.1 out of 5 stars', 'reviews': '', 'description': '', 'asin': '', 'product_description': '', 'manufacturer': ''}, {'url': '/Wesley-Milestone-Waterproof-Backpack-Business/dp/B084JGJ8PF/ref=ice_ac_b_dpb', 