In [1]:
import os
from urllib.parse import urlencode, urljoin

import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv


In [2]:
# Load variables from a local .env file (if one exists) into the environment,
# then read the ScraperAPI key. A missing SCRAPER_APIKEY raises KeyError here.
load_dotenv()
API_KEY = os.environ['SCRAPER_APIKEY']

In [3]:
# Single shared Session; load_page issues every GET through it.
session = requests.Session()

In [4]:
def get_scraperapi_url(search_term):
    """Build the ScraperAPI proxy URL for an Amazon keyword search.

    Args:
        search_term: Text to search for; it is URL-encoded as the ``k``
            query parameter of ``https://www.amazon.com/s``.

    Returns:
        str: ``http://api.scraperapi.com/?`` followed by the URL-encoded
        ``api_key`` and ``url`` (the Amazon search address) parameters.
    """
    amazon_query = urlencode({'k': search_term})
    target_url = 'https://www.amazon.com/s?' + amazon_query

    # The Amazon address is itself encoded again as the value of ``url``.
    proxy_query = urlencode({'api_key': API_KEY, 'url': target_url})
    return 'http://api.scraperapi.com/?' + proxy_query

In [5]:
def load_page(url, timeout=70):
    """Fetch ``url`` through the shared session and return the raw body.

    Args:
        url: Address to request with an HTTP GET.
        timeout: Seconds to wait on the server before giving up. Without a
            timeout a stalled connection can block the notebook
            indefinitely. The default is deliberately generous on the
            assumption that proxied requests are slow -- tune as needed.

    Returns:
        bytes | None: The response body when the status code is 200;
        otherwise ``None`` after printing the failing status code.

    Raises:
        requests.exceptions.RequestException: On connection errors or when
            ``timeout`` elapses.
    """
    response = session.get(url, timeout=timeout)
    if response.status_code == 200:
        print(f'[{response.status_code}] OK')
        return response.content

    print(f'Failed to fetch the page. Status code: {response.status_code}')
    # Explicit so callers can see that a failed fetch yields None.
    return None

In [6]:
def get_price(tag):
    """Return the price text held in ``tag``'s ``a-offscreen`` span.

    Args:
        tag: An element exposing ``find``; the first descendant ``span``
            with class ``a-offscreen`` is looked up inside it.

    Returns:
        str | None: The span's text with surrounding whitespace stripped,
        or ``None`` when ``tag`` contains no such span (previously this
        case raised ``AttributeError``).
    """
    offscreen = tag.find('span', class_='a-offscreen')
    if offscreen is None:
        return None
    return offscreen.get_text(strip=True)

In [7]:
def get_discounts(soup):
    """Collect the search results that carry an ``a-price a-text-price`` span.

    Every ``div`` with a non-empty ``data-asin`` attribute is inspected. A
    result is kept only when it contains a ``span`` whose class is
    ``a-price a-text-price``; that span supplies ``old_price``.

    Args:
        soup: Parsed results page (anything exposing ``find_all``).

    Returns:
        list[dict]: One dict per kept result with the keys ``new_price``
        (only when an ``a-price`` span was found), ``old_price``, ``ASIN``
        and ``description`` (``'Not Found'`` when the title span is absent).
    """
    found = []

    for card in soup.find_all('div', attrs={'data-asin': True}):
        asin = card['data-asin']
        if asin == '':
            continue

        title_span = card.find('span', class_='a-size-medium a-color-base a-text-normal')
        record = {}

        # The current price is read first so 'new_price' stays the first key.
        current_span = card.find('span', class_='a-price')
        if current_span:
            record['new_price'] = get_price(current_span)

        list_span = card.find('span', class_='a-price a-text-price')
        if not list_span:
            # No old price on this result, so it is not recorded.
            continue

        record['old_price'] = get_price(list_span)
        record['ASIN'] = asin
        record['description'] = 'Not Found' if not title_span else title_span.get_text(strip=True)
        found.append(record)

    return found

In [8]:
def next_page(link):
    """Build the ScraperAPI proxy URL for a follow-up Amazon results page.

    Args:
        link: ``href`` of a pagination link, such as the value returned by
            ``get_next_page_link``. May be site-relative or absolute.

    Returns:
        str: ``http://api.scraperapi.com/?`` followed by the URL-encoded
        ``api_key`` and ``url`` query parameters.
    """
    # urljoin resolves relative hrefs against the site root and leaves
    # absolute URLs intact; plain concatenation would garble the latter and
    # any href that lacks a leading slash.
    page = urljoin('https://www.amazon.com', link)
    payload = {'api_key': API_KEY, 'url': page}
    return 'http://api.scraperapi.com/?' + urlencode(payload)

In [9]:
def get_next_page_link(soup):
    """Return the ``href`` of the "Next" pagination link, if there is one.

    Args:
        soup: Parsed results page (anything exposing ``find``).

    Returns:
        str | None: The ``href`` of the direct child of the
        ``s-pagination-strip`` span whose text is ``Next``; ``None`` when the
        strip is missing or no such child carries an ``href``.
    """
    strip = soup.find('span', class_='s-pagination-strip')
    if strip is None:
        # No pagination strip on this page, so there is nothing to follow.
        return None

    for child in strip:
        # Text nodes between the elements have no attribute accessor; skip them.
        get_attr = getattr(child, 'get', None)
        if get_attr is None:
            continue
        link = get_attr('href')
        # Tolerate whitespace around the label.
        if link and child.text.strip() == "Next":
            return link

    return None

In [10]:
# Keyword sent to the Amazon search; also echoed in the final summary line.
search = "Laptop"

In [11]:
# Fetch the first results page for the search term through the proxy.
# webpage is None when load_page does not receive a 200 response.
url = get_scraperapi_url(search)
webpage = load_page(url)

[200] OK


In [12]:
# Fail fast: without a first page there is no `soup`, and every later cell
# would otherwise die with a NameError or silently reuse a stale `soup`
# left over from an earlier run.
if not webpage:
    raise RuntimeError(f'Could not load the first results page for {search!r}')

soup = BeautifulSoup(webpage, 'lxml')
# Printing the title is a sanity check; a page without <title> should not crash it.
print(soup.title.text if soup.title else '(no <title> found)')

Amazon.com : Laptop


In [13]:
# Records found on the first results page; later pages are appended to
# this same list.
discounts = get_discounts(soup)

print(len(discounts))

8


In [14]:
# Walk the "Next" links until none is left, accumulating records in `discounts`.
# NOTE(review): this extends the list built in the previous cell, so
# re-running this cell alone appends the later pages a second time.

# Consecutive failed fetches tolerated before abandoning the crawl. The link
# is only advanced after a successful fetch, so without a bound a page that
# keeps failing would be retried forever.
MAX_FETCH_ATTEMPTS = 3

# Named `next_link` rather than `next` so the builtin next() is not shadowed.
next_link = get_next_page_link(soup)
page_count = 1
failed_attempts = 0
while next_link:
    webpage = load_page(next_page(next_link))
    if not webpage:
        failed_attempts += 1
        if failed_attempts >= MAX_FETCH_ATTEMPTS:
            print(f'Giving up after {MAX_FETCH_ATTEMPTS} consecutive failed fetches.')
            break
        continue

    failed_attempts = 0
    page_count += 1
    _soup = BeautifulSoup(webpage, 'lxml')
    discounts.extend(get_discounts(_soup))
    next_link = get_next_page_link(_soup)

print(f"{page_count} pages {len(discounts)} Products Discovered for {search}")

[200] OK
[200] OK
[200] OK
[200] OK
[200] OK
[200] OK
[200] OK
[200] OK
[200] OK
[200] OK
[200] OK
[200] OK
[200] OK
[200] OK
[200] OK
[200] OK
[200] OK
[200] OK
[200] OK
20 pages 134 Products Discovered for Laptop


In [16]:
import pandas as pd

In [17]:
# Tabulate the collected records; the price columns hold the scraped text as-is.
pd.DataFrame(discounts) 

Unnamed: 0,new_price,old_price,ASIN,description
0,$648.99,$799.97,B09BW611ZP,Not Found
1,$490.05,$524.99,B0C3RNRB8W,"HP 17 Laptop, 17.3” HD+ Display, 11th Gen Inte..."
2,$309.99,$329.99,B0BS4BP8FB,Acer Aspire 3 A315-24P-R7VH Slim Laptop | 15.6...
3,$199.00,$249.99,B09SVR5VD4,ASUS Vivobook Go 15 L510 Thin & Light Laptop C...
4,$176.99,$299.99,B08YKHYCPW,"Lenovo IdeaPad 1 14 Laptop, 14.0"" HD Display, ..."
...,...,...,...,...
129,"$2,096.76","$2,199.99",B0BZQP8591,"ASUS Vivobook Pro 16X Laptop, 16” 16:10 Displa..."
130,$272.99,$299.99,B0C49H9WSD,HP est 14'' Ultral Light Laptop for Students a...
131,$175.00,$213.59,B01AA1ERYI,"Dell Latitude E6520 15.6 Inch Business Laptop,..."
132,$269.99,$289.99,B0CJFMV266,"Laptop Computer, 15.6"" Laptop, 16GB RAM 512GB ..."
