In [37]:
from urllib.parse import quote_plus

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver

In [61]:
def get_search_url(search_term: str, page: int = 1) -> str:
    """
    Build the Amazon Mexico search URL for a search term and page number.

    Parameters
    ----------
    search_term : str
        Free-text query, e.g. 'perfumes de mujer'.
    page : int, default 1
        Results page number placed in the 'page' query parameter.

    Returns
    -------
    str
        URL of the form 'http://www.amazon.com.mx/s?k=<term>&page=<page>'.
    """
    # quote_plus turns spaces into '+' (as before) and also percent-encodes
    # characters such as '&', '+', '#' or accented letters, which would
    # otherwise corrupt the query string.
    encoded_term = quote_plus(search_term)

    return 'http://www.amazon.com.mx/s?k=' + encoded_term + '&page=' + str(page)

def get_search_items(driver, url:str) -> list:
    """
    Load an Amazon search page and collect its result entries.

    Navigates `driver` to `url`, parses the resulting page source with
    BeautifulSoup's 'html.parser', and returns every <div> element whose
    'data-component-type' attribute equals 's-search-result'.
    """
    driver.get(url)

    # page_source is read only after navigation so it reflects the requested page
    page_html = driver.page_source
    parsed_page = BeautifulSoup(page_html, 'html.parser')

    return parsed_page.find_all('div', attrs={'data-component-type': 's-search-result'})

def get_attributes(item):
    """
    Extract the fields of a single search result.

    Parameters
    ----------
    item
        One search-result element (a BeautifulSoup item, as returned by
        get_search_items).

    Returns
    -------
    tuple
        (sku, description, url, price, rating, no_of_ratings)

        * sku           - value of the element's 'data-asin' attribute
        * description   - stripped text of the link inside the <h2>
        * url           - 'http://www.amazon.com.mx' + the link's href
        * price         - text of the 'a-offscreen' span inside the
                          'a-price' span, or '' when absent
        * rating        - text of the first <i> element, or '' when absent
        * no_of_ratings - int parsed from the first 'a-size-base' span,
                          or '' when absent or not numeric

    Raises
    ------
    KeyError
        If the element has no 'data-asin' attribute.
    AttributeError
        If the element has no <h2> containing a link.
    TypeError
        If the link has no href.
    """

    # sku
    sku = item.attrs['data-asin']

    # description and url both come from the title link
    anchor = item.h2.a
    description = anchor.text.strip()
    url = 'http://www.amazon.com.mx' + anchor.get('href')

    # price (optional: some results carry no price)
    try:
        price = item.find('span', 'a-price').find('span', 'a-offscreen').text
    except AttributeError:
        price = ''

    # rating (optional)
    try:
        rating = item.i.text
    except AttributeError:
        rating = ''

    # number of ratings (optional)
    try:
        count_text = item.find('span', {'class': 'a-size-base'}).text
        # Counts of 1000+ are rendered with thousands separators ("1,234");
        # int() rejects those, which used to drop the count silently.
        no_of_ratings = int(count_text.replace(',', '').strip())
    except (AttributeError, TypeError, ValueError):
        # Span missing, or its text is not a number (e.g. a label).
        no_of_ratings = ''

    return (sku, description, url, price, rating, no_of_ratings)

def get_all_amazon_data(search_term: str, driver, max_pages: int = 20) -> pd.DataFrame:
    """
    Scrape the result pages of an Amazon search into a DataFrame.

    Parameters
    ----------
    search_term : str
        Query passed to get_search_url.
    driver
        Selenium webdriver used by get_search_items to load each page.
    max_pages : int, default 20
        Number of result pages to fetch, starting at page 1. The default
        keeps the previous fixed behaviour of 20 pages.

    Returns
    -------
    pandas.DataFrame
        One row per search result with columns
        sku, description, url, price, rating, no_of_ratings.
        Empty (but with those columns) when nothing was found.
    """
    columns = ['sku', 'description', 'url', 'price', 'rating', 'no_of_ratings']
    rows = []

    for page in range(1, max_pages + 1):
        search_url = get_search_url(search_term=search_term, page=page)

        for item in get_search_items(driver, url=search_url):
            rows.append(get_attributes(item))

    # Build the frame once from the collected tuples rather than growing it per row.
    return pd.DataFrame(rows, columns=columns)

In [62]:
### search for women's perfume data

driver = webdriver.Chrome()

try:
    data = get_all_amazon_data(search_term='perfumes de mujer', driver=driver)
finally:
    # Always release the browser, even if scraping fails part-way.
    # quit() ends the whole WebDriver session; close() only closes the
    # current window.
    driver.quit()

data

Unnamed: 0,sku,description,url,price,rating,no_of_ratings
0,B08B43LBXZ,ésika Vibranza Perfume de Mujer,http://www.amazon.com.mx/gp/slredirect/picasso...,$374.90,4.6 de 5 estrellas,166
1,B08B44441V,ésika Bela Perfume de Mujer,http://www.amazon.com.mx/gp/slredirect/picasso...,$389.90,4.3 de 5 estrellas,39
2,B0009OAHCS,"Zino Davidoff Cool Water Spray para Mujer, 3.4...",http://www.amazon.com.mx/Zino-Davidoff-Water-T...,$485.00,4.7 de 5 estrellas,
3,B00H3R9JLE,"Bebe Desire Eau de Parfum Spray for Women, 3.4...",http://www.amazon.com.mx/Bebe-Desire-Women-3-4...,$459.00,4.4 de 5 estrellas,120
4,B00021AJ5I,"Perfume, Calvin Klein Eternity, Spray para muj...",http://www.amazon.com.mx/Calvin-Klein-Eternity...,$925.00,4.6 de 5 estrellas,
...,...,...,...,...,...,...
541,B0917R38PR,"Guarm Juego de manicura, juego de pedicura par...",http://www.amazon.com.mx/gp/slredirect/picasso...,$199.00,4.5 de 5 estrellas,67
542,B07YQ21X2L,50 piezas de botellas de viaje transparentes d...,http://www.amazon.com.mx/gp/slredirect/picasso...,$289.00,3.9 de 5 estrellas,20
543,B06WPBSW8Y,Nivea Crema Corporal Humectante Con Vitamina E...,http://www.amazon.com.mx/gp/slredirect/picasso...,$97.00,4.7 de 5 estrellas,
544,B07PPB3V4R,Nuxe Creme prodigieuse boost - primer 5-en-1 (...,http://www.amazon.com.mx/gp/slredirect/picasso...,$690.00,3.8 de 5 estrellas,5


In [63]:
# Persist the scraped results; the row index is omitted, the header row is kept.
data.to_csv(
    'amazon_perfumes.csv',
    index=False,
    header=True,
)