In [2]:
from selenium import webdriver
from bs4 import BeautifulSoup
import pandas as pd

In [42]:
def get_search_url(search_term: str, n: int = 200) -> str:
    """
    Build the Soriana search URL for a search term.

    Parameters
    ----------
    search_term : str
        Free-text query. It is percent-encoded (spaces become ``+``) so that
        characters such as ``&``, ``#``, ``+`` or accented letters cannot
        break or silently alter the query string.
    n : int, default 200
        Value sent as the ``sz`` query parameter (presumably the number of
        results requested per page -- confirm against the site).

    Returns
    -------
    str
        The search URL, always pointing at ``start=1`` / ``pageNumber=1``.
    """
    # Function-scope import so the block does not depend on the import cell.
    from urllib.parse import quote_plus

    # quote_plus yields the same output as the former ' ' -> '+' replacement
    # for plain alphanumeric terms, and additionally escapes reserved characters.
    search_term_plus = quote_plus(search_term)

    url = f'https://www.soriana.com/buscar?q={search_term_plus}' \
            + f'&start=1&sz={n}&pageNumber=1&forceOldView=false&view=grid'

    return url

def get_attributes(driver, url:str):
    """
    Load a Soriana results page and return its products as a DataFrame.

    The page at ``url`` is opened with ``driver`` and its source is parsed
    with BeautifulSoup. Product links (``a.product-tile--link``) provide the
    description and the absolute product URL; price blocks
    (``div.product-tile--price``) provide the price text. Links and price
    blocks are paired by position.

    Returns a DataFrame with the columns ``sku``, ``description``, ``price``
    and ``url``. ``price`` is a float obtained after stripping newlines,
    ``$`` and ``,``; ``sku`` is the digits/hyphens path segment that precedes
    ``.html`` in the URL, with the hyphens removed.
    """
    # Render the page in the browser and parse the resulting HTML
    driver.get(url)
    page = BeautifulSoup(driver.page_source, 'html.parser')

    # Anchor tags carry both the product title and its relative link
    link_tags = page.find_all('a', {'class': 'product-tile--link'})

    # Price containers, one per product tile
    price_tags = page.find_all('div', {'class': 'product-tile--price'})

    # NOTE(review): find('span', '') passes an empty string as the class
    # filter; this looks like it selects a span without a class -- confirm
    # before simplifying it to find('span').
    listing = pd.DataFrame({
        'description': [a.text for a in link_tags],
        'url': [f'https://www.soriana.com{a.get("href")}' for a in link_tags],
        'price': [div.find('span', '').text for div in price_tags],
    })

    # Strip newlines, currency sign and thousands separators, then cast
    listing['price'] = (
        listing['price']
        .str.replace(r'[\n$,]', '', regex=True)
        .astype(float)
    )

    # The sku is the last path segment of the product URL, hyphens dropped
    sku_raw = listing['url'].str.extract(r'/([\d-]+)\.html')[0]
    listing['sku'] = sku_raw.str.replace('-', '')

    # Return the columns in their final order
    return listing[['sku', 'description', 'price', 'url']]

def get_all_soriana_data(search_term: str, n: int = 200) -> pd.DataFrame:
    """
    Scrape Soriana search results for ``search_term`` into a DataFrame.

    A Chrome WebDriver session is started, the search URL is built with
    ``get_search_url(search_term, n)`` and the page is parsed with
    ``get_attributes``. The browser session is always shut down, even when
    loading or parsing the page raises.

    Parameters
    ----------
    search_term : str
        Free-text query passed to ``get_search_url``.
    n : int, default 200
        Forwarded to ``get_search_url``.

    Returns
    -------
    pandas.DataFrame
        The frame produced by ``get_attributes``.
    """
    # Define driver
    driver = webdriver.Chrome()

    try:
        # Get url
        search_url = get_search_url(search_term, n)

        # Get data
        return get_attributes(driver, search_url)
    finally:
        # quit() ends the whole WebDriver session (browser and driver
        # process); close() would only close the current window. Running it
        # in `finally` prevents leaking a browser when scraping fails.
        driver.quit()

In [53]:
# Scrape the Soriana search results for "perfume de mujer".
# This launches a Chrome WebDriver session and loads the live search page.
data = get_all_soriana_data(search_term='perfume de mujer')

In [54]:
# (rows, columns) of the scraped result
data.shape

(127, 4)

In [55]:
# Number of distinct sku values; compare with the row count to spot duplicated products
len(data.sku.unique())

127

In [56]:
# Display the scraped table (pandas truncates long frames in the rendered output)
data

Unnamed: 0,sku,description,price,url
0,11470961,Perfume Sweet Like Candy 100 Ml Edp Spray para Dama,899.0,https://www.soriana.com/perfume-sweet-like-candy-100-ml-edp-spray-para-dama/11470961.html
1,11470960,Perfume Ariana Grande 100 Ml Edp Spray para Dama,889.0,https://www.soriana.com/perfume-ariana-grande-100-ml-edp-spray-para-dama/11470960.html
2,11146414,Perfume Boss Femme 75 Ml Edp Spray para Dama,729.0,https://www.soriana.com/perfume-boss-femme-75-ml-edp-spray-para-dama/11146414.html
3,11361386,Perfume Bright Crystal Absolu 90 Ml Edp Spray para Dama,1369.0,https://www.soriana.com/perfume-bright-crystal-absolu-90-ml-edp-spray-para-dama/11361386.html
4,11422709,Perfume Coach 90 Ml Edp Spray para Dama,959.0,https://www.soriana.com/perfume-coach-90-ml-edp-spray-para-dama/11422709.html
...,...,...,...,...
122,11740140,Lapidus Woman 100 Ml Edt Spray Para Dama,469.0,https://www.soriana.com/lapidus-woman-100-ml-edt-spray-para-dama/11740140.html
123,11740248,Dolce Shine 75ml Edp Spray Para Dama Sin,1209.0,https://www.soriana.com/dolce-shine-75ml-edp-spray-para-dama-sin/11740248.html
124,11554960,Set Agatha Wow Girl 3Pzs para Dama,609.0,https://www.soriana.com/set-agatha-wow-girl-3pzs-para-dama/11554960.html
125,11617164,Set Guess Girl Belle 3Pzs para Dama,1070.0,https://www.soriana.com/set-guess-girl-belle-3pzs-para-dama/11617164.html
