# Web scraping de datos de MercadoLibre


In [2]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import time

## Scrapeamos una sola página

In [3]:
# URL of the listing page to scrape.
# Alternative page that was tried earlier:
# url = 'https://www.mercadolibre.com.ar/mas-vendidos#menu=categories' 
url = 'https://listado.mercadolibre.com.ar/notebook#trends_tracking_id=61b3c37c-e0da-4304-ac5f-cb66f17b876e&component_id=MOST_WANTED'

# Download the page. The timeout keeps the cell from hanging forever if the server
# never answers, and raise_for_status() stops here on a 4xx/5xx answer instead of
# letting an error page be parsed as if it were a product listing.
response = requests.get(url.strip(), headers={'User-Agent': 'Mozilla/5.0'}, timeout=30)
response.raise_for_status()
html = response.text

# Parse the downloaded HTML with BeautifulSoup using the lxml parser.
soup = BeautifulSoup(html, "lxml")


In [219]:
# Results container: a nested dict {product id: {field name: value}}.
# NOTE: the name `dict` shadows the built-in type. It is kept because the cell that
# builds the DataFrame reads this exact name.
dict = {}
# i is the running product id used as the outer key of the nested dict.
i = 0


def _text_or_none(tag):
    """Return the text of a BeautifulSoup tag, or None when the lookup found nothing.

    find() returns None when nothing matches (find_all() returns an empty list),
    so calling .text / .get_text() directly on its result raises AttributeError
    for any product that lacks the element.
    """
    return tag.get_text() if tag is not None else None


# Every matching div wraps the details of one product of the listing.
item_list = soup.find_all('div', class_="ui-search-result__content-wrapper shops__result-content-wrapper")

# Walk the product divs and pull out the fields of interest from each one.
for item in item_list:
    product_name = _text_or_none(item.find('h2'))
    product_price_symbol = _text_or_none(item.find('span', {'class' : 'price-tag-symbol'}))
    product_price_number = _text_or_none(item.find('span', {'class' : 'price-tag-fraction'}))
    product_discount = _text_or_none(item.find('span', {'class' : 'ui-search-price__discount shops__price-discount'}))
    product_cuotas = _text_or_none(item.find('span', {'class' : 'ui-search-item__group__element shops__items-group-details ui-search-installments ui-search-color--LIGHT_GREEN'}))
    # Products without the free-shipping tag used to crash the loop here; they
    # now get None like every other missing field.
    product_shipping = _text_or_none(item.find('p', {'class' : 'ui-search-item__shipping ui-search-item__shipping--free shops__item-shipping-free'}))
    product_reviews = _text_or_none(item.find('span', {'class' : 'ui-search-reviews__amount'}))
    product_seller = _text_or_none(item.find('p', {'class' : 'ui-search-official-store-label ui-search-item__group__element shops__items-group-details ui-search-color--GRAY'}))

    # The link is read from the href of the first anchor inside the product div.
    link_tag = item.find('a')
    product_link = link_tag.get('href') if link_tag is not None else None

    # Store this product under its id in the nested dict.
    dict[i] = {
        'product_name': product_name,
        'price_tag_symbol': product_price_symbol,
        'product_price_number': product_price_number,
        'product_discount': product_discount,
        'product_cuotas': product_cuotas,
        'product_shipping': product_shipping,
        'product_reviews': product_reviews,
        'product_seller': product_seller,
        'product_link': product_link
    }

    # Advance the id so the next product gets a larger key than the current one.
    i += 1
    


In [232]:
# Build a DataFrame from the nested dict. The frame is transposed so that rows and
# columns are swapped.
df = pd.DataFrame(dict).T

## Scrapeamos página por página

In [4]:
# Función de request. Vamos a llamarla cuando se necesite cambiar de página. Sólo le pasamos la URL

def request_url(url, timeout=30):
    """Download a page and return it parsed as a BeautifulSoup document.

    Args:
        url: Address to fetch; surrounding whitespace is stripped before the request.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        BeautifulSoup: the response body parsed with the "lxml" parser.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status, so an
            error page is never parsed as if it were a listing.
        requests.Timeout: if the server does not answer within `timeout` seconds.
    """
    response = requests.get(url.strip(), headers={'User-Agent': 'Mozilla/5.0'}, timeout=timeout)
    response.raise_for_status()
    return BeautifulSoup(response.text, "lxml")

In [250]:
# Results container: a nested dict {product id: {field name: value}}.
# NOTE: the name `dict` shadows the built-in type. It is kept because the cell that
# builds the DataFrame reads this exact name.
dict = {}
# i is the running product id, shared across all pages.
i = 0

# Page identifiers used in the MercadoLibre URL (the value after "_Desde_"):
# 1, 51, 101, ..., 301.
pages = list(range(1, 302, 50))

# Output field -> (tag name, exact class attribute) looked up inside each product div.
# A class of None means "first tag with that name". The insertion order of this
# table is the column order of the resulting DataFrame.
text_fields = {
    'product_name': ('h2', None),
    'price_tag_symbol': ('span', 'price-tag-symbol'),
    'product_price_number': ('span', 'price-tag-fraction'),
    'product_label': ('label', 'ui-search-styled-label ui-search-item__highlight-label__text'),
    'product_old_price': ('span', 'price-tag-text-sr-only'),
    'product_discount': ('span', 'ui-search-price__discount shops__price-discount'),
    'product_cuotas': ('span', 'ui-search-item__group__element shops__items-group-details ui-search-installments ui-search-color--LIGHT_GREEN'),
    'product_shipping': ('p', 'ui-search-item__shipping ui-search-item__shipping--free shops__item-shipping-free'),
    'product_reviews': ('span', 'ui-search-reviews__amount'),
    'product_seller': ('p', 'ui-search-official-store-label ui-search-item__group__element shops__items-group-details ui-search-color--GRAY'),
}
# NOTE(review): in the rendered output, discounted rows show the same number in
# product_price_number and in the "Antes: ..." text, and undiscounted rows carry the
# current price in product_old_price. The first match of these two selectors is
# probably not always the intended element - confirm against the page HTML.

# Same extraction as for a single page, wrapped in an outer loop that moves to the
# next page once every product of the current one has been collected.
for page_number, page in enumerate(pages):
    # Pause briefly between consecutive requests so the site is not hit back to back.
    if page_number:
        time.sleep(1)

    url = 'https://listado.mercadolibre.com.ar/computacion/laptops-accesorios/notebooks/notebook_Desde_' + str(page) + '_NoIndex_True' 
    soup = request_url(url)

    # Every matching div wraps the details of one product of the listing.
    item_list = soup.find_all('div', class_="ui-search-result__content-wrapper shops__result-content-wrapper")

    for item in item_list:
        record = {}
        for field, (tag_name, css_class) in text_fields.items():
            attrs = {'class': css_class} if css_class else {}
            tag = item.find(tag_name, attrs)
            # find() returns None when nothing matches; keep None for missing fields
            # instead of failing on .get_text().
            record[field] = tag.get_text() if tag is not None else None

        # The link is read from the href of the first anchor inside the product div.
        link_tag = item.find('a')
        record['product_link'] = link_tag.get('href') if link_tag is not None else None

        dict[i] = record
        i += 1


In [251]:
# Build a DataFrame from the nested dict and transpose it (rows and columns swapped).
df = pd.DataFrame(dict).T

In [252]:
# Show the scraped table as the cell output.
df

Unnamed: 0,product_name,price_tag_symbol,product_price_number,product_label,product_old_price,product_discount,product_cuotas,product_shipping,product_reviews,product_seller,product_link
0,Notebook Exo Xq3k-sh38828 Intel I3 8gb Ssd 256...,$,181.999,OFERTA DEL DÍA,Antes: 181999 pesos,17% OFF,Hasta 12 cuotas sin interés,Envío gratis,,Vendido por EXO,https://articulo.mercadolibre.com.ar/MLA-11639...
1,"Notebook HP 15-ef2126wm spruce blue 15.6"", AMD...",$,155.422,,155422 pesos,,,,447,,https://www.mercadolibre.com.ar/notebook-hp-15...
2,"Notebook Dell Inspiron 3515 azul 15.5"", AMD Ry...",$,199.999,,Antes: 199999 pesos,20% OFF,,Envío gratis,171,,https://www.mercadolibre.com.ar/notebook-dell-...
3,"Notebook Dell Inspiron 3515 plateada 15.5"", AM...",$,145.900,MÁS VENDIDO,145900 pesos,,,Envío gratis,171,,https://www.mercadolibre.com.ar/notebook-dell-...
4,"Apple Macbook Air (13 pulgadas, 2020, Chip M1,...",$,299.699,,299699 pesos,,,Envío gratis,502,,https://www.mercadolibre.com.ar/apple-macbook-...
...,...,...,...,...,...,...,...,...,...,...,...
381,"Notebook Bangho 15,6 Core I5 8gb 480gb Ssd Bes...",$,269.999,,Antes: 269999 pesos,10% OFF,Hasta 6 cuotas sin interés,Envío gratis,,Vendido por Bangho,https://articulo.mercadolibre.com.ar/MLA-11099...
382,Notebook Positivo Bgh At550 4gb De Ram Disco 5...,$,59.000,,Antes: 59000 pesos,5% OFF,,Envío gratis,,,https://articulo.mercadolibre.com.ar/MLA-11478...
383,Notebook Hp 245 G8 Amd Ryzen 5 5500 8 Gb 1 Tb ...,$,134.999,,134999 pesos,,,Envío gratis,,,https://articulo.mercadolibre.com.ar/MLA-11329...
384,Notebook Asus Core I7-11370h Rtx3050 512m.2 16...,$,349.999,,349999 pesos,,,Envío gratis,,,https://articulo.mercadolibre.com.ar/MLA-11783...
