# Web scraping de datos de MercadoLibre


In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import time

## Scrapeamos una sola página

In [2]:
# Search-results page to scrape.
# Alternative start page (best sellers), kept for reference:
# url = 'https://www.mercadolibre.com.ar/mas-vendidos#menu=categories'
url = 'https://listado.mercadolibre.com.ar/notebook#trends_tracking_id=61b3c37c-e0da-4304-ac5f-cb66f17b876e&component_id=MOST_WANTED'

# Fetch the page with a browser-like User-Agent header.
# - timeout: without it requests waits indefinitely on a stalled connection.
# - raise_for_status(): fail loudly on a 4xx/5xx answer instead of silently
#   parsing an error page and ending up with zero products.
response = requests.get(url.strip(), headers={'User-Agent': 'Mozilla/5.0'}, timeout=30)
response.raise_for_status()
html = response.text

# Parse the downloaded HTML with the lxml parser.
soup = BeautifulSoup(html, "lxml")


In [9]:
# Results container: maps a running integer product id (i) to a dict holding that
# product's fields.
# NOTE(review): the name `dict` shadows the built-in type. It is kept because the
# following cells read the results through this name; rename it everywhere at once.
dict = {}
i = 0

# Every search result sits in its own wrapper <div>; collect all of them.
item_list = soup.find_all('div', class_="ui-search-result__content-wrapper shops__result-content-wrapper")

for item in item_list:
    # find() returns None when the tag is missing (find_all() returns an empty list),
    # so each lookup first keeps the tag-or-None and is unwrapped only after a check.
    product_name = item.find('h2')
    product_price_symbol = item.find('span', {'class' : 'price-tag-symbol'})
    product_price_number = item.find('span', {'class' : 'price-tag-fraction'})
    product_discount = item.find('span', {'class' : 'ui-search-price__discount shops__price-discount'})
    product_cuotas = item.find('span', {'class' : 'ui-search-item__group__element shops__items-group-details ui-search-installments ui-search-color--LIGHT_GREEN'})
    product_shipping = item.find('p', {'class' : 'ui-search-item__shipping ui-search-item__shipping--free shops__item-shipping-free'})
    product_reviews = item.find('span', {'class' : 'ui-search-reviews__amount'})
    product_seller = item.find('p', {'class' : 'ui-search-official-store-label ui-search-item__group__element shops__items-group-details ui-search-color--GRAY'})
    product_link = item.find('a')

    # Replace each found tag by its text; fields that were not found stay None.
    # (.text is a property that calls get_text() without arguments, so both are
    # equivalent here; get_text() additionally accepts separator/strip options.)
    if product_name is not None:
        product_name = product_name.get_text()
    if product_price_symbol is not None:
        product_price_symbol = product_price_symbol.get_text()
    if product_price_number is not None:
        product_price_number = product_price_number.get_text()
    if product_discount is not None:
        product_discount = product_discount.get_text()
    if product_cuotas is not None:
        product_cuotas = product_cuotas.get_text()
    # Products without the free-shipping label have no such <p>; previously this
    # lookup called .text directly and raised AttributeError on those products.
    if product_shipping is not None:
        product_shipping = product_shipping.get_text()
    if product_reviews is not None:
        product_reviews = product_reviews.get_text()
    if product_seller is not None:
        product_seller = product_seller.get_text()
    # The link is the href attribute of the first <a> inside the result wrapper.
    if product_link is not None:
        product_link = product_link.get('href')

    # Store this product under its id in the nested results dict.
    dict[i] = {
        'product_name': product_name,
        'price_tag_symbol': product_price_symbol,
        'product_price_number': product_price_number,
        'product_discount': product_discount,
        'product_cuotas': product_cuotas,
        'product_shipping': product_shipping,
        'product_reviews': product_reviews,
        'product_seller': product_seller,
        'product_link': product_link
    }

    # Next product gets the next id.
    i += 1
    

In [10]:
# Display the nested results dict (product id -> fields) built by the scraping loop.
dict

{0: {'product_name': 'Notebook EXO Smart T33 gris 14", Intel Celeron N4020  4GB de RAM 64GB SSD, Intel UHD Graphics 600 1366x768px Windows 11',
  'price_tag_symbol': '$',
  'product_price_number': '75.000',
  'product_discount': '13% OFF',
  'product_cuotas': None,
  'product_shipping': 'Envío gratis',
  'product_reviews': '22',
  'product_seller': None,
  'product_link': 'https://www.mercadolibre.com.ar/notebook-exo-smart-t33-gris-14-intel-celeron-n4020-4gb-de-ram-64gb-ssd-intel-uhd-graphics-600-1366x768px-windows-11/p/MLA19535438?pdp_filters=category:MLA1652#searchVariation=MLA19535438&position=1&search_layout=stack&type=product&tracking_id=dc241f19-caed-4e55-9c7d-ba3b40b75737'},
 1: {'product_name': 'Notebook EXO Smart M33 gris 14.1", Intel Celeron N4020  4GB de RAM 64GB SSD, Intel UHD Graphics 600 1920x1080px Windows 10 Home',
  'price_tag_symbol': '$',
  'product_price_number': '108.999',
  'product_discount': '25% OFF',
  'product_cuotas': 'Hasta 12 cuotas sin interés',
  'produc

In [13]:
# Build a DataFrame from the nested dict. The outer keys (product ids) become
# columns, so the frame is flipped to get one row per product.
df = pd.DataFrame(dict).T

In [14]:
# Display the DataFrame of scraped products (one row per product).
df

Unnamed: 0,product_name,price_tag_symbol,product_price_number,product_discount,product_cuotas,product_shipping,product_reviews,product_seller,product_link
0,"Notebook EXO Smart T33 gris 14"", Intel Celeron...",$,75.000,13% OFF,,Envío gratis,22,,https://www.mercadolibre.com.ar/notebook-exo-s...
1,"Notebook EXO Smart M33 gris 14.1"", Intel Celer...",$,108.999,25% OFF,Hasta 12 cuotas sin interés,Envío gratis,58,Vendido por EXO,https://www.mercadolibre.com.ar/notebook-exo-s...
2,"Notebook Dell Inspiron 3515 plateada 15.5"", AM...",$,169.999,7% OFF,Hasta 6 cuotas sin interés,Envío gratis,185,,https://www.mercadolibre.com.ar/notebook-dell-...
3,"Notebook Lenovo 15,6 Amd Ryzen 5 5500u 8gb 25...",$,149.999,9% OFF,,Envío gratis,,,https://www.mercadolibre.com.ar/notebook-lenov...
4,"Notebook Dell Inspiron 3515 azul 15.5"", AMD Ry...",$,199.999,,,Envío gratis,185,,https://www.mercadolibre.com.ar/notebook-dell-...
...,...,...,...,...,...,...,...,...,...
219,Notebook Lenovo IdeaPad 15ALC6 arctic gray 15...,$,145.498,,,Envío gratis,164,,https://www.mercadolibre.com.ar/notebook-lenov...
220,"Notebook Asus X515EA gris 15.6"", Intel Core i5...",$,159.999,,,Envío gratis,879,,https://www.mercadolibre.com.ar/notebook-asus-...
221,Notebook Enova Intel Celeron 4gb 128gb W10,$,71.999,12% OFF,,Envío gratis,53,,https://www.mercadolibre.com.ar/notebook-enova...
222,"Apple Macbook Air (13 pulgadas, 2020, Chip M1,...",$,289.499,,,Envío gratis,486,,https://www.mercadolibre.com.ar/apple-macbook-...


## Scrapeamos página por página

In [16]:
def request_url(url, timeout=30):
    """Download a page and return it parsed as a BeautifulSoup tree.

    Meant to be called each time the scraper moves to another results page.

    Parameters
    ----------
    url : str
        Address to fetch; surrounding whitespace is stripped before the request.
    timeout : float, optional
        Seconds to wait for the server before giving up (default 30). Without a
        timeout, requests would wait indefinitely on a stalled connection.

    Returns
    -------
    BeautifulSoup
        The response body parsed with the lxml parser.

    Raises
    ------
    requests.HTTPError
        If the server answers with a 4xx/5xx status code.
    requests.Timeout
        If the server does not answer within `timeout` seconds.
    """
    response = requests.get(url.strip(), headers={'User-Agent': 'Mozilla/5.0'}, timeout=timeout)
    # Fail loudly on an error response instead of parsing an error page, which
    # would otherwise just yield an empty product list downstream.
    response.raise_for_status()
    return BeautifulSoup(response.text, "lxml")

In [17]:
# Results container: maps a running integer product id (i) to a dict holding that
# product's fields. The id keeps counting across pages.
# NOTE(review): the name `dict` shadows the built-in type. It is kept because the
# following cell builds the DataFrame from this name; rename it everywhere at once.
dict = {}
i = 0

# Values inserted after "_Desde_" in the listing URL, one per results page to fetch.
pages = [1, 51, 101, 151, 201, 251, 301]

# Same extraction as the single-page version, wrapped in an outer loop that loads
# the next results page once every product of the current page has been read.
for page in pages:

    url = 'https://listado.mercadolibre.com.ar/computacion/laptops-accesorios/notebooks/notebook_Desde_' + str(page) + '_NoIndex_True'
    soup = request_url(url)

    # Every search result sits in its own wrapper <div>.
    item_list = soup.find_all('div', class_="ui-search-result__content-wrapper shops__result-content-wrapper")

    for item in item_list:
        # find() returns None when the tag is missing (find_all() returns an empty
        # list), so each lookup keeps the tag-or-None and is unwrapped after a check.
        product_name = item.find('h2')
        product_price_symbol = item.find('span', {'class' : 'price-tag-symbol'})
        # NOTE(review): in the recorded output, discounted rows show the same amount
        # here as in product_old_price ("Antes: ..."), so this first match may be the
        # pre-discount price - confirm against the page markup.
        product_price_number = item.find('span', {'class' : 'price-tag-fraction'})
        product_discount = item.find('span', {'class' : 'ui-search-price__discount shops__price-discount'})
        product_cuotas = item.find('span', {'class' : 'ui-search-item__group__element shops__items-group-details ui-search-installments ui-search-color--LIGHT_GREEN'})
        product_shipping = item.find('p', {'class' : 'ui-search-item__shipping ui-search-item__shipping--free shops__item-shipping-free'})
        product_reviews = item.find('span', {'class' : 'ui-search-reviews__amount'})
        product_seller = item.find('p', {'class' : 'ui-search-official-store-label ui-search-item__group__element shops__items-group-details ui-search-color--GRAY'})
        product_label = item.find('label', {'class': 'ui-search-styled-label ui-search-item__highlight-label__text'})
        product_link = item.find('a')
        product_old_price = item.find('span', {'class':'price-tag-text-sr-only'})

        # Replace each found tag by its text; fields that were not found stay None.
        # (.text is a property that calls get_text() without arguments; get_text()
        # additionally accepts separator/strip options when more control is needed.)
        # The title and the link used to be unwrapped without a None check and would
        # raise AttributeError on a result lacking an <h2> or an <a>.
        if product_name is not None:
            product_name = product_name.get_text()
        if product_price_symbol is not None:
            product_price_symbol = product_price_symbol.get_text()
        if product_price_number is not None:
            product_price_number = product_price_number.get_text()
        if product_discount is not None:
            product_discount = product_discount.get_text()
        if product_cuotas is not None:
            product_cuotas = product_cuotas.get_text()
        if product_shipping is not None:
            product_shipping = product_shipping.get_text()
        if product_reviews is not None:
            product_reviews = product_reviews.get_text()
        if product_seller is not None:
            product_seller = product_seller.get_text()
        if product_label is not None:
            product_label = product_label.get_text()
        if product_link is not None:
            product_link = product_link.get('href')
        if product_old_price is not None:
            product_old_price = product_old_price.get_text()

        # Store this product under its id in the nested results dict.
        dict[i] = {
            'product_name': product_name,
            'price_tag_symbol': product_price_symbol,
            'product_price_number': product_price_number,
            'product_label': product_label,
            'product_old_price':product_old_price,
            'product_discount': product_discount,
            'product_cuotas': product_cuotas,
            'product_shipping': product_shipping,
            'product_reviews': product_reviews,
            'product_seller': product_seller,
            'product_link': product_link
        }

        # Next product gets the next id.
        i += 1


In [19]:
# The outer dict keys (product ids) become columns, so flip to one row per product.
df = pd.DataFrame(dict).T

In [20]:
# Display the DataFrame of products collected across all requested pages.
df

Unnamed: 0,product_name,price_tag_symbol,product_price_number,product_label,product_old_price,product_discount,product_cuotas,product_shipping,product_reviews,product_seller,product_link
0,"Notebook HP 14-dq2029la plata natural 14"", Int...",$,179.999,,179999 pesos,,Hasta 6 cuotas sin interés,,119,Vendido por HP Tienda Oficial,https://click1.mercadolibre.com.ar/mclics/clic...
1,"Notebook EXO Smart M33 gris 14.1"", Intel Celer...",$,108.999,OFERTA DEL DÍA,Antes: 108999 pesos,25% OFF,Hasta 12 cuotas sin interés,Envío gratis,58,Vendido por EXO,https://click1.mercadolibre.com.ar/mclics/clic...
2,"Notebook EXO Smart T33 gris 14"", Intel Celeron...",$,75.000,OFERTA DEL DÍA,Antes: 75000 pesos,13% OFF,,,22,,https://www.mercadolibre.com.ar/notebook-exo-s...
3,"Notebook Dell Inspiron 3515 plateada 15.5"", AM...",$,169.999,,Antes: 169999 pesos,7% OFF,Hasta 6 cuotas sin interés,,185,,https://www.mercadolibre.com.ar/notebook-dell-...
4,Notebook Lenovo IdeaPad 14IIL05 platinum gray...,$,149.999,,149999 pesos,,,,525,,https://www.mercadolibre.com.ar/notebook-lenov...
...,...,...,...,...,...,...,...,...,...,...,...
381,Notebook Hp Amd Ryzen 5 5500 8gb 256gb Ssd 15....,$,179.990,,179990 pesos,,Hasta 6 cuotas sin interés,Envío gratis,,,https://articulo.mercadolibre.com.ar/MLA-11264...
382,Notebook Xpg Xenia I7 11800h 32gb 1tb Rtx 3070...,$,487.026,,487026 pesos,,,Envío gratis,,,https://articulo.mercadolibre.com.ar/MLA-11440...
383,"Notebook HP 240 G7 plateado ceniza oscuro 14"",...",$,123.999,,123999 pesos,,,Envío gratis,1718,,https://www.mercadolibre.com.ar/notebook-hp-24...
384,Notebook Asus Core I5 X515 11° 12gb 15.6 500gb...,$,187.999,,187999 pesos,,,Envío gratis,,,https://articulo.mercadolibre.com.ar/MLA-11142...
