# Webscrapping de datos de MercadoLibre


In [2]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
import time

## Scrappeamos una sóla página 

In [3]:
# Definimos la URL para scrappear
# url = 'https://www.mercadolibre.com.ar/mas-vendidos#menu=categories' 
url = 'https://listado.mercadolibre.com.ar/notebook#trends_tracking_id=61b3c37c-e0da-4304-ac5f-cb66f17b876e&component_id=MOST_WANTED'

# Hacemos el request de la url, y luego lo metemos en BeautifulSoup
html = requests.get(url.strip(), headers={'User-Agent': 'Mozilla/5.0'}).text

In [5]:
html

'\n<!DOCTYPE html>\n<html lang="es-AR">\n<head><link rel="preconnect" href="https://www.google-analytics.com"/><link rel="preconnect" href="https://www.google.com"/><link rel="preconnect" href="https://data.mercadolibre.com"/><link rel="preconnect" href="https://http2.mlstatic.com"/><link rel="preconnect" href="https://stats.g.doubleclick.net"/><link rel="preconnect" href="https://analytics.mercadolibre.com.ar"/><link rel="preconnect" href="https://analytics.mercadolibre.com"/><link rel="preconnect" href="https://www.google.com.ar"/><script type=\'text/javascript\'>window.NREUM||(NREUM={});NREUM.info = {"agent":"","beacon":"bam.nr-data.net","errorBeacon":"bam.nr-data.net","licenseKey":"NRBR-766f4fb616d3a2368ce","applicationID":"1729522169","agentToken":null,"applicationTime":1030.658199,"transactionName":"YlZQYEVZC0QEV0BZV1scd0xHSgBEFl5HH39wZx0bHQ==","queueTime":0,"ttGuid":"31acbebe6081ac9a"}; (window.NREUM||(NREUM={})).init={privacy:{cookies_enabled:true},ajax:{deny_list:["bam.nr-data

In [4]:
soup = BeautifulSoup(html, "lxml")

In [6]:
soup

<!DOCTYPE html>
<html lang="es-AR">
<head><link href="https://www.google-analytics.com" rel="preconnect"/><link href="https://www.google.com" rel="preconnect"/><link href="https://data.mercadolibre.com" rel="preconnect"/><link href="https://http2.mlstatic.com" rel="preconnect"/><link href="https://stats.g.doubleclick.net" rel="preconnect"/><link href="https://analytics.mercadolibre.com.ar" rel="preconnect"/><link href="https://analytics.mercadolibre.com" rel="preconnect"/><link href="https://www.google.com.ar" rel="preconnect"/><script type="text/javascript">window.NREUM||(NREUM={});NREUM.info = {"agent":"","beacon":"bam.nr-data.net","errorBeacon":"bam.nr-data.net","licenseKey":"NRBR-766f4fb616d3a2368ce","applicationID":"1729522169","agentToken":null,"applicationTime":1030.658199,"transactionName":"YlZQYEVZC0QEV0BZV1scd0xHSgBEFl5HH39wZx0bHQ==","queueTime":0,"ttGuid":"31acbebe6081ac9a"}; (window.NREUM||(NREUM={})).init={privacy:{cookies_enabled:true},ajax:{deny_list:["bam.nr-data.net"]}

In [7]:
# Definimos un dict en blanco para cargarle los resultados en cada iteración
# i es el identificador del producto como clave principal del diccionario
dict = {}
i = 0

# Obtenemos el cuadro principal de los resultados, donde se encuentran divs mas pequeños con 
# la especificación de cada producto
item_list = soup.find_all('div', class_="ui-search-result__content-wrapper shops__result-content-wrapper")

# Ahora en item vamos guardando cada div de cada producto, y le extraemos las variables de principal interés
for item in item_list:
    product_name = item.find('h2')
    product_name = product_name.text
    product_price_symbol = item.find('span', {'class' : 'price-tag-symbol'})
    product_price_number = item.find('span', {'class' : 'price-tag-fraction'})
    product_discount = item.find('span', {'class' : 'ui-search-price__discount shops__price-discount'})
    product_cuotas = item.find('span', {'class' : 'ui-search-item__group__element shops__items-group-details ui-search-installments ui-search-color--LIGHT_GREEN'})
    product_shipping = item.find('p', {'class' : 'ui-search-item__shipping ui-search-item__shipping--free shops__item-shipping-free'}).text
    product_reviews = item.find('span', {'class' : 'ui-search-reviews__amount'})
    product_seller = item.find('p', {'class' : 'ui-search-official-store-label ui-search-item__group__element shops__items-group-details ui-search-color--GRAY'})
    product_link = item.find('a').get('href')
    
#   If find_all() can’t find anything, it returns an empty list. If find() can’t find anything, it returns None
    
#   .text is just a property that calls get_text. Therefore, calling get_text without arguments is 
#   the same thing as .text. However, get_text can also support various keyword arguments to change how 
#   it behaves (separator, strip, types). If you need more control over the result, 
#   then you need the functional form.


#   Sacamos el text de las etiquetas

    if product_price_symbol is not None:
        product_price_symbol = product_price_symbol.get_text()
    if product_price_number is not None:
        product_price_number = product_price_number.get_text()
    if product_discount is not None:
        product_discount = product_discount.get_text()
    if product_cuotas is not None:
        product_cuotas = product_cuotas.get_text()
    if product_reviews is not None:
        product_reviews = product_reviews.get_text()
    if product_seller is not None:
        product_seller = product_seller.get_text()
    
# Guardamos los resultados a un diccionario anidado.
    dict.update({i:{
        'product_name': product_name,
        'price_tag_symbol': product_price_symbol,
        'product_price_number': product_price_number,
        'product_discount': product_discount,
        'product_cuotas': product_cuotas,
        'product_shipping': product_shipping,
        'product_reviews': product_reviews,
        'product_seller': product_seller,
        'product_link': product_link
    }})

# Aumentamos el valor de i, para que el próximo producto tenga un id mayor al corriente    
    i= i + 1
    

In [8]:
dict

{0: {'product_name': 'Notebook Asus Vivobook 15 X1502 azul 15.6", Intel Core i5 1240P  8GB de RAM 256GB SSD, Intel Iris Xe Graphics G7 80EUs Windows 11 Home',
  'price_tag_symbol': '$',
  'product_price_number': '449.999',
  'product_discount': '18% OFF',
  'product_cuotas': 'Mismo precio en 6 cuotas de $61.499',
  'product_shipping': 'Envío gratis',
  'product_reviews': '122',
  'product_seller': 'Vendido por ASUS',
  'product_link': 'https://www.mercadolibre.com.ar/notebook-asus-vivobook-15-x1502-azul-156-intel-core-i5-1240p-8gb-de-ram-256gb-ssd-intel-iris-xe-graphics-g7-80eus-windows-11-home/p/MLA19710772?pdp_filters=category:MLA1652#searchVariation=MLA19710772&position=1&search_layout=stack&type=product&tracking_id=7b8f3426-6ba6-4456-838f-d720c968b3c1'},
 1: {'product_name': 'Notebook Asus I7 Vivobook 15 512gb Ssd 16gb Ram 4,7 Ghz',
  'price_tag_symbol': '$',
  'product_price_number': '644.999',
  'product_discount': '15% OFF',
  'product_cuotas': 'Mismo precio en 6 cuotas de $91.3

In [11]:
# Creamos un dataframe a partir de un dict. Tenemos que aplicar transpose para que cambie filas por columnas 
df = pd.DataFrame(dict).transpose()

In [12]:
df

Unnamed: 0,product_name,price_tag_symbol,product_price_number,product_discount,product_cuotas,product_shipping,product_reviews,product_seller,product_link
0,"Notebook Asus Vivobook 15 X1502 azul 15.6"", In...",$,449.999,18% OFF,Mismo precio en 6 cuotas de $61.499,Envío gratis,122.0,Vendido por ASUS,https://www.mercadolibre.com.ar/notebook-asus-...
1,Notebook Asus I7 Vivobook 15 512gb Ssd 16gb Ra...,$,644.999,15% OFF,Mismo precio en 6 cuotas de $91.374,Envío gratis,130.0,Vendido por ASUS,https://www.mercadolibre.com.ar/notebook-asus-...
2,Notebook Lenovo IdeaPad 3i Intel I5 1135g7 Ram...,$,287.028,,,Envío gratis,17.0,,https://www.mercadolibre.com.ar/notebook-lenov...
3,Notebook Iqual Nq1 Intel Celeron N4020 4gb 128...,$,109.999,9% OFF,,Envío gratis,40.0,Vendido por IQUAL,https://www.mercadolibre.com.ar/notebook-iqual...
4,"Notebook HP 15-ef2529la plata natural 15.6"", A...",$,459.999,,,Envío gratis,11.0,Vendido por HP Tienda Oficial,https://www.mercadolibre.com.ar/notebook-hp-15...
5,Notebook Lenovo IdeaPad 15ITL6 arctic gray 15...,$,419.999,11% OFF,,Envío gratis,233.0,,https://www.mercadolibre.com.ar/notebook-lenov...
6,"Ultrabook Gadnic Cloudbook Glow Pro gray 14.1""...",$,154.999,18% OFF,Mismo precio en 6 cuotas de $20.999,Envío gratis,113.0,Vendido por Gadnic,https://www.mercadolibre.com.ar/ultrabook-gadn...
7,Notebook Hp 15-dy5001la Intel I5 1235u 8gb 512...,$,529.999,20% OFF,Mismo precio en 6 cuotas de $70.351,Envío gratis,57.0,Vendido por HP Tienda Oficial,https://www.mercadolibre.com.ar/notebook-hp-15...
8,Notebook Hp Elitebook 840 G3 I5 6ta 256 Ssd 8g...,$,292.000,7% OFF,Mismo precio en 6 cuotas de $45.077,Envío gratis,,Vendido por CPT Oficina,https://click1.mercadolibre.com.ar/mclics/clic...
9,Notebook Bangho Max L5 Intel I5 8gb Ssd 240g 1...,$,434.999,6% OFF,Mismo precio en 6 cuotas de $67.849,Envío gratis,,Vendido por Bidcom,https://click1.mercadolibre.com.ar/mclics/clic...


## Scrappeamos página por página 

In [14]:
# Función de request. Vamos a llamarla cuando se necesite cambiar de página. Sólo le pasamos la URL

def request_url(url):
    html = requests.get(url.strip(), headers={'User-Agent': 'Mozilla/5.0'}).text
    soup = BeautifulSoup(html, "lxml")
    return soup

In [15]:
dict = {}
i = 0

# identificador de página en la url de MercadoLibre
pages = [1, 51, 101, 151, 201, 251, 301]

# La única diferencia respecto al anterior, es que ahora creamos un bucle por encima para que cargue una nuevapágina
# una vez que obtuvo la información de todos los productos de un página.
for page in pages:
    
    url = 'https://listado.mercadolibre.com.ar/computacion/laptops-accesorios/notebooks/notebook_Desde_' + str(page) + '_NoIndex_True' 
    soup = request_url(url)
    
    item_list = soup.find_all('div', class_="ui-search-result__content-wrapper shops__result-content-wrapper")
    
    for item in item_list:
        product_name = item.find('h2')
        product_price_symbol = item.find('span', {'class' : 'price-tag-symbol'})
        product_price_number = item.find('span', {'class' : 'price-tag-fraction'})
        product_discount = item.find('span', {'class' : 'ui-search-price__discount shops__price-discount'})
        product_cuotas = item.find('span', {'class' : 'ui-search-item__group__element shops__items-group-details ui-search-installments ui-search-color--LIGHT_GREEN'})
        product_shipping = item.find('p', {'class' : 'ui-search-item__shipping ui-search-item__shipping--free shops__item-shipping-free'})
        product_reviews = item.find('span', {'class' : 'ui-search-reviews__amount'})
        product_seller = item.find('p', {'class' : 'ui-search-official-store-label ui-search-item__group__element shops__items-group-details ui-search-color--GRAY'})
        product_label = item.find('label', {'class': 'ui-search-styled-label ui-search-item__highlight-label__text'})
        product_link = item.find('a').get('href')
        product_old_price = item.find('span', {'class':'price-tag-text-sr-only'})
        product_name = product_name.text

    #     If find_all() can’t find anything, it returns an empty list. If find() can’t find anything, it returns None

    #   .text is just a property that calls get_text. Therefore, calling get_text without arguments is 
    #   the same thing as .text. However, get_text can also support various keyword arguments to change how 
    #   it behaves (separator, strip, types). If you need more control over the result, 
    #   then you need the functional form.

        if product_price_symbol is not None:
            product_price_symbol = product_price_symbol.get_text()
        if product_price_number is not None:
            product_price_number = product_price_number.get_text()
        if product_discount is not None:
            product_discount = product_discount.get_text()
        if product_cuotas is not None:
            product_cuotas = product_cuotas.get_text()
        if product_shipping is not None:
            product_shipping = product_shipping.get_text()
        if product_reviews is not None:
            product_reviews = product_reviews.get_text()
        if product_seller is not None:
            product_seller = product_seller.get_text()
        if product_label is not None:
            product_label = product_label.get_text()
        if product_old_price is not None:
            product_old_price = product_old_price.get_text()
            

        dict.update({i:{
            'product_name': product_name,
            'price_tag_symbol': product_price_symbol,
            'product_price_number': product_price_number,
            'product_label': product_label,
            'product_old_price':product_old_price,
            'product_discount': product_discount,
            'product_cuotas': product_cuotas,
            'product_shipping': product_shipping,
            'product_reviews': product_reviews,
            'product_seller': product_seller,
            'product_link': product_link
        }})

        i= i + 1


In [16]:
df = pd.DataFrame(dict).transpose()

In [17]:
df

Unnamed: 0,product_name,price_tag_symbol,product_price_number,product_label,product_old_price,product_discount,product_cuotas,product_shipping,product_reviews,product_seller,product_link
0,"Ultrabook Gadnic Cloudbook Glow Pro gray 14.1""...",$,154.999,,Antes: 154999 pesos,18% OFF,Mismo precio en 6 cuotas de $20.999,,113,Vendido por Gadnic,https://click1.mercadolibre.com.ar/mclics/clic...
1,"Notebook Banghó Max L5 i7 gris oscura 15.6"", I...",$,596.899,,Antes: 596899 pesos,34% OFF,,,43,Vendido por Bidcom,https://click1.mercadolibre.com.ar/mclics/clic...
2,"Notebook Asus Vivobook 15 X1502 azul 15.6"", In...",$,449.999,OFERTA DEL DÍA,Antes: 449999 pesos,18% OFF,Mismo precio en 6 cuotas de $61.499,Envío gratis,122,Vendido por ASUS,https://www.mercadolibre.com.ar/notebook-asus-...
3,Notebook Asus I7 Vivobook 15 512gb Ssd 16gb Ra...,$,644.999,OFERTA DEL DÍA,Antes: 644999 pesos,15% OFF,Mismo precio en 6 cuotas de $91.374,Envío gratis,130,Vendido por ASUS,https://www.mercadolibre.com.ar/notebook-asus-...
4,"Notebook HP 15-ef2529la plata natural 15.6"", A...",$,459.999,MÁS VENDIDO,459999 pesos,,,,11,Vendido por HP Tienda Oficial,https://www.mercadolibre.com.ar/notebook-hp-15...
...,...,...,...,...,...,...,...,...,...,...,...
375,Notebook Nsx Kairos Ultraslim Intel I5 16gb Ss...,$,401.248,,401248 pesos,,,Envío gratis,,,https://articulo.mercadolibre.com.ar/MLA-11668...
376,"Notebook Banghó Max L5 i3 gris oscura 15.6"", I...",$,284.999,,284999 pesos,,,Envío gratis,54,,https://www.mercadolibre.com.ar/notebook-bangh...
377,Notebook Lenovo Ideapad 3 Intel I7 256gb Ssd 8...,$,449.999,,449999 pesos,,,Envío gratis,,Vendido por Start,https://articulo.mercadolibre.com.ar/MLA-13686...
378,Notebook Bangho Max L5 Core I7 Ram 8gb Ssd 480...,$,202.999,,202999 pesos,,,Envío gratis,,,https://articulo.mercadolibre.com.ar/MLA-13646...


In [24]:
primera = [0, 1, 2, 3, 4]
segunda = ['a', 'b', 'c', 'd']

In [25]:
for i in primera:
    print(i)
    print("------------")
    for b in segunda:
        print("                 " +str(b))
        if b = 'b':
            exit
        print('                  ===============')

0
------------
                 a
                 b
                 c
                 d
1
------------
                 a
                 b
                 c
                 d
2
------------
                 a
                 b
                 c
                 d
3
------------
                 a
                 b
                 c
                 d
4
------------
                 a
                 b
                 c
                 d
