## Pegando dados de preços do mercado livre

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
from datetime import datetime
import sqlite3

In [2]:
# Seleciono plavras chave para procurar no site do mercado livre
search_commands = ['celular', 'tv', 'geladeira', 'microondas', 'fogao', 'iphone', 'som', 'luminaria', 'notebook']

### Mercado Livre

In [3]:
%%time
data = pd.DataFrame(columns=['product_name', 'product_price', 'product_search', 'search_date', 'website', 'url'])
n=0
errors={}
for search in search_commands:
    
    url = 'https://lista.mercadolivre.com.br/{}#D[A:{}]'.format(search, search)
    r = requests.get(url)
    soup = BeautifulSoup(r.content, 'html.parser') 
    items = soup.find_all('li', class_='ui-search-layout__item')
    
    # Salvo os dados de cada item do site em um dataframe
    for item in items:
        try:
            try:
                name = item.find('h2', class_="ui-search-item__title ui-search-item__group__element").text
            except:
                name = item.find('h2', class_="ui-search-item__title").text
            price = item.find('span', class_="price-tag-fraction").text.replace('.', '')

            data.loc[n, 'product_name'] = name
            data.loc[n, 'product_price'] = price
            data.loc[n, 'product_search'] = search
            data.loc[n, 'search_date'] = datetime.today()
            data.loc[n, 'website'] = 'Mercado Livre'
            data.loc[n, 'url'] = url
            n+=1
        except:
            errors.update({search:url})
            pass

Wall time: 16.4 s


### Submarino

In [4]:
%%time

for search in search_commands:
    
    url = 'https://www.submarino.com.br/busca/{}'.format(search)
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0'}
    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.content, 'html.parser') 
    items = soup.find_all('div', class_='product-grid-item ProductGrid__GridColumn-sc-49j2r8-0 eZaEaE ColUI-gjy0oc-0 ifczFg ViewUI-sc-1ijittn-6 iXIDWU')
    
    
    # Salvo os dados de cada item do site em um dataframe
    for item in items:
        try:
            try:
                name = item.find('h2', class_='TitleUI-sc-1f5n3tj-13 dTabgr TitleH2-sc-1wh9e1x-1 fINzxm').text.strip()
                price = item.div.div.find('span', class_="PriceUI-sc-1f5n3tj-9 ebPdEH PriceUI-sc-1q8ynzz-0 inNBs TextUI-sc-12tokcy-0 CIZtP").text.strip().replace('R$', '').replace('.', '').replace(',', '.')
            except:
                price = item.div.div.find('span', class_="PriceUI-sc-1f5n3tj-9 RjuaG PriceUI-sc-1q8ynzz-0 inNBs TextUI-sc-12tokcy-0 CIZtP").text.strip().replace('R$', '').replace('.', '').replace(',', '.')
            
            data.loc[n, 'product_name'] = name
            data.loc[n, 'product_price'] = price
            data.loc[n, 'product_search'] = search
            data.loc[n, 'search_date'] = datetime.today()
            data.loc[n, 'website'] = 'Submarino'
            data.loc[n, 'url'] = url
            n+=1
        except:
            errors.update({search:url})
            pass

Wall time: 12.7 s


### Magazine Luiza

In [5]:
%%time

for search in search_commands:
    
    url = 'https://www.magazineluiza.com.br/busca/{}/'.format(search)
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0'}
    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.content, 'html.parser') 
    items = soup.find_all('li', class_='product')
    
    
    # Salvo os dados de cada item do site em um dataframe
    for item in items:
        try:
          
            name = item.h3.text.strip()
            price = item.find('span', class_='price-value').text.replace('R$', '').replace('.','').strip().replace(',','.')
                
            
            data.loc[n, 'product_name'] = name
            data.loc[n, 'product_price'] = price
            data.loc[n, 'product_search'] = search
            data.loc[n, 'search_date'] = datetime.today()
            data.loc[n, 'website'] = 'Magazine Luiza'
            data.loc[n, 'url'] = url
            n+=1
        except:
            errors.update({search:[name, price]})
            pass

Wall time: 15.7 s


In [6]:
# Como a base de dados fica:
data

Unnamed: 0,product_name,product_price,product_search,search_date,website,url
0,Moto E6s (2020) Dual SIM 64 GB azul-navy 4 GB RAM,1299,celular,2020-11-18 12:28:51.924693,Mercado Livre,https://lista.mercadolivre.com.br/celular#D[A:...
1,Samsung Galaxy A31 Dual SIM 128 GB prism crush...,1849,celular,2020-11-18 12:28:51.925692,Mercado Livre,https://lista.mercadolivre.com.br/celular#D[A:...
2,Samsung Galaxy A01 Core Dual SIM 32 GB azul 2 ...,1099,celular,2020-11-18 12:28:51.927691,Mercado Livre,https://lista.mercadolivre.com.br/celular#D[A:...
3,Samsung Galaxy A21s Dual SIM 64 GB preto 4 GB RAM,1599,celular,2020-11-18 12:28:51.929690,Mercado Livre,https://lista.mercadolivre.com.br/celular#D[A:...
4,Samsung Galaxy A31 Dual SIM 128 GB prism crush...,1849,celular,2020-11-18 12:28:51.930689,Mercado Livre,https://lista.mercadolivre.com.br/celular#D[A:...
...,...,...,...,...,...,...
951,Notebook Gamer Samsung Odyssey Intel Core i5 -...,5699.05,notebook,2020-11-18 12:29:33.986287,Magazine Luiza,https://www.magazineluiza.com.br/busca/notebook/
952,Notebook Lenovo Ideapad S145 Intel Core i7 8GB...,4369.05,notebook,2020-11-18 12:29:33.987286,Magazine Luiza,https://www.magazineluiza.com.br/busca/notebook/
953,Notebook Intel 2GB 32GB Multilaser Legacy Clou...,1139.05,notebook,2020-11-18 12:29:33.989285,Magazine Luiza,https://www.magazineluiza.com.br/busca/notebook/
954,Notebook Dell Inspiron i15-3584-U30P 8ª geraçã...,2896.55,notebook,2020-11-18 12:29:33.991284,Magazine Luiza,https://www.magazineluiza.com.br/busca/notebook/


In [7]:
# Nenhum erro foi apresentado
errors

{'fogao': ['Fogão 5 Bocas Mesa de Inox Forno Easy Clean Acendimento Manual Trempe Esmaltado - Braslar',
  '1003.47'],
 'som': ['Radio Som Mp3 Player Automotivo Carro Bluetooth First Option USB com Controle - Manfer Com. E Importacao Ltda',
  '218.41'],
 'celular': ['iPhone 11 Apple 128GB Preto 6,1” 12MP iOS - MHDH3BZ/A',
  '929.07'],
 'tv': ['Smart TV Full HD LED 43” AOC 43S5195/78G - Wi-Fi 3 HDMI 1 USB',
  '3324.05'],
 'geladeira': ['Geladeira Brastemp Frost Free Inverse 573 litros cor Inox com Smart Bar -',
  '4139.91'],
 'microondas': ['Micro-ondas Philco PMO25B 25 Litros Branco  220 Volts -',
  '486.00'],
 'iphone': ['iPhone 8 Plus Apple 64GB Dourado 5,5” 12MP - iOS', '5579.07'],
 'luminaria': ['Luminária de Mesa LED  - Azul Taschibra TLM 03', '141.55'],
 'notebook': ['Notebook Dell Inspiron 15 3000 i15-3583-D05P - Intel Pentium Gold 4GB 500GB 15,6” Linux',
  '2944.05']}

### Curiosidade

In [8]:
# Ticket médio da busca por 'celular'
data[data['product_search']=='celular']['product_price'].astype('float').mean()

1490.7963106796121

In [9]:
# Ticket médio da busca por 'iphone'
data[data['product_search']=='iphone']['product_price'].astype('float').mean()

5593.92112244898

In [10]:
data[(data['website']=='Magazine Luiza')]

Unnamed: 0,product_name,product_price,product_search,search_date,website,url
688,Smartphone Samsung Galaxy A01 32GB Vermelho - ...,728.99,celular,2020-11-18 12:29:19.359654,Magazine Luiza,https://www.magazineluiza.com.br/busca/celular/
689,Smartphone Samsung Galaxy A71 128GB Cinza 4G -...,1979.10,celular,2020-11-18 12:29:19.361475,Magazine Luiza,https://www.magazineluiza.com.br/busca/celular/
690,Smartphone Samsung Galaxy A21s 64GB Preto 4G -...,1394.07,celular,2020-11-18 12:29:19.363491,Magazine Luiza,https://www.magazineluiza.com.br/busca/celular/
691,Smartphone Motorola Moto G9 Play 64GB Azul Saf...,1250.99,celular,2020-11-18 12:29:19.364491,Magazine Luiza,https://www.magazineluiza.com.br/busca/celular/
692,Smartphone Samsung Galaxy A01 Core 32GB Azul -...,836.07,celular,2020-11-18 12:29:19.366489,Magazine Luiza,https://www.magazineluiza.com.br/busca/celular/
...,...,...,...,...,...,...
951,Notebook Gamer Samsung Odyssey Intel Core i5 -...,5699.05,notebook,2020-11-18 12:29:33.986287,Magazine Luiza,https://www.magazineluiza.com.br/busca/notebook/
952,Notebook Lenovo Ideapad S145 Intel Core i7 8GB...,4369.05,notebook,2020-11-18 12:29:33.987286,Magazine Luiza,https://www.magazineluiza.com.br/busca/notebook/
953,Notebook Intel 2GB 32GB Multilaser Legacy Clou...,1139.05,notebook,2020-11-18 12:29:33.989285,Magazine Luiza,https://www.magazineluiza.com.br/busca/notebook/
954,Notebook Dell Inspiron i15-3584-U30P 8ª geraçã...,2896.55,notebook,2020-11-18 12:29:33.991284,Magazine Luiza,https://www.magazineluiza.com.br/busca/notebook/
