In [1]:
import time

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [7]:
#Function to fetch a web page, retrying when the server answers 503
def fetch_webpage(url, headers, retries=3, wait=5, timeout=30):
    """Download ``url`` with up to ``retries`` attempts.

    Args:
        url: Address of the page to request.
        headers: HTTP headers sent with every attempt.
        retries: Maximum number of GET requests to issue.
        wait: Seconds to pause after a 503 response before the next attempt.
        timeout: Seconds passed to ``requests.get`` so that a stalled
            connection raises instead of blocking the notebook forever.

    Returns:
        The response object of the first attempt that answers with status
        200, or ``None`` when every attempt returned another status.

    Raises:
        Whatever ``requests.get`` raises (connection errors, timeouts); these
        are not caught here.
    """
    for attempt in range(retries):
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code == 200:
            return response
        # Back off only when the server reports 503, and only when another
        # attempt will actually follow; sleeping after the last attempt would
        # just delay the ``None`` result.
        if response.status_code == 503 and attempt < retries - 1:
            print(f"Service unavailable. Retrying in {wait} seconds...")
            time.sleep(wait)
    return None

#Function to extract Product Title
def get_title(soup):
    """Return the text of the first ``<h1>`` found in ``soup``.

    Args:
        soup: Parsed page exposing a ``find`` method.

    Returns:
        The heading text, or an empty string when the lookup yields an object
        without a ``text`` attribute (for example ``None`` when no ``<h1>``
        exists).
    """
    try:
        title = soup.find("h1").text
    except AttributeError:
        # Only the "element not found" case is treated as a missing title;
        # anything else (including Ctrl-C) is allowed to propagate.
        title = ""
    return title

#Function to extract Product Price
def get_price(soup):
    """Return the text of the first price-fraction ``<span>`` in ``soup``.

    The element is looked up by the CSS class
    ``andes-money-amount__fraction``.

    Args:
        soup: Parsed page exposing a ``find`` method.

    Returns:
        The span's text, or an empty string when the lookup yields an object
        without a ``text`` attribute (for example ``None`` when the span is
        absent).
    """
    try:
        # A single lookup is enough: repeating the same selector after a miss
        # cannot produce a different result.
        price = soup.find("span", attrs={"class": "andes-money-amount__fraction"}).text
    except AttributeError:
        price = ""
    return price

def get_description(soup):
    """Return the text of the product-description paragraph in ``soup``.

    The paragraph is located by the CSS class ``ui-pdp-description__content``.
    An empty string is returned when the lookup raises ``AttributeError``
    (for example because ``find`` returned ``None``).
    """
    selector = {"class": "ui-pdp-description__content"}
    try:
        return soup.find("p", attrs=selector).text
    except AttributeError:
        return ""

In [8]:
if __name__ == '__main__':

    # Request headers sent with every call; replace the User-Agent with your own.
    HEADERS = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36',
        'Accept-Language': 'pt-BR, pt;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Connection': 'keep-alive',
    }

    # Search-results page whose product links are followed below.
    URL = "https://lista.mercadolivre.com.br/caixa-de-ferramenta#D[A:caixa%20de%20ferramenta]"

    # Fetch the listing; fetch_webpage returns None when no attempt got a 200,
    # so fail here with a clear message instead of an AttributeError later.
    webpage = fetch_webpage(URL, HEADERS)
    if webpage is None:
        raise RuntimeError(f"Could not fetch the search results page: {URL}")

    # Soup object containing the whole listing page.
    soup = BeautifulSoup(webpage.content, "html.parser")

    # Anchor tags that carry the product links.
    links = soup.find_all('a',{'class': 'ui-search-item__group__element ui-search-link__title-card ui-search-link'})

    # Keep only anchors that actually have an href; a missing href would
    # otherwise be passed to the HTTP call as None.
    links_list = [link.get('href') for link in links if link.get('href')]

    # Column-oriented storage for the scraped fields. Deliberately not named
    # `dict`, which would shadow the builtin for the rest of the session.
    product_data = {'title': [], 'price': [], 'description': []}

    # Visit each product page and collect its details.
    for link in links_list:
        # Same helper as for the listing page, so product pages get the same
        # status check and retry handling.
        new_webpage = fetch_webpage(link, HEADERS)
        if new_webpage is None:
            print(f"Skipping product page that could not be fetched: {link}")
            continue
        new_soup = BeautifulSoup(new_webpage.content, "html.parser")
        product_data['title'].append(get_title(new_soup))
        product_data['price'].append(get_price(new_soup))
        product_data['description'].append(get_description(new_soup))

    # Turn the collected columns into a DataFrame.
    ferramentas_df = pd.DataFrame.from_dict(product_data)

In [9]:
# Preview the first rows of the scraped product table (title, price, description)
ferramentas_df.head()

Unnamed: 0,title,price,description
0,Caixa de ferramentas DeWalt DWST17806 de plást...,295,Caixa Organizadora Ferramenta C/ Divisória N6 ...
1,Fumigador Grande Zatti Para Apicultura,162,"O Fumigador Grande Zatti Para Apicultura, é um..."
2,Furadeira de impacto Bosch GSB 450 RE com velo...,287,A Furadeira de impacto GSB 450 RE STD possuí o...
3,Tramontina 43800005 caixa para ferramentas san...,123,A Caixa Sanfonada para Ferramentas Tramontina ...
4,Kit Jogo De Ferramentas Completo C/ Soquetes E...,215,Distribuído por Ismafer FerramentasCOMPOSTO PO...


In [10]:
# Save the scraped table as CSV under a relative path, with a header row and
# without the DataFrame index column.
ferramentas_df.to_csv("ferramentas_mercadolivre.csv", header = True, index = False)