# Scraping usando BS4

In [12]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re

## Conectando com a página da Fundamentus

In [13]:
# Fundamentus results page to be scraped
url = 'https://www.fundamentus.com.br/resultado.php'

# Browser-like User-Agent sent with the request
# (presumably the site rejects the default requests User-Agent - TODO confirm)
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
}

# Always pass a timeout: without one, requests waits indefinitely when the
# server never answers, which would hang the whole notebook run
response = requests.get(url, headers=headers, timeout=30)


In [14]:
# Parse the page only when the request succeeded (HTTP 200)
if response.status_code == 200:
    soup = BeautifulSoup(response.content, 'html.parser')

    # Locate the results table, identified by id 'resultado'
    table = soup.find('table', {'id': 'resultado'})
    if table is None:
        # soup.find returns None when nothing matches; fail with a clear
        # message instead of an AttributeError on the lines below
        raise ValueError("Tabela com id 'resultado' não encontrada na página")

    # Column names come from the <th> cells.
    # NOTE: this rebinds `headers`, which held the HTTP request headers
    # until this cell runs.
    headers = [header.text for header in table.find_all('th')]

    # One list of stripped cell texts per data row. The first <tr> is the
    # header row, and rows without any <td> are skipped so they do not
    # become all-empty records in the DataFrame.
    rows = []
    for row in table.find_all('tr')[1:]:
        cells = [cell.text.strip() for cell in row.find_all('td')]
        if cells:
            rows.append(cells)

    # Every value is still a raw string at this point
    df = pd.DataFrame(rows, columns=headers)

    # Persist the scraped strings as a ';'-separated CSV without the index column
    df.to_csv('resultados_fundamentus.csv', index=False, sep=';')

else:
    # NOTE: `df` is not created on this path, so later cells that use it will fail
    print(f'Erro ao acessar o site: {response.status_code}')


In [15]:
# Preview the first 10 scraped rows (values are still raw strings here)
df.head(10)

Unnamed: 0,Papel,Cotação,P/L,P/VP,PSR,Div.Yield,P/Ativo,P/Cap.Giro,P/EBIT,P/Ativ Circ.Liq,...,EV/EBITDA,Mrg Ebit,Mrg. Líq.,Liq. Corr.,ROIC,ROE,Liq.2meses,Patrim. Líq,Dív.Brut/ Patrim.,Cresc. Rec.5a
0,POPR4,1017,0,0,0,"0,00%",0,0,0,0,...,0,"8,66%","5,65%",108,"15,25%","19,93%",0,"545.803.000,00",82,"30,93%"
1,IVTT3,000,0,0,0,"0,00%",0,0,0,0,...,0,"0,00%","0,00%",0,"0,00%","-0,40%",0,"1.083.050.000,00",0,"20,67%"
2,CSTB4,14769,0,0,0,"0,00%",0,0,0,0,...,0,"40,85%","28,98%",260,"22,40%","20,11%",0,"8.420.670.000,00",14,"31,91%"
3,MNSA4,047,0,0,0,"0,00%",0,0,0,0,...,0,"-208,15%","-362,66%",363,"-13,50%","145,70%",0,"-9.105.000,00",-652,"-41,11%"
4,MNSA3,042,0,0,0,"0,00%",0,0,0,0,...,0,"-208,15%","-362,66%",363,"-13,50%","145,70%",0,"-9.105.000,00",-652,"-41,11%"
5,PMET3,000,0,0,0,"0,00%",0,0,0,0,...,0,"0,00%","0,00%",0,"0,00%","4,10%",0,"-290.863.000,00",0,"37,74%"
6,CSTB3,15000,0,0,0,"0,00%",0,0,0,0,...,0,"40,85%","28,98%",260,"22,40%","20,11%",0,"8.420.670.000,00",14,"31,91%"
7,CLAN3,000,0,0,0,"0,00%",0,0,0,0,...,0,"0,00%","0,00%",0,"0,00%","-1,05%",0,"1.012.240.000,00",0,"-63,96%"
8,CFLU4,"1.000,00",0,0,0,"0,00%",0,0,0,0,...,0,"8,88%","10,72%",110,"17,68%","32,15%",0,"60.351.000,00",6,"8,14%"
9,PORP4,240,0,0,0,"0,00%",0,0,0,0,...,0,"0,00%","0,00%",0,"0,00%","-2,08%",0,"22.399.000,00",0,"13,66%"


## Tratamento de dados

In [16]:
def clean_percentage(value):
    """Convert a Brazilian-formatted percentage string to a float.

    Dots acting as thousands separators are dropped, the '%' sign is removed
    and the decimal comma becomes a dot, e.g. '-208,15%' -> -208.15. The
    number stays in percent units (it is not divided by 100).

    Args:
        value: Raw cell content. Anything that is not a ``str`` is passed
            through unchanged.

    Returns:
        The parsed float, ``None`` when the cleaned text is empty, or the
        original object for non-string input.

    Raises:
        ValueError: If the cleaned text is not a valid float literal.
    """
    if not isinstance(value, str):
        return value

    # A dot followed by exactly three digits and a word boundary is a
    # thousands separator, not a decimal point
    without_thousands = re.sub(r'\.(?=\d{3,3}\b)', '', value)
    normalized = without_thousands.replace('%', '').replace(',', '.').strip()
    if not normalized:
        return None
    return float(normalized)

def clean_numeric(value):
    """Convert a Brazilian-formatted number string to a float.

    Dots acting as thousands separators are dropped and the decimal comma
    becomes a dot, e.g. '8.420.670.000,00' -> 8420670000.0.

    Args:
        value: Raw cell content. Anything that is not a ``str`` is passed
            through unchanged.

    Returns:
        The parsed float, ``None`` when the cleaned text is empty, or the
        original object for non-string input.

    Raises:
        ValueError: If the cleaned text is not a valid float literal.
    """
    if not isinstance(value, str):
        return value

    # Step 1: drop only the dots that are thousands separators (a dot
    # followed by exactly three digits and a word boundary)
    without_thousands = re.sub(r'\.(?=\d{3,3}\b)', '', value)
    # Step 2: turn the decimal comma into a decimal point
    normalized = without_thousands.replace(',', '.').strip()
    if not normalized:
        return None
    return float(normalized)

# Which cleaner converts each scraped column: percentage columns drop the
# '%' sign, all the others are plain numbers. The order matches the order in
# which the columns are converted.
column_cleaners = {
    'Cotação': clean_numeric,
    'P/L': clean_numeric,
    'P/VP': clean_numeric,
    'PSR': clean_numeric,
    'Div.Yield': clean_percentage,
    'P/Ativo': clean_numeric,
    'P/Cap.Giro': clean_numeric,
    'P/EBIT': clean_numeric,
    'P/Ativ Circ.Liq': clean_numeric,
    'EV/EBIT': clean_numeric,
    'EV/EBITDA': clean_numeric,
    'Mrg Ebit': clean_percentage,
    'Mrg. Líq.': clean_percentage,
    'Liq. Corr.': clean_numeric,
    'ROIC': clean_percentage,
    'ROE': clean_percentage,
    'Liq.2meses': clean_numeric,
    'Patrim. Líq': clean_numeric,
    'Dív.Brut/ Patrim.': clean_numeric,
    'Cresc. Rec.5a': clean_percentage,
}

# Convert each column in place, element by element
for column_name, cleaner in column_cleaners.items():
    df[column_name] = df[column_name].apply(cleaner)

# Show the resulting dtypes to confirm the conversion
print(df.dtypes)


Papel                 object
Cotação              float64
P/L                  float64
P/VP                 float64
PSR                  float64
Div.Yield            float64
P/Ativo              float64
P/Cap.Giro           float64
P/EBIT               float64
P/Ativ Circ.Liq      float64
EV/EBIT              float64
EV/EBITDA            float64
Mrg Ebit             float64
Mrg. Líq.            float64
Liq. Corr.           float64
ROIC                 float64
ROE                  float64
Liq.2meses           float64
Patrim. Líq          float64
Dív.Brut/ Patrim.    float64
Cresc. Rec.5a        float64
dtype: object


In [17]:
# Display the DataFrame after the numeric conversion
df

Unnamed: 0,Papel,Cotação,P/L,P/VP,PSR,Div.Yield,P/Ativo,P/Cap.Giro,P/EBIT,P/Ativ Circ.Liq,...,EV/EBITDA,Mrg Ebit,Mrg. Líq.,Liq. Corr.,ROIC,ROE,Liq.2meses,Patrim. Líq,Dív.Brut/ Patrim.,Cresc. Rec.5a
0,POPR4,10.17,0.00,0.00,0.000,0.0,0.000,0.00,0.00,0.00,...,0.00,8.66,5.65,1.08,15.25,19.93,0.0,5.458030e+08,0.82,30.93
1,IVTT3,0.00,0.00,0.00,0.000,0.0,0.000,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,-0.40,0.0,1.083050e+09,0.00,20.67
2,CSTB4,147.69,0.00,0.00,0.000,0.0,0.000,0.00,0.00,0.00,...,0.00,40.85,28.98,2.60,22.40,20.11,0.0,8.420670e+09,0.14,31.91
3,MNSA4,0.47,0.00,0.00,0.000,0.0,0.000,0.00,0.00,0.00,...,0.00,-208.15,-362.66,3.63,-13.50,145.70,0.0,-9.105000e+06,-6.52,-41.11
4,MNSA3,0.42,0.00,0.00,0.000,0.0,0.000,0.00,0.00,0.00,...,0.00,-208.15,-362.66,3.63,-13.50,145.70,0.0,-9.105000e+06,-6.52,-41.11
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
982,UBBR3,18.00,1466.61,4.77,0.000,0.0,0.000,0.00,0.00,0.00,...,0.00,0.00,0.00,0.00,0.00,0.33,0.0,1.031720e+10,0.00,10.58
983,CEPE3,128.00,1910.07,5.88,1.143,0.0,0.691,37.60,8.14,-1.10,...,10.86,14.04,0.06,1.08,9.34,0.31,0.0,1.624000e+09,5.20,1.81
984,SHOW3,1.05,2831.35,0.43,0.145,0.0,0.158,2.59,2.96,-1.48,...,2.19,4.89,-0.04,1.13,9.16,0.02,327642.0,1.645830e+08,0.49,79.20
985,SEER3,6.79,3099.37,0.70,0.464,0.0,0.249,4.36,3.28,-0.65,...,3.05,14.14,0.01,1.27,8.45,0.02,2649880.0,1.240840e+09,0.75,11.26
