In [1]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from io import StringIO
from reportlab.lib.pagesizes import A4
from reportlab.platypus import SimpleDocTemplate, Table, TableStyle, Paragraph, Spacer
from reportlab.lib.styles import ParagraphStyle
from reportlab.lib import colors

def generatePDF(stocks, filename='stocks.pdf'):
  """Render the given stocks table as an A4 PDF report.

  The report contains a title, a short description, one bullet per filter
  criterion and a styled table built from the DataFrame's columns and rows.

  Args:
    stocks: DataFrame whose column labels become the table header and whose
      rows become the table body.
    filename: Path of the PDF file to write. Defaults to 'stocks.pdf'.
  """
  pdf = SimpleDocTemplate(filename, pagesize=A4)

  # First row is the header, followed by one row per stock.
  data = [stocks.columns.tolist()] + stocks.values.tolist()

  table = Table(data)
  table.setStyle(TableStyle([
    # Header row styling.
    ('BACKGROUND', (0, 0), (-1, 0), colors.grey),
    ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
    ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
    ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
    ('BOTTOMPADDING', (0, 0), (-1, 0), 12),
    # Body rows styling.
    ('BACKGROUND', (0, 1), (-1, -1), colors.beige),
    ('GRID', (0, 0), (-1, -1), 1, colors.black)
  ]))

  title_style = ParagraphStyle(name='Title', fontSize=16, textColor=colors.black)
  title_paragraph = Paragraph("Relatório de Ações", title_style)

  description_style = ParagraphStyle(name='Paragraph', fontSize=12, textColor=colors.gray)
  description_paragraph = Paragraph(
    "Ações da bolsa brasileira de valores, com os seguintes indicadores filtrados:",
    description_style
  )

  bullet_list_style = ParagraphStyle(name='BulletList', fontSize=12, textColor=colors.black)
  # One entry per criterion. Keeping them in a list (instead of numbered
  # variables) prevents one bullet from silently overwriting another, which is
  # how the P/L bullet used to get lost.
  bullet_texts = [
    # The upper DY bound mirrors the 14% limit applied by getFilteredStocks.
    '<b>• DY:</b> Maior que 6% e menor que 14%',
    '<b>• ROE:</b> Maior que 12% e menor que 20%',
    '<b>• P/L:</b> Maior que 3 e menor que 10',
    '<b>• P/VP:</b> Maior que 0.5 e menor que 2',
    '<b>• Liq. Diária (ultimos 2 meses):</b> Maior 1.000.000',
    '<b>• Crescimento (ultimos 5 anos):</b> Maior que 10%',
  ]

  pdf_content = [
    title_paragraph,
    Spacer(1, 8),
    description_paragraph,
    Spacer(1, 20),
  ]

  last_position = len(bullet_texts) - 1
  for position, text in enumerate(bullet_texts):
    pdf_content.append(Paragraph(text, bullet_list_style))
    # Small gap between bullets, larger gap before the table.
    pdf_content.append(Spacer(1, 20 if position == last_position else 2))

  pdf_content.append(table)

  pdf.build(pdf_content)

  print(f"PDF generated successfully: {filename}")

def collectDataFromWebScrapping() -> pd.DataFrame:
  """Fetch the stock screener table from fundamentus.com.br.

  Posts an empty filter form, reads the first <table> of the response into a
  DataFrame, upper-cases its column names and hands it to parseData.

  Returns:
    The DataFrame returned by parseData, or an empty DataFrame when the
    server does not answer with HTTP 200 or the page contains no table.
    Returning an empty frame (rather than None) keeps the declared return
    type honest and lets callers simply check `.empty`.

  Raises:
    requests.exceptions.RequestException: on connection errors or when the
      server does not respond within the timeout.
  """
  url = 'https://www.fundamentus.com.br/resultado.php'

  headers = {
    'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36'
  }

  # Every filter field is left blank, i.e. no server-side filtering is requested.
  data = "pl_min=&pl_max=&pvp_min=&pvp_max=&psr_min=&psr_max=&divy_min=&divy_max=&pativos_min=&pativos_max=&pcapgiro_min=&pcapgiro_max=&pebit_min=&pebit_max=&fgrah_min=&fgrah_max=&firma_ebit_min=&firma_ebit_max=&firma_ebitda_min=&firma_ebitda_max=&margemebit_min=&margemebit_max=&margemliq_min=&margemliq_max=&liqcorr_min=&liqcorr_max=&roic_min=&roic_max=&roe_min=&roe_max=&liq_min=&liq_max=&patrim_min=&patrim_max=&divliq_min=&divliq_max=&tx_cresc_rec_min=&tx_cresc_rec_max=&valor_mercado_min=&valor_mercado_max=&setor=&submit="

  # Without a timeout, requests waits indefinitely on an unresponsive server.
  response = requests.post(
    url=url,
    data=data,
    headers=headers,
    timeout=30
  )

  if response.status_code != 200:
    return pd.DataFrame()

  soup = BeautifulSoup(response.content, 'html.parser')

  table = soup.find('table')
  if table is None:
    # No table in the page (e.g. layout change or an error page served as 200).
    return pd.DataFrame()

  # thousands='' overrides read_html's default ',' thousands separator;
  # presumably so comma-decimal cells reach parseData untouched — TODO confirm.
  dataFrame = pd.read_html(StringIO(str(table)), thousands='')[0]
  dataFrame.columns = dataFrame.columns.str.upper()

  return parseData(dataFrame)

def getFilteredStocks(data: pd.DataFrame, filters: dict = None) -> pd.DataFrame:
  """Select the rows whose indicators fall inside the configured ranges.

  Args:
    data: DataFrame with numeric 'DIV.YIELD', 'P/L', 'P/VP', 'ROE',
      'LIQ.2MESES' and 'CRESC. REC.5A' columns.
    filters: Optional overrides for the default thresholds. Keys 'dy', 'roe',
      'pl' and 'p/vp' map to {'min': ..., 'max': ...} dicts (both bounds
      inclusive); 'liquidez' and 'crescimento' are inclusive lower bounds.
      Any key that is omitted keeps its default value.

  Returns:
    The subset of `data` satisfying every criterion, with the original index.
  """
  default_filters = {
    'dy': {'min': 6, 'max': 14},
    'roe': {'min': 12, 'max': 20},
    'pl': {'min': 3, 'max': 10},
    'p/vp': {'min': 0.5, 'max': 2},
    'liquidez': 1000000,
    'crescimento': 10
  }
  # Caller-supplied entries replace the matching default entry as a whole.
  active = {**default_filters, **(filters or {})}

  # Series.between is inclusive on both ends by default.
  selected = (
    data['DIV.YIELD'].between(active['dy']['min'], active['dy']['max']) &
    data['P/L'].between(active['pl']['min'], active['pl']['max']) &
    data['P/VP'].between(active['p/vp']['min'], active['p/vp']['max']) &
    data['ROE'].between(active['roe']['min'], active['roe']['max']) &
    (data['LIQ.2MESES'] >= active['liquidez']) &
    (data['CRESC. REC.5A'] >= active['crescimento'])
  )

  return data[selected]

def _brNumberToFloat(values: pd.Series) -> pd.Series:
  """Convert Brazilian-formatted number strings (e.g. '1.234,56%') to floats."""
  if pd.api.types.is_numeric_dtype(values):
    # Already numeric: the .str accessor would raise, so just normalise the dtype.
    return values.astype(float)

  # regex=False is explicit on purpose: before pandas 2.0 the default was
  # regex=True, where '.' matches ANY character and would erase every value.
  return (
    values
    .str.replace('%', '', regex=False)   # drop the percent sign
    .str.replace('.', '', regex=False)   # drop thousands separators
    .str.replace(',', '.', regex=False)  # decimal comma -> decimal point
    .astype(float)
  )

def parseData(data: pd.DataFrame) -> pd.DataFrame:
  """Convert the indicator columns to floats and add the Status Invest link.

  Args:
    data: DataFrame with a 'PAPEL' column and string columns 'DIV.YIELD',
      'ROE', 'P/L', 'P/VP', 'LIQ.2MESES' and 'CRESC. REC.5A' written with '.'
      as thousands separator and ',' as decimal separator.

  Returns:
    A new DataFrame, sorted by 'PAPEL' ascending, with those columns as floats
    and a 'STATUS_INVEST' URL column. The input frame is left untouched.
  """
  numeric_columns = ('DIV.YIELD', 'ROE', 'P/L', 'P/VP', 'LIQ.2MESES', 'CRESC. REC.5A')

  # Work on a copy so re-running the cell never sees half-converted input.
  parsed = data.copy()
  for column in numeric_columns:
    parsed[column] = _brNumberToFloat(parsed[column])

  parsed['STATUS_INVEST'] = 'https://statusinvest.com.br/acoes/' + parsed['PAPEL']

  return parsed.sort_values(by='PAPEL', ascending=True)

def cleanResult(data: pd.DataFrame) -> pd.DataFrame:
  """Return a copy of `data` without the columns that are not shown in the report.

  The input frame is not modified. A KeyError is raised by pandas if any of
  the listed columns is absent.
  """
  hidden_columns = [
    'LIQ.2MESES',
    'CRESC. REC.5A',
    'P/ATIVO',
    'P/CAP.GIRO',
    'P/EBIT',
    'P/ATIV CIRC.LIQ',
    'EV/EBIT',
    'EV/EBITDA',
    'MRG EBIT',
    'MRG. LÍQ.',
    'LIQ. CORR.',
    'ROIC',
    'PSR',
    'DÍV.BRUT/ PATRIM.',
    'PATRIM. LÍQ',
  ]

  report_frame = data.copy()
  return report_frame.drop(columns=hidden_columns)

In [2]:
# Scrape the screener, apply the filters, then publish the result as a PDF
# and as a rich table in the notebook.
capturedStocks = collectDataFromWebScrapping()

# Guard against both "nothing came back" (None) and an empty frame, so a
# failed download prints the message below instead of raising AttributeError.
if capturedStocks is not None and not capturedStocks.empty:
  filtered_stocks = getFilteredStocks(capturedStocks)
  stocks = cleanResult(filtered_stocks)
  generatePDF(stocks)
  display(stocks)
else:
  print('Sorry, we could not generate the report!')

PDF generated successfully: stocks.pdf


Unnamed: 0,PAPEL,COTAÇÃO,P/L,P/VP,DIV.YIELD,ROE,STATUS_INVEST
547,ABCB4,2440,6.79,0.98,6.31,14.5,https://statusinvest.com.br/acoes/ABCB4
641,AGRO3,2420,9.43,1.26,13.27,13.34,https://statusinvest.com.br/acoes/AGRO3
448,ETER3,708,3.44,0.57,7.05,16.52,https://statusinvest.com.br/acoes/ETER3
475,GGBR3,1933,4.53,0.69,7.81,15.29,https://statusinvest.com.br/acoes/GGBR3
491,GGBR4,2151,5.04,0.77,7.02,15.29,https://statusinvest.com.br/acoes/GGBR4
586,ITSA3,1032,7.92,1.28,8.28,16.23,https://statusinvest.com.br/acoes/ITSA3
588,ITSA4,1039,7.97,1.29,8.23,16.23,https://statusinvest.com.br/acoes/ITSA4
611,ITUB3,2961,8.69,1.59,7.98,18.29,https://statusinvest.com.br/acoes/ITUB3
581,LAVV3,905,7.8,1.42,6.5,18.18,https://statusinvest.com.br/acoes/LAVV3
506,NEOE3,2045,5.56,0.83,6.45,14.94,https://statusinvest.com.br/acoes/NEOE3
