In [10]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver import ChromeOptions
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from bs4 import BeautifulSoup
import unicodedata
from urllib.parse import quote
import pandas as pd
from time import sleep

In [11]:


class ProcessSearcher:
    def __init__(self, data_request):
        service = Service()
        options = ChromeOptions()
        options.add_argument("--no-sandbox")  # desativa o sandbox
        # options.add_argument("--headless") #executa sem GUI
        # desabilita aceleracao de hardware
        options.add_argument("--disable-gpu")
        self.driver = webdriver.Chrome(options, service)
        self.data_request = data_request
        
        #site stf
        stf = ["https://portal.stf.jus.br/processos/", "https://portal.stf.jus.br/processos/listarPartes.asp?termo="]

        tables_dict = {}

        tables_dict['stf'] = self._search_stf(stf[1])
        self.driver.quit()
        print(self.format_html_table(tables_dict))

    # Formata o dicionário, transformando os df em tabelas e colocando titulo
    def format_html_table(self, tables_dict):
        html_tables = ""
        for title, df in tables_dict.items():
            if df is not None:
                df = self.transform_to_links(df)

                html_tables += f"<h2>{title.upper()}</h2>\n"
                html_tables += df.to_html(index=False, escape=False)
        print(html_tables)
        return html_tables

    # transforma columa de processo em hyperlink
    def transform_to_links(self, df: pd.DataFrame) -> pd.DataFrame:
        def make_link(row):
            return f'<a href="{row["link"]}"  target="_blank">{row["processo"]}</a>'

        df['ultima_movimentacao'] = df.apply(make_link, axis=1)
        df.drop(columns=['link'], inplace=True)
        df = df.rename(columns={'processo': 'Processo',
                       'ultima_movimentacao': 'Última Movimentação'})
        return df

    def _search_stf(self, link):
        # try:
        dr = self.data_request
        if "cpf" in dr:
            return 'erro'
        elif "nome" in dr:
            info = dr["nome"].upper()
        
        info_quote = quote(info)
        
        url_pesquisa = link + info_quote

        self.driver.get(url_pesquisa)
        WebDriverWait(self.driver, 100).until(EC.visibility_of_element_located((By.XPATH, '//*[@id="quantidade"]')))
        
        qtde_processo = self.driver.find_element(By.XPATH, '//*[@id="quantidade"]').text
        qtde_processo = int(qtde_processo)
        
        return self._format_dataframe_stf(qtde_processo, info)
            
            
        # except Exception as e:
        #     print(e.with_traceback)

    def _format_dataframe_stf(self, qtde_processo, nome):
        nome = self.remover_acentos(nome)
        
        partes = {"link": [], "processo": [], "ultima_movimentacao": []}

        
        for i in range(1, qtde_processo+1):
            nome_parte = self.driver.find_element(By.XPATH, f'//*[@id="card_processos"]/div[{i}]/div[2]/div/div[1]/div[2]').text
            nome_parte = self.remover_acentos(nome_parte)
            
            if nome_parte == nome:
                link = self.driver.find_element(By.XPATH, f'//*[@id="card_processos"]/div[{i}]/div[1]/h6[1]/span/a').get_attribute('href')

                processo = self.driver.find_element(By.XPATH, f'//*[@id="card_processos"]/div[{i}]/div[1]/h6[1]/span/a').text
                
                # print(f'{link}, {processo}')
                ult_mov = self.driver.find_element(By.XPATH, f'//*[@id="card_processos"]/div[{i}]/div[2]/div/div[2]/div[2]').text
                
                partes["link"].append(link)
                partes["processo"].append(processo)
                partes["ultima_movimentacao"].append(ult_mov)
        
        df_stf = pd.DataFrame.from_dict(partes)
        
        #display(df_stf)
        
        return df_stf
                
    def remover_acentos(self, texto):
        texto_sem_acentos = ''.join(c for c in unicodedata.normalize('NFD', texto) if unicodedata.category(c) != 'Mn')
        
        return texto_sem_acentos

In [12]:
def main():
    PS = ProcessSearcher({'nome': 'João Silva Neto'})

main()

<h2>STF</h2>
<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th>Processo</th>
      <th>Última Movimentação</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>ARE 1367479</td>
      <td><a href="https://portal.stf.jus.br/processos/detalhe.asp?incidente=6341521"  target="_blank">ARE 1367479</a></td>
    </tr>
    <tr>
      <td>ARE 814132</td>
      <td><a href="https://portal.stf.jus.br/processos/detalhe.asp?incidente=4576991"  target="_blank">ARE 814132</a></td>
    </tr>
    <tr>
      <td>ARE 637942</td>
      <td><a href="https://portal.stf.jus.br/processos/detalhe.asp?incidente=4053495"  target="_blank">ARE 637942</a></td>
    </tr>
    <tr>
      <td>RE 632192</td>
      <td><a href="https://portal.stf.jus.br/processos/detalhe.asp?incidente=3978488"  target="_blank">RE 632192</a></td>
    </tr>
    <tr>
      <td>AI 719447</td>
      <td><a href="https://portal.stf.jus.br/processos/detalhe.asp?incidente=2625811"  target="_blank">AI 71