In [26]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver import ChromeOptions
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import Select
from bs4 import BeautifulSoup
import unicodedata
from urllib.parse import quote
import pandas as pd
from time import sleep

In [27]:


class ProcessSearcher:
    def __init__(self, data_request):
        service = Service()
        options = ChromeOptions()
        options.add_argument("--no-sandbox")  # desativa o sandbox
        # options.add_argument("--headless") #executa sem GUI
        # desabilita aceleracao de hardware
        options.add_argument("--disable-gpu")
        self.driver = webdriver.Chrome(options, service)
        self.data_request = data_request
        
        #site stf
        stf = ["https://portal.stf.jus.br/processos/", "https://portal.stf.jus.br/processos/listarPartes.asp?termo="]
        
        #eproc
        trf2 = ["https://eproc.trf2.jus.br/eproc/externo_controlador.php?acao=processo_consulta_publica"]

        tables_dict = {}

        tables_dict['trf2'] = self._search_eproc(trf2[0])
        self.driver.quit()
        
        self.table_response = self.format_html_table(tables_dict)


    # Formata o dicionário, transformando os df em tabelas e colocando titulo
    def format_html_table(self, tables_dict):
        html_tables = ""
        for title, df in tables_dict.items():
            if df is not None:
                df = self.transform_to_links(df)

                html_tables += f"<h2>{title.upper()}</h2>\n"
                html_tables += df.to_html(index=False, escape=False)
        
        return html_tables

    # transforma columa de processo em hyperlink
    def transform_to_links(self, df: pd.DataFrame) -> pd.DataFrame:
        def make_link(row):
            return f'<a href="{row["link"]}"  target="_blank">{row["processo"]}</a>'

        df['ultima_movimentacao'] = df.apply(make_link, axis=1)
        df.drop(columns=['link'], inplace=True)
        df = df.rename(columns={'processo': 'Processo',
                       'ultima_movimentacao': 'Última Movimentação'})
        return df
    
    def _search_eproc(self, url):
        self.driver.get(url)
        
        dr = self.data_request
        if "cpf" in dr:
            search_field = self.driver.find_element(
                    By.XPATH, '//*[@id="txtCpfCnpj"]'
                )
        elif "nome" in dr:
            search_field = self.driver.find_element(
                    By.XPATH, '//*[@id="txtStrParte"]' 
                )
        info = list(dr.values())[0]
        search_field.click()
        search_field.send_keys(info)
        
        #consulta
        self.driver.find_element(
            By.XPATH, '//*[@id="sbmNovo"]'
        ).click()
        
        #espera tabela de resultados
        WebDriverWait(self.driver, 70).until (
                EC.visibility_of_element_located(
                    (By.XPATH, '//*[@id="divInfraAreaTabela"]/table')
                )
            )

        return self._parse_results()
        
    def _parse_results(self):
        
        linhas = self.driver.find_elements(By.XPATH, '//*[@id="divInfraAreaTabela"]/table/tbody/tr')
        tam_tabela = len(linhas) + 1
        dr = self.data_request
        if "nome" in dr:
            nome = dr["nome"]
            dfs = []
            for i in range(2, tam_tabela):
                    xpath_nomeParte = f'//*[@id="divInfraAreaTabela"]/table/tbody/tr[{i}]/td[1]'
                    nome_parte = self.driver.find_element(By.XPATH, xpath_nomeParte).text
                    
                    if self.remover_acentos(nome_parte).lower() == self.remover_acentos(nome).lower():
                        link_processos = self.driver.find_element(By.XPATH, f'{xpath_nomeParte}/a').get_attribute("href")
                        
                        #abrir link com processos
                        self.driver.execute_script("window.open();")
                        
                        #muda para nova aba
                        self.driver.switch_to.window(self.driver.window_handles[1])
                        
                        # entra no link de processes
                        self.driver.get(link_processos)

                        dfs.append(self._info_processos())
                        
                        #retorna para aba principal
                        self.driver.switch_to.window(self.driver.window_handles[0])
            
            return pd.concat(dfs, ignore_index=True)
                
        return self._info_processos()
                   
    def _info_processos(self):
        partes = {"link": [], "processo": [], "ultima_movimentacao": []}
        
        #informacoes sobre processo
        table_processos = self.driver.find_element(By.XPATH, '//*[@id="divInfraAreaTabela"]/table/tbody')
        qtde_processos = len(table_processos.find_elements(By.TAG_NAME, 'tr'))
        
        for i in range(2, qtde_processos):
            link = self.driver.find_element(By.XPATH, f'//*[@id="divInfraAreaTabela"]/table/tbody/tr[{i}]/td[1]/a').get_attribute("href")
            processo = self.driver.find_element(By.XPATH, f'//*[@id="divInfraAreaTabela"]/table/tbody/tr[{i}]/td[1]').text
            ult_mov =  self.driver.find_element(By.XPATH, f'//*[@id="divInfraAreaTabela"]/table/tbody/tr[{i}]/td[5]').text
            
            partes["link"].append(link)
            partes["processo"].append(processo)
            partes["ultima_movimentacao"].append(ult_mov)

        return pd.DataFrame.from_dict(partes)
    
    def remover_acentos(self, texto):
        texto_sem_acentos = ''.join(c for c in unicodedata.normalize('NFD', texto) if unicodedata.category(c) != 'Mn')
        
        return texto_sem_acentos

In [29]:
def main():
    PS = ProcessSearcher({'nome': 'JOÃO CARLOS DA FONSECA SILVA FILHO'})
    
    print(PS.table_response)

main()

<h2>TRF2</h2>
<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th>Processo</th>
      <th>Última Movimentação</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>5018528-43.2019.4.02.9666</td>
      <td><a href="https://eproc.trf2.jus.br/eproc/externo_controlador.php?acao=processo_seleciona_publica&acao_origem=processo_consulta_nome_parte_publica&acao_retorno=processo_consulta_nome_parte_publica&num_processo=50185284320194029666&num_chave=&hash=c540521521551645d994c52f7d124734&num_chave_documento="  target="_blank">5018528-43.2019.4.02.9666</a></td>
    </tr>
  </tbody>
</table>
