In [1]:
from selenium.webdriver.common.keys import Keys 
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
from time import sleep
from datetime import datetime
import pytz
import json

In [2]:
import sys
from pathlib import Path
sys.path.append(str(Path.cwd().parent))  # Add parent directory to Python path
from driver import Bot

from colorama import Fore, Style
from selenium.webdriver.common.keys import Keys 

# Build the Bot instance used by every cell below; the session name is passed
# straight through to driver.Bot.
bot = Bot(session_name='anderson')

# Start banner for the login / chat-counting test run (green, then colour reset).
print(f"{Fore.GREEN}Starting WhatsApp Web test...{Style.RESET_ALL}")


[32mStarting WhatsApp Web test...[0m


In [3]:
# Log in through the Bot helper defined in driver.py.
# NOTE(review): '+55' looks like a bare country-code prefix rather than a full
# phone number — confirm what Bot.login expects before re-running this cell.
bot.login('+55')

Attempting to load WhatsApp Web...
[32mLogged in successfully![0m


In [29]:
def decode_latin(text):
    """Return ``text`` as produced by the first decoding strategy that does not raise.

    Strategies, tried in order:
      1. UTF-8 encode followed by a UTF-8 decode.
      2. UTF-8 encode followed by a ``unicode_escape`` decode.
      3. ``raw_unicode_escape`` encode followed by a UTF-8 decode.

    If all three raise, a warning listing the three errors is printed and
    ``text`` is returned untouched.

    NOTE(review): strategy 1 encodes and decodes with the same codec, so
    whenever it succeeds the input comes back unchanged — confirm this is the
    intended behaviour.
    """
    strategies = (
        lambda raw: raw.encode().decode('utf-8'),
        lambda raw: bytes(raw, 'utf-8').decode('unicode_escape'),
        lambda raw: raw.encode('raw_unicode_escape').decode('utf-8'),
    )

    failures = []
    for strategy in strategies:
        try:
            return strategy(text)
        except Exception as error:
            # Remember the failure and fall through to the next strategy.
            failures.append(error)

    print(f"Aviso: Não foi possível decodificar texto: '{text}'")
    print(f"Erros: {failures[0]}, {failures[1]}, {failures[2]}")
    return text


def get_date_of_attach_msg(message):
    """Work out a message's UTC timestamp from the elements rendered around it.

    The date is read from the ``div._amk4._amkb`` element (searched three
    levels above ``message``) that sits closest above the message on the page;
    the time is read from the first ``span.x1rg5ohu.x16dsc37[dir='auto']``
    inside the message. Both are parsed as ``DD/MM/YYYY HH:MM`` in the
    ``America/Sao_Paulo`` timezone and converted to UTC.

    Args:
        message: element exposing ``location``, ``find_element`` and
            ``find_elements`` (used here as a Selenium element).

    Returns:
        A timezone-aware UTC ``datetime``, or ``None`` when the date or time
        text is missing or anything raises (the error is printed).
    """
    utc_dt = None

    try:
        message_y = message.location['y']

        # Date separators are looked up in the ancestor three levels up.
        container = message.find_element(By.XPATH, "./../../..")
        separators = container.find_elements(
            By.CSS_SELECTOR, "div._amk4._amkb")

        # Keep only separators drawn above the message, paired with their
        # vertical distance to it.
        above = []
        for separator in separators:
            separator_y = separator.location['y']
            if separator_y < message_y:
                above.append((message_y - separator_y, separator))

        date = None
        if above:
            # min() returns the first of equally distant candidates.
            nearest = min(above, key=lambda pair: pair[0])[1]
            if nearest:
                date = nearest.text

        # Time label shown inside attachment messages.
        time = None
        time_spans = message.find_elements(
            By.CSS_SELECTOR, "span.x1rg5ohu.x16dsc37[dir='auto']")
        if time_spans and time_spans[0].text:
            time = time_spans[0].text

        # Only convert when both pieces were found.
        if date and time:
            sao_paulo = pytz.timezone('America/Sao_Paulo')
            naive_dt = datetime.strptime(f"{date} {time}", "%d/%m/%Y %H:%M")
            utc_dt = sao_paulo.localize(naive_dt).astimezone(pytz.UTC)

    except Exception as e:
        print(f"Erro ao processar data/hora: {e}")

    return utc_dt


def get_all_message_info(messages_elements):
    """Extract a plain dict of data from every message element.

    For each element the function collects the quoted message (if any), the
    message text, the time/date/sender taken from the ``data-pre-plain-text``
    attribute, and a UTC timestamp.

    Timestamp policy: the value parsed from ``data-pre-plain-text`` is used
    when available. ``get_date_of_attach_msg`` is only a fallback for messages
    that contain an image, a download button or a voice message and have no
    parsed timestamp yet, so a valid timestamp is never replaced by ``None``.

    Side effects: clicks the "Download voice message" button when present
    (followed by a 2 s sleep) and prints each resulting dict.

    Args:
        messages_elements: iterable of message elements (used here as Selenium
            elements exposing ``find_elements``).

    Returns:
        list[dict] with keys ``text``, ``time``, ``date``, ``sender``,
        ``quoted_message``, ``attachment_data`` (currently always ``None``)
        and ``timestamp_utc`` (ISO-8601 string or ``None``). A message whose
        processing raises is skipped after printing the error.
    """
    messages = []
    for message in messages_elements:
        try:
            time, date, sender, utc_dt = None, None, None, None

            quoted_message = _extract_quoted_message(message)

            # Text message: body plus the '[HH:MM, DD/MM/YYYY] Name: ' header.
            text = ''
            copyable_text = message.find_elements(
                By.CSS_SELECTOR, ".copyable-text")
            if copyable_text:
                main_text_element = message.find_elements(
                    By.CSS_SELECTOR, "span.selectable-text.copyable-text")
                if main_text_element:
                    try:
                        text = decode_latin(main_text_element[0].text.strip())
                    except Exception as e:
                        print(f"Erro ao processar mensagem main: {e}")

                date_text = copyable_text[0].get_attribute(
                    "data-pre-plain-text")
                if date_text:
                    time, date, sender = _parse_pre_plain_text(date_text)
                    if date and time:
                        try:
                            utc_dt = _local_to_utc(date, time)
                        except ValueError as e:
                            # Keep the message; only the timestamp is unknown.
                            print(
                                f"Aviso: data/hora não reconhecida em '{date_text}': {e}")

            # TODO: attachment payloads are not extracted yet
            # (process_image_attachment / process_pdf_attachment /
            # process_audio_attachment are still to be written).
            attachment_data = None

            has_image = bool(message.find_elements(
                By.CSS_SELECTOR, "img[data-testid='image-thumb'], img[class*='x15kfjtz']"))
            has_pdf = bool(message.find_elements(
                By.CSS_SELECTOR, "div[role='button'][title^='Download']"))

            # Click the download button if present.
            download_audio_button = message.find_elements(
                By.CSS_SELECTOR, "button[aria-label='Download voice message']")
            if download_audio_button:
                download_audio_button[0].click()
                sleep(2)  # Wait for download to start

            audio_play_button = message.find_elements(
                By.CSS_SELECTOR, 'button[aria-label="Play voice message"]')
            if audio_play_button:
                duration_element = message.find_elements(
                    By.CSS_SELECTOR, "div._ak8w")
                duration_text = duration_element[0].text if duration_element else "0:00"
                print(f"Audio Duration: {duration_text}")

            # Fall back to the on-screen date/time only when the header gave
            # no timestamp; the fallback may itself return None.
            if utc_dt is None and (has_image or has_pdf or audio_play_button):
                utc_dt = get_date_of_attach_msg(message)

            message_data = {
                "text": text,
                "time": time,
                "date": date,
                "sender": sender,
                "quoted_message": quoted_message,
                "attachment_data": attachment_data,
                # TODO: change this if messages are saved to a database
                "timestamp_utc": utc_dt.isoformat() if utc_dt else None,
            }
            print(message_data)

            messages.append(message_data)
        except Exception as e:
            print(f"Erro ao processar uma mensagem: {e}")
    return messages


def _extract_quoted_message(message):
    """Return ``{"sender", "text"}`` for the quoted message, or ``None`` if absent."""
    quoted_elements = message.find_elements(
        By.CSS_SELECTOR, "div[role='button'][aria-label='Quoted message']")
    if not quoted_elements:
        return None
    try:
        quoted_sender = quoted_elements[0].find_elements(
            By.CSS_SELECTOR, "span[dir='auto']._ao3e")
        quoted_text = quoted_elements[0].find_elements(
            By.CSS_SELECTOR, "span.quoted-mention._ao3e")
        if quoted_text:
            return {
                "sender": quoted_sender[0].text if quoted_sender else '',
                "text": quoted_text[0].text,
            }
    except Exception as e:
        print(f"Erro ao processar mensagem citada: {e}")
    return None


def _parse_pre_plain_text(header):
    """Split a '[HH:MM, DD/MM/YYYY] Name: ' header into ``(time, date, sender)``.

    Only the first '] ' separates the stamp from the sender and only the
    trailing colon is removed, so names containing '] ' or ': ' stay intact.
    Missing parts come back as ``None``.
    """
    stamp, _, sender_part = header.partition('] ')
    time, _, date = stamp.replace('[', '').partition(', ')
    sender = sender_part.strip()
    if sender.endswith(':'):
        sender = sender[:-1].rstrip()
    return time or None, date or None, sender or None


def _local_to_utc(date, time):
    """Convert 'DD/MM/YYYY' and 'HH:MM' (São Paulo wall clock) to an aware UTC datetime."""
    local_tz = pytz.timezone('America/Sao_Paulo')
    local_dt = local_tz.localize(
        datetime.strptime(f"{date} {time}", "%d/%m/%Y %H:%M"))
    return local_dt.astimezone(pytz.UTC)

# Collect the messages of the currently open conversation


def get_all_messages():
    """Load the open conversation's history and return its messages as dicts.

    The conversation container is scrolled to the top repeatedly; after each
    scroll the ``.message-in`` / ``.message-out`` elements are counted again,
    and the loop ends once the count stops changing. When the "Click here to
    get older messages from your phone." button is present it is clicked and
    the function waits 10 s before scrolling.

    Returns:
        list: the result of ``get_all_message_info`` for the elements found in
        the last pass (also printed).
    """
    driver = bot.driver
    message_selector = ".message-in, .message-out"
    # TODO: also match the Portuguese wording of this button.
    # TODO: handle "Couldn't get older messages. Open WhatsApp on your phone
    # and click here to try again."
    older_button_xpath = (
        "//button[.//div[contains(text(), "
        "'Click here to get older messages from your phone.')]]")

    # Locate the conversation container.
    conversation_container = driver.find_element(
        By.XPATH, '//*[@id="main"]/div[3]/div/div[2]')

    seen_count = 0
    messages_elements = conversation_container.find_elements(
        By.CSS_SELECTOR, message_selector)

    # Keep scrolling while each pass reveals a different number of messages.
    while len(messages_elements) != seen_count:
        seen_count = len(messages_elements)

        older_buttons = driver.find_elements(By.XPATH, older_button_xpath)
        if older_buttons:
            older_buttons[0].click()
            sleep(10)

        print('Attempting to scroll')
        driver.execute_script(
            "arguments[0].scrollTop = 0;", conversation_container)
        sleep(2)

        messages_elements = conversation_container.find_elements(
            By.CSS_SELECTOR, message_selector)

    messages = get_all_message_info(messages_elements)
    print(messages)
    return list(messages)


# Example usage (run with a conversation open; get_all_messages takes no arguments)
# all_messages = get_all_messages()

# Show the collected messages
# for msg in all_messages:
#     print(msg)

In [6]:
def scroll_chats_list(step=1000, pause=2):
    """Scroll the chat-list pane (element id ``pane-side``) down by ``step`` pixels.

    The previous script assigned ``-scrollTop + 1000``, which made the pane
    bounce between offsets instead of advancing (a negative ``scrollTop`` is
    clamped to 0 by the browser). The offset is now added to the current
    position.

    Args:
        step: number of pixels to add to the pane's current ``scrollTop``.
        pause: seconds to sleep afterwards so newly rendered chats can load.
    """
    chat_list_element = bot.driver.find_element(By.ID, "pane-side")
    bot.driver.execute_script(
        "arguments[0].scrollTop = arguments[0].scrollTop + arguments[1];",
        chat_list_element, step)
    sleep(pause)


In [None]:
# Collect the names of the chats shown in the WhatsApp Web chat list.
# bot.click_first_chat_and_scroll()

# Test limit: only the first MAX_CHATS visible chats are inspected and the
# list is not scrolled. Set to None to walk the whole list.
MAX_CHATS = 1

# The chat list's aria-label depends on the UI language; try each in turn.
CHAT_LIST_LABELS = ('Lista de conversas', 'Chat list')

all_chats = set()  # a set, because the same chat can be seen again after a scroll

while True:
    # Find the chats currently rendered in the list.
    chat_items = []
    for label in CHAT_LIST_LABELS:
        chat_items = bot.driver.find_elements(
            By.XPATH, f"//div[@aria-label='{label}']//div[@role='listitem']")
        if chat_items:
            break

    if MAX_CHATS is not None:
        # Slicing (unlike chat_items[0]) is safe when nothing was found.
        chat_items = chat_items[:MAX_CHATS]

    new_names = 0
    for chat in chat_items:
        try:
            # Chat name and the date shown for its last message.
            name = chat.find_element(By.XPATH, ".//span[@dir='auto']").text
            print(f'name: {name}')
            last_date_elements = chat.find_elements(
                By.XPATH, ".//div[@class='_ak8i']")
            if last_date_elements:
                # Can be a date DD/MM/YYYY, a time HH:MM or a day of the week
                # (Saturday, Sunday, Monday, ...).
                last_date = last_date_elements[0].text
                print(f'last_date: {last_date}')

            if name not in all_chats:
                all_chats.add(name)
                new_names += 1
                # TODO: export the conversation for each new chat:
                #     chat.click(); sleep(2)
                #     messages = get_all_messages()
                #     with open(f"{name}.json", "w", encoding='utf-8') as f:
                #         json.dump(messages, f, ensure_ascii=False, indent=2)
        except Exception as e:
            print(f"Erro ao processar um chat: {e}")

    # Stop after one pass in test mode, or once a pass finds nothing new.
    if MAX_CHATS is not None or new_names == 0:
        break

    scroll_chats_list()

print(all_chats)
print(len(all_chats))