In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

import time
import datetime
import pandas as pd
import matplotlib.pyplot as plt
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from time import sleep
from bs4 import BeautifulSoup
import os
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from dotenv import load_dotenv
from pathlib import Path


In [2]:
# Load the environment variables from the .env file in the current directory
env_path = Path('.env')
load_dotenv(dotenv_path=env_path)

# E-mail settings used by the alert sender; each one is None when unset
EMAIL_REMETENTE = os.environ.get('EMAIL_REMETENTE')
EMAIL_DESTINO = os.environ.get('EMAIL_DESTINO')
EMAIL_PASSWORD = os.environ.get('EMAIL_PASSWORD')

In [3]:
def extrair_destino(url):
    """
    Return the fifth "/"-separated piece of ``url`` (index 4).

    For a search link such as
    ``https://www.kayak.com.br/flights/FLN-MAD,LIS,OPO/...`` that piece is
    ``FLN-MAD,LIS,OPO``. When the URL splits into four pieces or fewer, the
    placeholder "desconhecido" is returned instead.
    """
    segments = url.split("/")
    return segments[4] if len(segments) > 4 else "desconhecido"

def enviar_email(url, price):
    """
    Send an HTML alert e-mail with the price found for a search URL.

    The message goes from EMAIL_REMETENTE to EMAIL_DESTINO through
    smtp.gmail.com:587 with STARTTLS, logging in with EMAIL_PASSWORD.
    Delivery failures are reported with print() and are not raised, so a
    mail problem does not interrupt the caller.

    Args:
        url: Search URL; shown in the body as a clickable link.
        price: Price text as displayed on the page (e.g. "R$ 4.772"); used
            in the subject and in the body.

    Returns:
        None.
    """
    from html import escape  # local import: not part of the notebook's import cell

    # Both values are placed inside HTML markup (the price is scraped page
    # text, the URL sits in a single-quoted href), so escape them, quotes
    # included, before building the body.
    safe_url = escape(str(url), quote=True)
    safe_price = escape(str(price), quote=True)

    msg = MIMEMultipart()
    subject = f"Pariu Europa por {price}"
    message = f"Pariu Europa por {safe_price}<br><br>Link: <a href='{safe_url}'>{safe_url}</a>"
    msg['Subject'] = subject
    msg['From'] = EMAIL_REMETENTE
    msg['To'] = EMAIL_DESTINO
    # No extra Content-Type header on the container: MIMEMultipart already
    # carries its own, and add_header() would append a second, conflicting
    # one. The HTML type belongs to the attached part below.
    msg.attach(MIMEText(message, 'html'))

    try:
        # The context manager closes the connection even when starttls(),
        # login() or sendmail() fails; the timeout prevents hanging forever
        # on an unreachable server.
        with smtplib.SMTP('smtp.gmail.com', 587, timeout=30) as s:
            s.starttls()
            s.login(EMAIL_REMETENTE, EMAIL_PASSWORD)
            s.sendmail(EMAIL_REMETENTE, [EMAIL_DESTINO], msg.as_string())
        print(f"E-mail enviado para a URL: {url} com preço {price}")
    except Exception as e:
        print(f"Erro ao enviar e-mail para a URL {url}: {e}")

def processar_url(url, driver, logs_by_dest):
    """
    Open a search URL, read the "Mais barato" price and log it.

    Args:
        url: Search results URL to load.
        driver: Selenium WebDriver used to load and render the page.
        logs_by_dest: Dict mapping destination -> list of records. On
            success a record with the keys "timestamp", "price" and "url"
            is appended to the destination's list (the dict is mutated in
            place).

    Returns:
        (email_price, numeric_price): the price text as shown on the page
        and the same amount as an int. (None, None) when the page cannot be
        loaded, the "Mais barato" element or its price is missing, or the
        price text cannot be converted to an integer.
    """
    print("Processando URL:", url)
    try:
        driver.get(url)
    except Exception as e:
        # A navigation failure is handled like every other failure in this
        # function, so one bad URL does not abort the caller's loop.
        print(f"Erro ao carregar a URL {url}: {e}")
        return None, None

    sleep(30)  # Fixed 30-second pause to let the page load
    wait = WebDriverWait(driver, 50)
    try:
        wait.until(EC.presence_of_element_located((By.XPATH, "//div[@aria-label='Mais barato']")))
    except Exception as e:
        print(f"Erro ao esperar o elemento na URL {url}: {e}")
        return None, None

    content = driver.page_source
    soup = BeautifulSoup(content, 'html.parser')
    mais_barato_div = soup.find('div', attrs={'aria-label': 'Mais barato'})
    if not mais_barato_div:
        print(f"Div 'Mais barato' não encontrada na URL: {url}")
        return None, None

    # First <span> inside the block whose text contains the currency symbol
    price_span = mais_barato_div.find('span', string=lambda t: t and "R$" in t)
    if not price_span:
        print(f"Preço não encontrado na URL: {url}")
        return None, None

    # Original text, kept as-is for the e-mail (e.g. "R$ 4.772")
    price_text = price_span.get_text(strip=True)
    print(f"Preço encontrado na URL {url}: {price_text}")
    email_price = price_text

    # Cleaned value for conversion and charting: drop the currency symbol
    # and the "." thousands separators ("R$ 4.772" -> "4772")
    clean_price_str = price_text.replace('R$', '').replace('.', '').strip()
    timestamp = datetime.datetime.now()

    # Destination is the grouping key for the logs and the charts
    destination = extrair_destino(url)

    try:
        numeric_price = int(clean_price_str)
    except Exception as e:
        print(f"Erro ao converter o preço '{clean_price_str}' para inteiro para na URL {url}: {e}")
        return None, None

    logs_by_dest.setdefault(destination, []).append({
        "timestamp": timestamp,
        "price": numeric_price,
        "url": url
    })
    return email_price, numeric_price

def plot_log_dataframe_by_destination(logs_by_dest):
    """
    Save one price-history chart per destination.

    ``logs_by_dest`` maps each destination to a list of records that carry
    at least 'timestamp' and 'price'. For every destination the records are
    ordered by timestamp, drawn as a line chart (date/time on x, price on
    y) and written to 'grafico_precos_<destination>.png'. When the mapping
    is empty, only a notice is printed.
    """
    if logs_by_dest:
        for dest in logs_by_dest:
            history = pd.DataFrame(logs_by_dest[dest]).sort_values('timestamp')
            output_filename = f"grafico_precos_{dest}.png"

            fig, ax = plt.subplots(figsize=(10, 6))
            ax.plot(history['timestamp'], history['price'], marker='o', linestyle='-')
            ax.set_xlabel('Data/Horário')
            ax.set_ylabel('Preço')
            ax.set_title(f'Histórico de Preços para {dest}')
            plt.xticks(rotation=45)
            fig.tight_layout()
            fig.savefig(output_filename)
            # Release the figure so repeated cycles do not pile up open figures
            plt.close(fig)
            print(f"Gráfico salvo em {output_filename}")
    else:
        print("Nenhum dado para plotar.")

# --- Export the whole history to CSV (plain top-level code, no function) ---
# Flattens the logs of every destination into one table and merges it into a
# CSV file.
# NOTE(review): logs_by_dest is reset to an empty dict right below, so when this
# cell runs as written nothing is collected and the export branch is skipped.

# Per-destination log registry (empty unless it was filled during a cycle)
logs_by_dest = {}

# One flat record per log entry, tagged with the destination it belongs to
all_logs = [
    {**record, 'destination': dest}
    for dest, log_list in logs_by_dest.items()
    for record in log_list
]

if all_logs:
    csv_file = "logs_coletados.csv"
    df_history = pd.DataFrame(all_logs)
    df_history['timestamp'] = pd.to_datetime(df_history['timestamp'])
    df_history = df_history.sort_values('timestamp')
    if not os.path.exists(csv_file):
        # First export: write the collected history as-is
        df_history.to_csv(csv_file, index=False)
        print(f"Histórico exportado para {csv_file}")
    else:
        # Merge with what is already on disk, dropping repeated rows
        try:
            df_existing = pd.read_csv(csv_file, parse_dates=['timestamp'])
        except Exception as e:
            print(f"Erro ao ler o arquivo {csv_file}: {e}")
            df_existing = pd.DataFrame()
        df_combined = pd.concat([df_existing, df_history], ignore_index=True)
        df_combined = df_combined.drop_duplicates()
        df_combined = df_combined.sort_values('timestamp')
        df_combined.to_csv(csv_file, index=False)
        print(f"Histórico combinado exportado para {csv_file}")

def _exportar_historico_csv(logs_by_dest, csv_file):
    """Merge every collected record, tagged with its destination, into ``csv_file``."""
    all_logs = [
        {**record, 'destination': dest}
        for dest, log_list in logs_by_dest.items()
        for record in log_list
    ]
    if not all_logs:
        return

    df_history = pd.DataFrame(all_logs)
    df_history['timestamp'] = pd.to_datetime(df_history['timestamp'])
    df_history = df_history.sort_values('timestamp')

    if os.path.exists(csv_file):
        try:
            df_existing = pd.read_csv(csv_file, parse_dates=['timestamp'])
        except Exception as e:
            # An unreadable file is treated as empty; it is then rewritten
            # with the in-memory history only.
            print(f"Erro ao ler o arquivo {csv_file}: {e}")
            df_existing = pd.DataFrame()
        df_combined = pd.concat([df_existing, df_history], ignore_index=True)
        # The in-memory history is re-exported every cycle, so rows already
        # on disk must be dropped here.
        df_combined = df_combined.drop_duplicates().sort_values('timestamp')
        df_combined.to_csv(csv_file, index=False)
        print(f"Histórico combinado exportado para {csv_file}")
    else:
        df_history.to_csv(csv_file, index=False)
        print(f"Histórico exportado para {csv_file}")


def main(urls_file="urls.txt", csv_file="pesquisa_kayak_urls_txt.csv",
         price_limit=10000, interval_seconds=7200):
    """
    Run the price-monitoring loop until interrupted.

    Each cycle reads the search URLs from ``urls_file`` (one per line, blank
    lines ignored), opens a fresh Chrome session, processes every URL with
    processar_url(), e-mails an alert for each price at or below
    ``price_limit``, exports the accumulated history to ``csv_file``, saves
    the per-destination charts and then sleeps for ``interval_seconds``.

    Args:
        urls_file: Text file with the URLs to check.
        csv_file: CSV file that receives the price history.
        price_limit: Alert threshold; prices <= this value trigger an e-mail.
        interval_seconds: Pause between cycles, in seconds.

    Returns:
        Never returns normally; the loop has no exit condition.

    Raises:
        Whatever open() or webdriver.Chrome() raise (e.g. FileNotFoundError
        when ``urls_file`` is missing); the browser is always closed first.
    """
    # History collected since the loop started; shared by all cycles so the
    # charts show the price evolution over time.
    logs_by_dest = {}
    while True:
        print("Iniciando ciclo de pesquisa:", datetime.datetime.now())

        # Read the URL list before launching the browser, so a missing or
        # unreadable file does not leave a Chrome process behind.
        with open(urls_file, "r", encoding="utf-8") as file:
            urls = [line.strip() for line in file if line.strip()]

        chrome_options = Options()
        chrome_options.add_argument("--start-maximized")
        driver = webdriver.Chrome(options=chrome_options)
        try:
            for url in urls:
                email_price, numeric_price = processar_url(url, driver, logs_by_dest)
                if email_price is None or numeric_price is None:
                    continue
                try:
                    if numeric_price <= price_limit:
                        enviar_email(url, email_price)
                except Exception as e:
                    print(f"Erro ao processar o valor para a URL {url}: {e}")
        finally:
            # Always close the browser, including on errors and on
            # KeyboardInterrupt in the middle of a cycle.
            driver.quit()

        # Export the whole history to CSV
        _exportar_historico_csv(logs_by_dest, csv_file)

        plot_log_dataframe_by_destination(logs_by_dest)

        # With the default interval this prints "Aguardando 2 horas"
        print(f"Ciclo finalizado. Aguardando {interval_seconds / 3600:g} horas para o próximo ciclo.\n")
        time.sleep(interval_seconds)

In [4]:
# Entry point: start the monitoring loop when this code runs as the main module.
# main() loops with no exit condition, so it runs until it is interrupted
# (e.g. a KeyboardInterrupt from stopping the kernel).
if __name__ == "__main__":
    main()


Iniciando ciclo de pesquisa: 2025-03-04 15:40:22.545981
Processando URL: https://www.kayak.com.br/flights/FLN-MAD,LIS,OPO/2025-04-18-flexible-3days/2025-05-01-flexible-3days?ucs=xb47ee&sort=price_a
Preço encontrado na URL https://www.kayak.com.br/flights/FLN-MAD,LIS,OPO/2025-04-18-flexible-3days/2025-05-01-flexible-3days?ucs=xb47ee&sort=price_a: R$ 4.690
E-mail enviado para a URL: https://www.kayak.com.br/flights/FLN-MAD,LIS,OPO/2025-04-18-flexible-3days/2025-05-01-flexible-3days?ucs=xb47ee&sort=price_a com preço R$ 4.690
Processando URL: https://www.kayak.com.br/flights/FLN-FRA,PAR,AMS/2025-04-18-flexible-3days/2025-05-01-flexible-3days?ucs=138bwn0&sort=price_a
Preço encontrado na URL https://www.kayak.com.br/flights/FLN-FRA,PAR,AMS/2025-04-18-flexible-3days/2025-05-01-flexible-3days?ucs=138bwn0&sort=price_a: R$ 5.399
E-mail enviado para a URL: https://www.kayak.com.br/flights/FLN-FRA,PAR,AMS/2025-04-18-flexible-3days/2025-05-01-flexible-3days?ucs=138bwn0&sort=price_a com preço R$ 5.3

KeyboardInterrupt: 