# Tratamento de Lista Geral de Leads
## Objetivo: Esse pipeline tem como objetivo separar a lista de Leads disponível para disparos

### Campanha: lcto-ofan-jan26
### Conversion types: 1


In [15]:
# Basic imports
import pandas as pd
import numpy as np
import sys
from pathlib import Path
import os
# Put "<parent of the current working directory>/src" first on the import path
sys.path.insert(0, os.path.join(os.path.dirname(os.getcwd()), 'src'))

# Also append the relative '../src' directory to the import path
# NOTE(review): this looks redundant with the insert above when the notebook is
# run from its own directory — confirm before removing either one.
sys.path.append('../src')

# Data utilities (project-local module; presumably resolved through the src
# path added above — confirm)
from data_utils import (
    load_raw_data,
    save_processed_data,
    remove_duplicates,
    handle_missing_values,
    detect_outliers,
    normalize_column,
    process_phone_string,
    process_phone_number,
    clean_and_lower_column,
    flatten_list_to_df,
    remove_buyers_from_dataframe
)

# NOTE(review): not referenced in the visible cells of this notebook — confirm
# whether it is still needed.
CRONOGRAMA_SUBDOMAIN = 'cronogramadosfluentes-xwamel'

# SQL utilities
from sql_utils import DatabaseConnection as Dbc, load_query_from_file

# Visualization utilities
import matplotlib.pyplot as plt
import seaborn as sns

# API utilities
from api_utils import (
    make_request,
    get_json,
    post_json,
    paginated_request,
    response_to_dataframe
)

# Hotmart utilities
from hotmart_utils import Hotmart

# TMB utilities
from tmb_utils import TMB   

# Pandas display settings: no limit on displayed columns, at most 100 rows
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

# Load the database driver (DatabaseConnection, imported as Dbc)
db = Dbc()

# Initialize the Hotmart API helper
hotmart = Hotmart()

# Initialize the TMB API helper
tmb = TMB()

# Confirmation message shown once every import and initialization succeeded
print('✓ Importações concluídas com sucesso!')

✓ Importações concluídas com sucesso!


## Load Leads Dataframe

In [16]:
# Load the raw leads CSV through the project's load_raw_data helper
df = load_raw_data("wtl_events_202601061128.csv")

In [17]:
from datetime import datetime, timezone
import math

# Accumulator for the payload dicts; the row loop later in this cell appends
# one entry per row of `df`.
conversions = []

def is_nan_or_none(val):
    """Return True when `val` is None or a float NaN, False otherwise."""
    if val is None:
        return True
    # Only floats can be NaN; math.isnan is never called on other types.
    return isinstance(val, float) and math.isnan(val)

def clean_json_value(val):
    """
    Return `val` unchanged unless it cannot be represented in JSON.

    None, float NaN and float +/-infinity are all mapped to None; every other
    value is passed through as-is.
    """
    missing = is_nan_or_none(val)
    # JSON has no representation for infinity
    infinite = isinstance(val, float) and math.isinf(val)
    return None if (missing or infinite) else val

def to_iso_z(dt_str: str) -> "str | None":
    """
    Convert a timestamp string carrying a UTC offset into an ISO-8601 UTC string.

    Input:   '2026-01-04 23:51:58.000 -0300'
    Output:  '2026-01-05T02:51:58.000Z'  (UTC, millisecond precision, 'Z' suffix)

    Args:
        dt_str: Timestamp in the form '%Y-%m-%d %H:%M:%S.%f %z'.

    Returns:
        The formatted UTC timestamp, or None when `dt_str` is missing
        (None / NaN), is not a string, or does not match the expected format.
    """
    # Missing cells arrive as None or float NaN, and no non-string value can
    # match the format, so all of them map to None up front.
    if not isinstance(dt_str, str):
        return None
    try:
        dt = datetime.strptime(dt_str, "%Y-%m-%d %H:%M:%S.%f %z")  # tz-aware
        dt_utc = dt.astimezone(timezone.utc)
    except (ValueError, OverflowError):
        # ValueError: the text does not match the expected format.
        # OverflowError: the UTC conversion falls outside datetime's range.
        return None
    # %f renders six microsecond digits; drop the last three to keep milliseconds.
    return dt_utc.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z"

def make_json_compliant_dict(item):
    """
    Walk `item` recursively and sanitize every leaf with clean_json_value.

    Dicts keep their keys and have their values processed; lists are processed
    element by element; anything else is treated as a leaf value.
    """
    if isinstance(item, dict):
        cleaned = {}
        for key, value in item.items():
            cleaned[key] = make_json_compliant_dict(value)
        return cleaned
    if isinstance(item, list):
        return list(map(make_json_compliant_dict, item))
    return clean_json_value(item)

# Build one payload dict per row of `df` and collect it in `conversions`.
for idx, row in df.iterrows():
    # A missing e-mail becomes ""; otherwise it is lower-cased and stripped.
    email = row.get("field_email")
    if is_nan_or_none(email):
        email = ""
    else:
        email = str(email).lower().strip()

    # The phone is always handed to process_phone_number as a (possibly empty) string.
    phone_val = row.get("field_phone")
    if is_nan_or_none(phone_val):
        phone_str = ""
    else:
        phone_str = str(phone_val).strip()
    phone = process_phone_number(phone_str)

    # The UTM columns are copied under the same names after NaN/inf cleanup.
    utm_fields = {
        column: clean_json_value(row.get(column))
        for column in ("utm_source", "utm_campaign", "utm_medium", "utm_content")
    }

    lead_data = {
        "email": {"value": email},
        "phone": phone,
    }
    conversion_data = {
        "conversion_date": to_iso_z(row.get("data")),
        "campaign_id": clean_json_value(row.get("field_conversion")),
        "conversion_type_id": "1",
        **utm_fields,
        "conversion_raw_info": {
            "page_url": clean_json_value(row.get("url_page_path")),
        },
    }
    flow_settings = {
        "skip_conversion": False,
        "skip_orquestration": True,
    }

    # Final recursive pass so no NaN / inf value survives anywhere in the payload.
    conversions.append(
        make_json_compliant_dict(
            {
                "lead_data": lead_data,
                "conversion_data": conversion_data,
                "flow_settings": flow_settings,
            }
        )
    )

conversions


[{'lead_data': {'email': {'value': 'vanessachaves22@hotmail.com'},
   'phone': {'raw_phone_input': '(55) 21995-9579',
    'formatted_phone': '+5555219959579',
    'whatsapp_format': '+5555219959579',
    'isValid': False,
    'type': 'Nacional',
    'ddd': 55,
    'ddi': 55,
    'region': 'BR',
    'carrier': '',
    'location': '',
    'timezone': ['Etc/Unknown'],
    'number_type': 'Unknown'}},
  'conversion_data': {'conversion_date': '2026-01-05T02:51:58.000Z',
   'campaign_id': 'lcto-ofan-jan26',
   'conversion_type_id': '1',
   'utm_source': 'ig',
   'utm_campaign': 'ppt-lpm-cdf',
   'utm_medium': 'social',
   'utm_content': 'link_in_bio',
   'conversion_raw_info': {'page_url': '/pc-ofan-v1/'}},
  'flow_settings': {'skip_conversion': False, 'skip_orquestration': True}},
 {'lead_data': {'email': {'value': 'vanessachaves22@hotmail.com'},
   'phone': {'raw_phone_input': '(55) 21995-9579',
    'formatted_phone': '+5555219959579',
    'whatsapp_format': '+5555219959579',
    'isValid':

In [20]:
import requests
import time
import json
import os

# NOTE(review): the webhook URL embeds what looks like an access identifier;
# consider loading it from configuration instead of hard-coding it — confirm.
BASE_URL = "https://webhooks.aloud.com.br/data/ce7d3845-3acf-4606-835c-63485eb5fe70"

# Seconds to wait for the webhook before giving up on a request. Without a
# timeout, requests.post can block indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30

# Parsed body (JSON, or raw text as a fallback) of every response received
responses = []

PERSISTENCE_FILE = "imported_leads.json"

# Load the e-mails already sent in previous runs, if the file exists
if os.path.exists(PERSISTENCE_FILE):
    with open(PERSISTENCE_FILE, "r", encoding="utf-8") as f:
        imported_leads = set(json.load(f))
else:
    imported_leads = set()

total_to_import = len(conversions)
print(f"Total de registros para importar: {total_to_import}")

for idx, item in enumerate(conversions, start=1):
    email = (item.get("lead_data", {}).get("email", {}).get("value") or "").lower().strip()
    if not email:
        print(f"Registro {idx} ignorado: email ausente ou inválido.")
        continue
    if email in imported_leads:
        print(f"Registro {idx} já foi importado anteriormente para o email '{email}', ignorando.")
        continue

    print(f"Importando registro {idx} de {total_to_import}... ({email})")
    try:
        response = requests.post(BASE_URL, json=item, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # Network failure or timeout: leave the e-mail unmarked so the next
        # run retries it, and keep going with the remaining records.
        print(f"Falha ao enviar registro {idx} ({email}): {exc}")
        time.sleep(1)
        continue

    # Response.json() signals an undecodable body with a ValueError subclass
    try:
        resp_json = response.json()
    except ValueError:
        resp_json = response.text
    responses.append(resp_json)

    if response.ok:
        # Persist immediately so an interrupted run can resume where it stopped
        imported_leads.add(email)
        with open(PERSISTENCE_FILE, "w", encoding="utf-8") as f:
            json.dump(sorted(imported_leads), f, ensure_ascii=False, indent=2)
    else:
        # Only accepted requests count as imported; rejected ones stay
        # eligible for the next run.
        print(
            f"Registro {idx} não foi aceito (HTTP {response.status_code}); "
            "será reenviado na próxima execução."
        )

    # Pause between requests to avoid flooding the webhook
    time.sleep(1)

responses

Total de registros para importar: 2342
Registro 1 já foi importado anteriormente para o email 'vanessachaves22@hotmail.com', ignorando.
Registro 2 já foi importado anteriormente para o email 'vanessachaves22@hotmail.com', ignorando.
Registro 3 já foi importado anteriormente para o email 'guizamg321@gmail.com', ignorando.
Registro 4 já foi importado anteriormente para o email 'venanciojacinto740@gmail.com', ignorando.
Registro 5 já foi importado anteriormente para o email 'jacqueline.trabalhocs@gmail.com', ignorando.
Registro 6 já foi importado anteriormente para o email 'jacqueline.trabalhocs@gmail.com', ignorando.
Registro 7 já foi importado anteriormente para o email 'joaodalagil@gmail.com', ignorando.
Registro 8 já foi importado anteriormente para o email 'josepettidocs@gmail.com', ignorando.
Registro 9 já foi importado anteriormente para o email 'toweracabamentos@gmail.com', ignorando.
Registro 10 já foi importado anteriormente para o email 'ghusantana20@gmail.com', ignorando.
Regi

[{'message': 'Workflow was started'},
 {'message': 'Workflow was started'},
 {'message': 'Workflow was started'},
 {'message': 'Workflow was started'},
 {'message': 'Workflow was started'},
 {'message': 'Workflow was started'},
 {'message': 'Workflow was started'},
 {'message': 'Workflow was started'},
 {'message': 'Workflow was started'},
 {'message': 'Workflow was started'},
 {'message': 'Workflow was started'},
 {'message': 'Workflow was started'},
 {'message': 'Workflow was started'},
 {'message': 'Workflow was started'},
 {'message': 'Workflow was started'},
 {'message': 'Workflow was started'},
 {'message': 'Workflow was started'},
 {'message': 'Workflow was started'},
 {'message': 'Workflow was started'},
 {'message': 'Workflow was started'},
 {'message': 'Workflow was started'},
 {'message': 'Workflow was started'},
 {'message': 'Workflow was started'},
 {'message': 'Workflow was started'},
 {'message': 'Workflow was started'},
 {'message': 'Workflow was started'},
 {'message':