In [7]:
import os
import pandas as pd
import gradio as gr

from build_data import load_data, get_data, transformer_data
from model import get_model, calculate_shap_values
from visualizations import plot_shap_waterfall_by_campanha

In [2]:
# Parquet file used by preparar_dados as a cache for the SHAP values table
# (read when it exists and producao=True, rewritten after each recomputation).
SHAP_FILE = "shap_values.parquet"

In [3]:
def preparar_dados(producao=True):
    """Return the SHAP values table, from the parquet cache or by recomputing it.

    Args:
        producao: When truthy and ``SHAP_FILE`` exists on disk, the saved
            parquet file is read and returned without any training. In every
            other case the full pipeline below is executed.

    Returns:
        A pandas DataFrame with the SHAP values. On the recompute path it is
        the frame returned by ``calculate_shap_values`` with the
        ``TIPO_CAMPANHA`` and ``DATA`` columns appended and rows containing
        any missing value removed.

    Side effects:
        Prints a status message. On the recompute path, (over)writes
        ``SHAP_FILE`` with the resulting frame, without the index.
    """
    cache_disponivel = producao and os.path.exists(SHAP_FILE)
    if cache_disponivel:
        print("✔️ Carregando SHAP values do arquivo salvo.")
        return pd.read_parquet(SHAP_FILE)

    print("⚙️ Treinando modelo e calculando SHAP values.")
    df_bruto = load_data("df_pandas.csv")
    df_filtrado, df_aux = get_data(df_bruto, "CPMAT")
    df_codificado = transformer_data(df_filtrado)

    # Only the model and the SHAP frame are used; the other returned values
    # are unpacked into throwaway names so the expected arity is still enforced.
    modelo, _ = get_model(df_codificado)
    _, _, _, tabela_shap = calculate_shap_values(modelo)

    # Column-wise concat aligns rows on the index of both frames; rows left
    # with a missing value after that alignment are dropped.
    colunas_contexto = df_aux[["TIPO_CAMPANHA", "DATA"]]
    tabela_shap = pd.concat([tabela_shap, colunas_contexto], axis=1)
    tabela_shap = tabela_shap.dropna()

    tabela_shap.to_parquet(SHAP_FILE, index=False)
    return tabela_shap

In [12]:
# Load the SHAP values table; with producao=True the saved parquet file is
# read when it exists, otherwise the model is trained and the values recomputed.
df_teste = preparar_dados(producao=True)
df_teste

✔️ Carregando SHAP values do arquivo salvo.


Unnamed: 0,base_value,predicted_value,ETAPA_shap,TOTAL_IMPRESSOES_shap,TOTAL_CLIQUES_shap,TOTAL_CUSTO_shap,TOTAL_SESSAO_shap,TOTAL_INSCRICOES_shap,TOTAL_MATRICULADOS_shap,LEADS_shap,CTR_shap,CR_shap,CPS_shap,TX_LEADS_shap,TX_INSCRITOS_shap,CPI_shap,TX_MATRICULADOS_shap,CPA_shap,TIPO_CAMPANHA,DATA
0,28666.789195,14445.722622,856.041172,-5793.967683,-3920.325621,-5977.642081,-4837.789284,-288.880260,2995.754096,-411.865622,295.893482,1216.316162,1324.081915,494.229069,795.338236,1383.529039,-1789.320914,-562.458279,OUTROS,2025-01-16
1,28666.789195,37422.560567,131.129130,7293.520861,-2744.765452,348.991306,-4646.883763,-315.004445,5038.789286,-484.305707,-1358.415823,1989.057858,1710.800804,1155.220075,1103.322023,2187.591998,-1632.036535,-1021.240242,OUTROS,2025-01-29
2,28666.789195,2972.177272,-673.718163,-6713.075740,-3689.632582,1947.409977,-4339.740656,-7.102591,-4588.034887,98.526725,46.934434,-1226.932131,427.451219,-1353.144186,-1020.910521,-1274.387471,-2875.531233,-452.724118,SEARCH,2025-01-31
3,28666.789195,2803.381512,-293.080096,-6434.321549,-4542.443693,-3918.994759,-4371.594595,136.099573,-1152.333218,175.807730,305.882009,-1368.064316,-247.374386,-397.726448,-611.381073,-1960.265750,-380.916093,-802.701018,OUTROS,2025-01-06
4,28666.789195,11228.182852,1052.903264,-5964.656145,-4255.464226,-6847.885138,-4688.955172,-105.655684,2982.460395,-351.183651,181.535400,638.897179,7.572878,625.391709,791.106198,-187.393353,-609.630005,-707.649991,OUTROS,2025-01-27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106,28666.789195,48972.996129,715.713471,5590.501137,3846.837262,4796.515728,2411.900484,97.400287,2006.281679,176.708940,-2903.929094,-2053.445124,-795.568466,1481.510657,845.768258,1387.687937,383.054771,2319.269008,OUTROS,2025-01-13
107,28666.789195,29722.516196,-2342.894095,4601.556324,5573.044177,-9203.606837,3648.070275,-72.860288,3633.063312,-565.156422,-350.902550,-361.721413,-1479.746329,-5.625277,1081.699964,357.999905,-1734.779832,-1722.413912,DEMAND GEN,2025-01-01
108,28666.789195,195294.637254,-2317.453979,36828.177252,38433.246705,12814.764092,85000.198140,-468.785216,2179.410298,-299.695528,408.609219,682.592263,-2842.894251,-861.241909,-1003.872601,926.048680,-689.061286,-2162.193821,DEMAND GEN,2025-01-15
109,28666.789195,1442.625156,-238.948271,-6714.832694,-4568.135136,-4504.618570,-4473.861862,138.108111,-764.335464,91.406219,215.497531,-1545.355210,-405.346472,-406.358435,-598.712400,-1863.724251,-780.132662,-804.814474,OUTROS,2025-01-02


In [13]:
# List the distinct DATA values available for each TIPO_CAMPANHA.
print(df_teste.groupby("TIPO_CAMPANHA")["DATA"].unique())


TIPO_CAMPANHA
DEMAND GEN    [2025-01-04, 2025-01-23, 2025-01-11, 2025-01-1...
OUTROS        [2025-01-16, 2025-01-29, 2025-01-06, 2025-01-2...
PMAX          [2025-01-07, 2025-01-29, 2025-01-10, 2025-01-2...
SEARCH        [2025-01-31, 2025-01-21, 2025-02-01, 2025-01-2...
Name: DATA, dtype: object


In [14]:
# Draw the SHAP waterfall for one campaign type and date. As the recorded
# output shows, the call returns a tuple: a PIL image and the matching row(s).
plot_shap_waterfall_by_campanha(df_teste, campanha_escolhida='PMAX', data_escolhida= '2025-01-07', limite=10)

TOTAL_CUSTO_shap           7577.470796
TX_MATRICULADOS_shap       4799.120498
TOTAL_CLIQUES_shap         4227.999142
TOTAL_MATRICULADOS_shap   -3374.709024
CPS_shap                   1586.478402
TX_INSCRITOS_shap         -1132.922813
CTR_shap                   1080.308798
TOTAL_IMPRESSOES_shap       919.475466
CPA_shap                    833.461218
CPI_shap                   -793.005751
TOTAL_INSCRICOES_shap       688.339903
TOTAL_SESSAO_shap          -475.788865
CR_shap                     456.065507
ETAPA_shap                  306.331776
TX_LEADS_shap               194.685172
LEADS_shap                  -79.563562
dtype: float64


(<PIL.PngImagePlugin.PngImageFile image mode=RGBA size=1183x584>,
       base_value  predicted_value  ETAPA_shap  TOTAL_IMPRESSOES_shap  \
 15  28666.789195     45480.535858  306.331776             919.475466   
 
     TOTAL_CLIQUES_shap  TOTAL_CUSTO_shap  TOTAL_SESSAO_shap  \
 15         4227.999142       7577.470796        -475.788865   
 
     TOTAL_INSCRICOES_shap  TOTAL_MATRICULADOS_shap  LEADS_shap     CTR_shap  \
 15             688.339903             -3374.709024  -79.563562  1080.308798   
 
        CR_shap     CPS_shap  TX_LEADS_shap  TX_INSCRITOS_shap    CPI_shap  \
 15  456.065507  1586.478402     194.685172       -1132.922813 -793.005751   
 
     TX_MATRICULADOS_shap    CPA_shap TIPO_CAMPANHA        DATA  
 15           4799.120498  833.461218          PMAX  2025-01-07  )

In [6]:
def gerar_grafico(campanha_escolhida, data_escolhida, limite=150, df_shap_values=None):
    """Build the SHAP waterfall image for one campaign type and date.

    Args:
        campanha_escolhida: Campaign type forwarded to
            ``plot_shap_waterfall_by_campanha``.
        data_escolhida: Date forwarded to ``plot_shap_waterfall_by_campanha``.
        limite: Limit value forwarded to ``plot_shap_waterfall_by_campanha``.
        df_shap_values: Optional SHAP values table to plot from. When omitted
            it is obtained from ``preparar_dados()``, so the function no
            longer depends on a notebook-level variable having been defined
            by some earlier cell.

    Returns:
        The image returned by ``plot_shap_waterfall_by_campanha`` or, if
        anything fails (including loading the data), a string with the error
        message prefixed by "Erro ao gerar gráfico: ".
    """
    try:
        # Previously this read an undefined global, so every call ended in the
        # except branch with a NameError message on a fresh kernel.
        if df_shap_values is None:
            df_shap_values = preparar_dados()
        image, _ = plot_shap_waterfall_by_campanha(
            df_shap_values, campanha_escolhida, data_escolhida, limite
        )
        return image
    except Exception as e:
        # Deliberate best-effort: the error is reported as text to the caller.
        return f"Erro ao gerar gráfico: {str(e)}"

In [None]:
# Campaign type must be passed as a string and the date is a required argument;
# this uses the same campaign/date pair as the direct plotting call earlier in the notebook.
df_plot = gerar_grafico("PMAX", "2025-01-07")