In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.gridspec as gridspec
from matplotlib.patches import Patch, Rectangle
from sklearn.metrics import confusion_matrix
import os

In [2]:
# Output directory for every image written by this notebook.
save_path = '../assets'
# exist_ok=True makes the call idempotent and avoids the check-then-create race.
os.makedirs(save_path, exist_ok=True)

In [3]:
# Global plot look: sans-serif font family, then seaborn's "white" style.
plt.rcParams.update({'font.family': 'sans-serif'})
sns.set_style('white')

# Colour palette
COLOR_FRAUD = '#0D1B5E'   # dark blue
COLOR_NORMAL = '#0085FF'  # light blue
COLOR_TEXT = '#333333'
PALETTE_PRED = [COLOR_NORMAL, COLOR_FRAUD]
# 'Mule' and 'Boss' share the hex codes of the normal and fraud colours.
PALETTE_ROLES = dict(
    Honest='#F26522',
    Mule=COLOR_NORMAL,
    Boss=COLOR_FRAUD,
    Unknown='gray',
)


# CARREGAMENTO E PREPARAÇÃO DE DADOS

# Input files read by this cell.
RESULTS_CSV = '../data/01_raw/resultados_finais_modelagem.csv'
RAW_CSV = '../data/01_raw/synthetic_dataset.csv'

try:
    df = pd.read_csv(RESULTS_CSV, index_col=0)
except FileNotFoundError:
    # Every chart below needs `df`; stop here with the real cause instead of
    # letting the notebook fail later with an unrelated NameError.
    print("Arquivo não encontrado.")
    raise

# Normalise the prediction column to the name used by the rest of the notebook.
if 'predicao_classe' in df.columns:
    df = df.rename(columns={'predicao_classe': 'is_fraud_pred'})

# The raw dataset is needed only for its row count, so reading a single column
# is enough and avoids holding the full file in memory.
if os.path.exists(RAW_CSV):
    total_transacoes = len(pd.read_csv(RAW_CSV, usecols=[0]))
else:
    total_transacoes = len(df)

y_pred = df['is_fraud_pred']
if 'is_fraud_real' in df.columns:
    y_real = df['is_fraud_real']
else:
    # Without ground truth the predictions are compared with themselves, which
    # gives a purely diagonal confusion matrix; say so instead of doing it silently.
    print("Aviso: coluna 'is_fraud_real' ausente; usando 'is_fraud_pred' como rótulo real.")
    y_real = y_pred

# Hand-entered model metrics, one tuple per model: (name, AUC, recall, precision).
_model_scores = [
    ('XGBoost', 0.99, 0.92, 0.98),
    ('Random Forest', 0.97, 0.88, 0.97),
    ('Logistic Reg', 0.98, 0.92, 0.75),
]
_score_columns = ('Modelo', 'AUC', 'Recall', 'Precision')

# Column-oriented view of the same numbers, then a frame indexed by model name.
dados_modelos = {
    column: list(values)
    for column, values in zip(_score_columns, zip(*_model_scores))
}
df_metrics = pd.DataFrame(dados_modelos).set_index('Modelo')


# FUNÇÕES AUXILIARES

def add_border(ax, kind='rect'):
    """
    Draw a black, 1.5-wide border on ``ax``.

    kind='rect':   add an unfilled Rectangle patch spanning the axes area
                   (unit square in axes coordinates).
    kind='spines': recolour and thicken the existing axis spines.
    Any other value leaves the axes untouched.
    """
    if kind == 'spines':
        for edge in ax.spines.values():
            edge.set_edgecolor('black')
            edge.set_linewidth(1.5)
        return

    if kind == 'rect':
        frame = Rectangle(
            (0, 0), 1, 1,
            transform=ax.transAxes,
            linewidth=1.5,
            edgecolor='black',
            facecolor='none',
        )
        ax.add_patch(frame)

def draw_card(ax, t, v, c):
    """Render a KPI card on ``ax``: value ``v`` in colour ``c`` above the gray title ``t``."""
    centred = {'ha': 'center', 'va': 'center'}

    ax.axis('off')
    # Large bold value in the upper part of the card, small caption beneath it.
    ax.text(0.5, 0.6, v, fontsize=22, fontweight='bold', color=c, **centred)
    ax.text(0.5, 0.25, t, fontsize=10, color='gray', **centred)
    add_border(ax, kind='rect')

# GERAÇÃO DOS GRÁFICOS

# --- IMAGE 1: DASHBOARD ---
def _style_donut(ax, autotexts, title):
    """Finish a pie already drawn on ``ax`` as a donut chart.

    Makes the percentage labels white, bold and size 14, covers the centre
    with a white disc of radius 0.70 and sets a bold title.
    """
    for pct_label in autotexts:
        pct_label.set_color('white')
        pct_label.set_fontsize(14)
        pct_label.set_fontweight('bold')
    ax.add_artist(plt.Circle((0, 0), 0.70, fc='white'))
    ax.set_title(title, fontweight='bold', pad=20)


def _frame_panel(figure, cell):
    """Add a frameless, tick-free axes over grid ``cell`` and draw a border on it.

    Returns the overlay axes.
    """
    overlay = figure.add_subplot(cell, frameon=False)
    overlay.set_xticks([])
    overlay.set_yticks([])
    add_border(overlay, kind='rect')
    return overlay


def _thousands(number):
    """Format ``number`` with no decimals and '.' as the thousands separator."""
    return f"{number:,.0f}".replace(',', '.')


fig = plt.figure(figsize=(16, 10), dpi=300)
gs = gridspec.GridSpec(2, 4, height_ratios=[0.25, 0.75])
gs.update(wspace=0.3, hspace=0.3)

# KPIs (the fraud subset is computed once and reused by the role donut below)
fraudes = df[df['is_fraud_pred'] == 1]
kpi_contas = len(df)
kpi_fraudes = df['is_fraud_pred'].sum()
kpi_valor = fraudes['total_in'].sum()

# Top row: one KPI card per grid column.
kpi_cards = [
    ("Total Transações", _thousands(total_transacoes)),
    ("Contas Únicas", _thousands(kpi_contas)),
    ("Contas Fraudulentas", _thousands(kpi_fraudes)),
    ("Valor em Risco", f"R$ {kpi_valor/1e6:.1f}M"),
]
for column, (card_title, card_value) in enumerate(kpi_cards):
    draw_card(fig.add_subplot(gs[0, column]), card_title, card_value, "#333")

# Donut 1: predicted class split
ax_p1 = fig.add_subplot(gs[1, 0:2])
sizes1 = [kpi_contas - kpi_fraudes, kpi_fraudes]
wedges, texts, autotexts = ax_p1.pie(
    sizes1, labels=['Normal', 'Fraude'], colors=PALETTE_PRED, autopct='%1.1f%%',
    explode=(0, 0.1), pctdistance=0.85, startangle=90,
)
_style_donut(ax_p1, autotexts, 'Distribuição Total de Classes')
rect_ax1 = _frame_panel(fig, gs[1, 0:2])

# Donut 2: roles among accounts predicted as fraud
ax_p2 = fig.add_subplot(gs[1, 2:4])
if 'role' in fraudes.columns and not fraudes.empty:
    rc = fraudes['role'].value_counts()
    cols = [PALETTE_ROLES.get(r, 'gray') for r in rc.index]
    wedges2, texts2, autotexts2 = ax_p2.pie(
        rc, labels=rc.index, colors=cols, autopct='%1.1f%%',
        pctdistance=0.85, startangle=140,
    )
    _style_donut(ax_p2, autotexts2, 'Distribuição de Papéis (Fraudes)')
    rect_ax2 = _frame_panel(fig, gs[1, 2:4])
else:
    # Nothing to plot: hide the axes rather than leaving an empty tick frame.
    ax_p2.axis('off')

# Save and close this specific figure instead of relying on the current one.
fig.savefig(f'{save_path}/dashboard_kpi.png', bbox_inches='tight')
plt.close(fig)


# --- IMAGE 2: SCATTER PLOT ---
if 'ratio' in df.columns and 'avg_retention_hours' in df.columns:
    fig, ax = plt.subplots(figsize=(12, 7), dpi=300)
    # Only rows with ratio <= 1.5 and retention below 200 are plotted.
    df_viz = df[(df['ratio'] <= 1.5) & (df['avg_retention_hours'] < 200)]
    palette_dict = {0: COLOR_NORMAL, 1: COLOR_FRAUD}

    sns.scatterplot(
        data=df_viz, x='ratio', y='avg_retention_hours', hue='is_fraud_pred',
        palette=palette_dict, hue_order=[0, 1], alpha=0.6, s=50, ax=ax
    )
    ax.set_title('Padrão de Comportamento: Dispersão de Classes', fontweight='bold', fontsize=16)
    ax.set_xlabel('Taxa de Repasse (Saída/Entrada)')
    ax.set_ylabel('Tempo Médio de Retenção (Horas)')

    # Relabel the legend positionally (hue_order fixes the order to 0, 1), but
    # only when there are exactly two entries; otherwise keep seaborn's legend
    # instead of hiding a mismatch behind a bare except.
    handles, labels = ax.get_legend_handles_labels()
    if len(handles) == 2:
        ax.legend(handles=handles, labels=['Não', 'Sim'], title='É Fraude?')

    add_border(ax, kind='rect')
    fig.savefig(f'{save_path}/scatter_plot.png', bbox_inches='tight')
    plt.close(fig)


# --- IMAGE 3: BARS (CONNECTIONS) ---
if 'in_degree' in df.columns and 'out_degree' in df.columns:
    fig, ax = plt.subplots(figsize=(10, 6), dpi=300)

    # Mean in/out degree per predicted class, reshaped to long format for seaborn.
    df_bar = df.groupby('is_fraud_pred')[['in_degree', 'out_degree']].mean().reset_index()
    df_melt = df_bar.melt(id_vars='is_fraud_pred')
    df_melt['variable'] = df_melt['variable'].map({'in_degree': 'In Degree (Recebe)', 'out_degree': 'Out Degree (Envia)'})

    sns.barplot(data=df_melt, x='variable', y='value', hue='is_fraud_pred',
                palette={0: COLOR_NORMAL, 1: COLOR_FRAUD}, hue_order=[0, 1], ax=ax)
    ax.set_title('Média de Graus de Entrada e Saída por Classe', fontweight='bold', fontsize=16)
    ax.set_ylabel('Média de Conexões')
    ax.set_xlabel('')

    # Relabel the legend positionally (hue_order fixes the order to 0, 1), but
    # only when there are exactly two entries; otherwise keep seaborn's legend
    # instead of hiding a mismatch behind a bare except.
    handles, labels = ax.get_legend_handles_labels()
    if len(handles) == 2:
        ax.legend(handles=handles, labels=['Não', 'Sim'], title='É Fraude?')

    fig.savefig(f'{save_path}/bar_chart_class.png', bbox_inches='tight')
    plt.close(fig)


# --- IMAGE 4: MODEL COMPARISON ---
fig, ax = plt.subplots(figsize=(8, 5), dpi=300)

# Long format: one row per (model, metric) pair, restricted to recall and precision.
df_melted = (
    df_metrics[['Recall', 'Precision']]
    .reset_index()
    .melt(id_vars='Modelo')
)
sns.barplot(
    x='Modelo', y='value', hue='variable', data=df_melted,
    palette=[COLOR_NORMAL, COLOR_FRAUD], ax=ax,
)

ax.set_title('Comparativo de Performance', color=COLOR_TEXT, fontweight='bold')
ax.set_ylim(0.5, 1.05)
ax.legend(loc='lower right', title='Métrica')
add_border(ax, kind='spines')

plt.savefig(f'{save_path}/bar_chart_models.png', bbox_inches='tight')
plt.close()


# --- IMAGE 5: METRICS TABLE ---
fig, ax = plt.subplots(figsize=(6, 3), dpi=300)
ax.axis('off')

col_labels = ['AUC', 'Recall', 'Precision']
# One row of two-decimal strings per model, in col_labels order.
cell_text = [
    [f"{getattr(row, metric):.2f}" for metric in col_labels]
    for row in df_metrics.itertuples()
]
the_table = ax.table(
    cellText=cell_text,
    rowLabels=df_metrics.index,
    colLabels=col_labels,
    loc='center',
    cellLoc='center',
)
the_table.scale(1, 1.8)
the_table.auto_set_font_size(False)
the_table.set_fontsize(12)

# Conditional styling: highlight the best value(s) of every metric column.
cells = the_table.get_celld()
for col_idx, col_name in enumerate(col_labels):
    column = df_metrics[col_name]
    best = column.max()
    # Data row i is looked up at table row i + 1 (row 0 is taken by the column labels).
    for table_row, val in enumerate(column, start=1):
        cell = cells[(table_row, col_idx)]
        if val != best:
            cell.set_facecolor('white')
            continue
        cell.set_facecolor(COLOR_NORMAL)
        cell.get_text().set_color('white')
        cell.get_text().set_weight('bold')

ax.set_title('Resumo Técnico das Métricas', fontweight='bold', y=0.9)
add_border(ax, kind='rect')
plt.savefig(f'{save_path}/model_metrics.png', bbox_inches='tight')
plt.close()


# --- IMAGE 6: CONFUSION MATRIX ---
fig, ax = plt.subplots(figsize=(5, 5), dpi=300)
try:
    # Fixing the label set keeps the matrix 2x2 even when one class is absent
    # from the data, so the two tick labels set below always match the ticks.
    cm = confusion_matrix(y_real, y_pred, labels=[0, 1])
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False,
                annot_kws={"size": 16, "weight": "bold"}, ax=ax)
    ax.set_title('Matriz de Confusão (XGBoost)', fontweight='bold', color=COLOR_TEXT)
    ax.set_ylabel('Real (Gabarito)')
    ax.set_xlabel('Predito (Modelo)')
    ax.set_xticklabels(['Normal', 'Fraude'])
    ax.set_yticklabels(['Normal', 'Fraude'])
    add_border(ax, kind='spines')
    # Save only a fully drawn matrix; a failed attempt must not overwrite the
    # image with a blank or half-drawn figure.
    fig.savefig(f'{save_path}/fraud_matrix.png', bbox_inches='tight')
except ValueError as exc:
    print("Erro ao gerar matriz de confusão.", exc)
finally:
    # Release the figure whether or not the matrix could be drawn.
    plt.close(fig)

print(f"Imagens salvas em: {os.path.abspath(save_path)}")

Imagens salvas em: c:\Users\usuario\Desktop\Data Science\Projeto AML\assets
