
# 🌑 NOIR - NETWORK-ORIENTED INTELLIGENCE FOR RISK (EM DESENVOLVIMENTO)
> Sistema Anti-Fraude de Última Geração
> 
> Missão: Romper as raízes de esquemas criminosos financeiros através de análise de redes e IA
> Diferencial: Não detectamos transações - DESMANTELAMOS QUADRILHAS INTEIRAS



> IMPORTS & CONFIGURAÇÃO INICIAL

In [1]:
# Infrastructure setup and analysis libraries
import pandas as pd
import numpy as np  
import os

# Location of the PaySim transaction log under the Kaggle input directory.
# The name `csv` is kept as-is because the loading cell reads the file through it.
csv = (
    '/kaggle/input/paysim1/'
    'PS_20174392719_1491204439457_log.csv'
)
# Boot banner printed before any data is loaded.
print("üöÄ SYSTEM BOOT: Iniciando ingest√£o de dados no Noir...")

üöÄ SYSTEM BOOT: Iniciando ingest√£o de dados no Noir...


> IMPORTAÇÃO E ENRIQUECIMENTO DO PAYSIM1

In [2]:
print("="*80)
print("üì• IMPORTANDO DATASET PAYSIM1 (MODO PRO)...")
print("="*80)

# Load the full transaction log from the path configured in the setup cell.
df_raw = pd.read_csv(csv)

# Keep only TRANSFER and CASH_OUT rows (per the original note, these are the
# transaction types where the scams actually happen).
df_raw = df_raw[df_raw['type'].isin(['TRANSFER', 'CASH_OUT'])]

print("üé≤ Realizando Amostragem Baseada em Contas...")
all_users = df_raw['nameOrig'].unique()

# Account-based sampling: draw originating accounts at random, then keep the
# COMPLETE history of every drawn account.
# - The draw is capped at the number of available accounts, because sampling
#   without replacement raises ValueError when more items are requested than
#   the population holds.
# - A dedicated seeded generator makes the sample identical from run to run
#   without touching NumPy's global random state.
n_sample_users = min(20000, len(all_users))
sampling_rng = np.random.default_rng(42)
sampled_users = sampling_rng.choice(all_users, n_sample_users, replace=False)
df_sample = df_raw[df_raw['nameOrig'].isin(sampled_users)].copy()

print(f"‚úÖ Dataset Filtrado: {df_sample.shape} transa√ß√µes de {len(sampled_users)} usu√°rios √∫nicos.")


print("üîß CONSTRUINDO PERFIL CRIMINAL DAS CONTAS...")

# Build one profile row per originating account. Named aggregation produces
# the final column names directly, in the same left-to-right order as the
# aggregated outputs.
df_accounts = (
    df_sample
    .groupby('nameOrig')
    .agg(
        # Timing and frequency
        first_step=('step', 'min'),
        last_step=('step', 'max'),
        n_transactions=('step', 'count'),
        # Volume and variability
        total_amount=('amount', 'sum'),
        avg_amount=('amount', 'mean'),
        std_amount=('amount', 'std'),
        max_amount=('amount', 'max'),
        # Number of distinct destination accounts
        n_unique_beneficiaries=('nameDest', 'nunique'),
        # Highest isFraud value among the account's rows
        is_fraud=('isFraud', 'max'),
        # How many of the account's rows are CASH_OUT
        n_cashouts=('type', lambda kinds: (kinds == 'CASH_OUT').sum()),
    )
    .reset_index()
    .rename(columns={'nameOrig': 'account_id'})
    # The sample std is NaN for an account with a single row; store 0 instead.
    .fillna({'std_amount': 0})
)

# Derived features. Each entry may read the columns created by earlier entries.
df_accounts = df_accounts.assign(
    # Span between the account's first and last step.
    activity_window_hours=lambda acc: acc['last_step'] - acc['first_step'],
    # Burn rate: amount moved per unit of activity window. The +0.1 avoids a
    # division by zero when all transactions share the same step.
    burn_rate=lambda acc: acc['total_amount'] / (acc['activity_window_hours'] + 0.1),
    # Cash residence: window divided by transaction count, so an account that
    # moves everything within one step gets 0. Accounts with a single
    # transaction get a fixed 0.1 (assumed to be a quick in-and-out if fraud).
    cash_residence_hours=lambda acc: (
        acc['activity_window_hours'] / acc['n_transactions']
    ).where(acc['n_transactions'] > 1, 0.1),
    # Distinct beneficiaries per transaction.
    beneficiary_entropy=lambda acc: acc['n_unique_beneficiaries'] / acc['n_transactions'],
)



# Synthetic device id pools: a fixed set of 50 "dirty" ids and one "clean" id
# per profiled account.
n_accs = df_accounts.shape[0]

dirty_devices = list(map("DEV_DIRTY_{}".format, range(50)))

clean_devices = list(map("DEV_CLEAN_{}".format, range(n_accs)))

def assign_device(row, dirty_pool=None, clean_pool=None):
    """Pick a synthetic device id for one account row.

    Rows with ``row['is_fraud'] == 1`` get an id from the dirty pool with
    probability 0.85, otherwise a ``DEV_UNIQUE_<n>`` id with ``n`` drawn from
    ``[0, 99999)``. All other rows get an id from the clean pool with
    probability 0.99, otherwise one from the dirty pool.

    Args:
        row: Mapping-like record exposing ``row['is_fraud']``.
        dirty_pool: Indexable sequence of dirty device ids. Defaults to the
            module-level ``dirty_devices``.
        clean_pool: Indexable sequence of clean device ids. Defaults to the
            module-level ``clean_devices``.

    Returns:
        The chosen device id (a plain ``str`` when the pools are lists of str).
    """
    # Resolve the defaults at call time so the one-argument form used by
    # DataFrame.apply keeps reading the module-level pools.
    if dirty_pool is None:
        dirty_pool = dirty_devices
    if clean_pool is None:
        clean_pool = clean_devices

    def pick(pool):
        # Index with a random integer rather than np.random.choice(pool):
        # choice() converts a Python list to an array on every call, which
        # makes a row-wise apply quadratic in the pool size.
        return pool[np.random.randint(len(pool))]

    if row['is_fraud'] == 1:
        if np.random.random() < 0.85:
            return pick(dirty_pool)
        return f"DEV_UNIQUE_{np.random.randint(99999)}"

    if np.random.random() < 0.99:
        return pick(clean_pool)
    return pick(dirty_pool)

# Draw one synthetic device id per account (row-wise, through assign_device).
df_accounts['device_id'] = df_accounts.apply(assign_device, axis=1)

# device_sharing_score = number of account rows that landed on the same device id.
device_counts = df_accounts.groupby('device_id').size().to_dict()
df_accounts['device_sharing_score'] = df_accounts['device_id'].map(device_counts)

df_accounts = df_accounts.assign(
    # 1.0 when the whole activity fits in a window of at most 1 and the total
    # amount exceeds 100000, otherwise 0.0.
    dormancy_score=lambda acc: (
        (acc['activity_window_hours'] <= 1) & (acc['total_amount'] > 100000)
    ).astype(float),
    # Ratio of n_cashouts to n_transactions.
    crypto_exposure=lambda acc: acc['n_cashouts'] / acc['n_transactions'],
    # Constant 0 for every account at this stage.
    fraud_network_distance=0,
    # Text label derived from the is_fraud flag.
    fraud_type=lambda acc: np.where(acc['is_fraud'] == 1, 'account_takeover', 'legit'),
)

# Independent working copy of the finished account profiles.
df = df_accounts.copy()

print(f"‚úÖ PERFILAMENTO CONCLU√çDO!")
print(f"üìä Total de Contas √önicas: {len(df)}")
print(f"üö® Contas Comprometidas: {df['is_fraud'].sum()}")
print("\nüîç Amostra do NOIR:")
# Transposed so that each account is a column and each feature a row.
display(df.T)

üì• IMPORTANDO DATASET PAYSIM1 (MODO PRO)...
üé≤ Realizando Amostragem Baseada em Contas...
‚úÖ Dataset Filtrado: (20007, 11) transa√ß√µes de 20000 usu√°rios √∫nicos.
üîß CONSTRUINDO PERFIL CRIMINAL DAS CONTAS...
‚úÖ PERFILAMENTO CONCLU√çDO!
üìä Total de Contas √önicas: 20000
üö® Contas Comprometidas: 60

üîç Amostra do NOIR:


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,19990,19991,19992,19993,19994,19995,19996,19997,19998,19999
account_id,C1000048260,C1000060941,C1000090505,C1000101287,C1000155349,C1000202702,C100033694,C1000500884,C1000541881,C10006175,...,C998705654,C998756586,C998814093,C99900861,C999082009,C999194863,C999221002,C999348029,C999392876,C999905047
first_step,45,42,181,302,376,36,228,590,284,325,...,329,597,234,280,379,373,204,330,503,490
last_step,45,42,181,302,376,36,228,590,284,325,...,329,597,234,280,379,373,204,330,503,490
n_transactions,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
total_amount,63301.32,110310.13,7541.34,311380.91,66353.59,1131259.23,343125.34,164023.26,183777.87,196383.11,...,168239.98,6401.32,89930.18,166719.52,72559.79,73236.64,284948.37,111435.76,701387.11,51242.17
avg_amount,63301.32,110310.13,7541.34,311380.91,66353.59,1131259.23,343125.34,164023.26,183777.87,196383.11,...,168239.98,6401.32,89930.18,166719.52,72559.79,73236.64,284948.37,111435.76,701387.11,51242.17
std_amount,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
max_amount,63301.32,110310.13,7541.34,311380.91,66353.59,1131259.23,343125.34,164023.26,183777.87,196383.11,...,168239.98,6401.32,89930.18,166719.52,72559.79,73236.64,284948.37,111435.76,701387.11,51242.17
n_unique_beneficiaries,1,1,1,1,1,1,1,1,1,1,...,1,1,1,1,1,1,1,1,1,1
is_fraud,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
