# 🔗 Proyecto de Detección de Fraude - Google Colab

In [None]:
# ✅ Clonar el repositorio (asegúrate de que sea público)
!git clone https://github.com/solivare/DeepNeuralNetworkUSS.git
%cd DeepNeuralNetworkUSS/Examples/Fraud


In [None]:
# ✅ Instalar librerías necesarias
!pip install -r requirements.txt


In [None]:
# ✅ Descargar el dataset desde Google Drive (archivo ya compartido públicamente)
!pip install -q gdown
!mkdir -p data
!gdown --id 1KVCoiYWN9mtDnQhhqDNeEpDr9Az9jKVa -O data/creditcard.csv


In [None]:
# ✅ Process the data.
# Runs src/preprocess.py in a separate Python process. The path is relative,
# so the working directory must be the project folder that contains src/.
!python src/preprocess.py


In [None]:
# ✅ Train the NN model.
# Imports src.train and calls train() in a separate Python process, so nothing
# created during training (including any return value of train()) is available
# in the notebook kernel afterwards.
# NOTE(review): presumably train() persists the model to models/model.keras,
# which a later cell loads from disk — confirm in src/train.py.
!python -c "import src.train as train; train.train()"

In [None]:
# ✅ Load configuration and data
import sys, os

# Make the project modules under src/ importable. The membership check keeps
# sys.path free of duplicate entries when this cell is re-run.
if "src" not in sys.path:
    sys.path.append("src")

import train
import evaluate
# NOTE(review): the star import presumably supplies the helpers this notebook
# calls unqualified (get_training_data_path, plot_training,
# plot_confusion_matrix, get_classification_metrics_df,
# plot_prediction_distribution, plot_f1_vs_threshold, plot_ks_overtraining)
# — confirm against src/utils.py and consider importing them by name.
from utils import *

# Imported explicitly: the ROC cell uses `plt` directly, and relying on the
# star import above to leak it would break as soon as utils defines __all__
# or stops importing pyplot under that name.
import matplotlib.pyplot as plt
import pandas as pd
from tensorflow.keras.models import load_model
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# Load the configuration, then the train and test splits it points to
config = train.load_config()
train_path = get_training_data_path(config)
X_train, y_train = train.load_data(train_path, config["base_dir"])
X_test, y_test = train.load_data(config["paths"]["test"], config["base_dir"])

# Scaling: statistics are fitted on the training split only and then applied
# unchanged to the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# ✅ Load the NN model and train the Logistic Regression model

# Neural network: load the saved model and score both splits
model_path = os.path.join(config["base_dir"], "models", "model.keras")
nn_model = load_model(model_path)
# NOTE(review): assumes the network was trained on inputs standardized the
# same way as X_*_scaled — confirm in src/train.py.
y_prob_nn = nn_model.predict(X_test_scaled).flatten()
y_prob_train_nn = nn_model.predict(X_train_scaled).flatten()
y_pred_nn = (y_prob_nn > 0.5).astype(int)  # hard labels at a 0.5 threshold

# Logistic regression: fitted here, in the kernel, on the scaled training split
lr_model = LogisticRegression(max_iter=1000)
lr_model.fit(X_train_scaled, y_train)
y_prob_lr = lr_model.predict_proba(X_test_scaled)[:, 1]  # positive-class column
y_pred_lr = (y_prob_lr > 0.5).astype(int)

# Loss function and precision plot for the NN.
# Training was launched with `!python -c ...`, i.e. in a separate process, so
# this notebook never assigns `history` itself. Calling plot_training(history)
# unconditionally raised NameError on a fresh kernel and aborted the cell; the
# guard keeps the plot whenever `history` does exist in the kernel namespace.
if "history" in globals():
    plot_training(history)
else:
    print("⚠️ 'history' no está disponible en este kernel (el entrenamiento se ejecutó "
          "en un proceso separado); se omite el gráfico de entrenamiento.")

In [None]:
# ✅ Evaluation: metrics generation and visualizations

from sklearn.metrics import classification_report, roc_auc_score, roc_curve

# Logistic regression: per-class report from the hard predictions, AUC from
# the positive-class probabilities. Heading spelling fixed ("Regresión") to
# match the label used in the ROC and confusion-matrix cells.
print("🔎 Regresión Logística")
print(classification_report(y_test, y_pred_lr, target_names=["No Fraude", "Fraude"]))
print("AUC:", roc_auc_score(y_test, y_prob_lr))

# Neural network: same two metrics, for side-by-side comparison
print("\n🔎 Red Neuronal")
print(classification_report(y_test, y_pred_nn, target_names=["No Fraude", "Fraude"]))
print("AUC:", roc_auc_score(y_test, y_prob_nn))

In [None]:
# ✅ Compare ROC curves
# One (FPR, TPR) curve per model, computed from the test-set probabilities;
# the thresholds returned by roc_curve are not needed and are discarded.
fpr_lr, tpr_lr, _ = roc_curve(y_test, y_prob_lr)
fpr_nn, tpr_nn, _ = roc_curve(y_test, y_prob_nn)

# Both curves share one axes, drawn through the explicit figure/axes interface
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(fpr_lr, tpr_lr, label="Regresión Logística")
ax.plot(fpr_nn, tpr_nn, label="Red Neuronal")
ax.plot([0, 1], [0, 1], 'k--', alpha=0.5)  # dashed diagonal as a chance-level reference
ax.set_xlabel("Tasa de falsos positivos")
ax.set_ylabel("Tasa de verdaderos positivos")
ax.set_title("Curva ROC - Comparación de Modelos")
ax.legend()
ax.grid()
plt.show()

In [None]:
# ✅ Model metrics

# Confusion matrices: one call per model, logistic regression first
for model_preds, model_label, model_cmap in (
    (y_pred_lr, "Regresión Logística", "Blues"),
    (y_pred_nn, "Red Neuronal", "Greens"),
):
    plot_confusion_matrix(y_test, model_preds, model_name=model_label, cmap=model_cmap)

# Metrics table: the helper receives (labels, predictions, probabilities) for
# the logistic regression followed by the same triple for the neural network.
lr_results = (y_test, y_pred_lr, y_prob_lr)
nn_results = (y_test, y_pred_nn, y_prob_nn)
df_metrics = get_classification_metrics_df(*lr_results, *nn_results)
display(df_metrics)

In [None]:
# ✅ Prediction distribution for the NN
# Passes the test labels and the NN's test-set probabilities to two project
# helpers (not defined in this notebook; presumably star-imported from utils).
# NOTE(review): judging by their names they plot the score distribution and
# F1 as a function of the decision threshold — confirm in src/utils.py.
plot_prediction_distribution(y_test, y_prob_nn)
plot_f1_vs_threshold(y_test, y_prob_nn, modelo="Red Neuronal")

In [None]:
# ✅ Prediction distribution for Logistic Regression
# Same two helper calls as for the NN, here fed with the logistic regression's
# test-set positive-class probabilities.
# NOTE(review): helper behavior is not visible from this notebook; presumably
# a score-distribution plot and an F1-vs-threshold curve — confirm in src/utils.py.
plot_prediction_distribution(y_test, y_prob_lr)
plot_f1_vs_threshold(y_test, y_prob_lr, modelo="Regresión Logística")

In [None]:
# ✅ Overfitting check for the neural network
# Hands the helper the NN's labels and predicted probabilities for the
# training split, followed by the same pair for the test split.
# NOTE(review): presumably compares the train and test score distributions
# with a Kolmogorov–Smirnov test — confirm in src/utils.py.
plot_ks_overtraining(y_train, y_prob_train_nn, y_test, y_prob_nn)