# Importar librerias y datos

In [82]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.utils.class_weight import compute_class_weight

# Cargar datos
df = pd.read_csv('datos/credit_card.csv')
labels = pd.read_csv('datos/credit_card_label.csv')

# Merge
data = df.merge(labels, on='Ind_ID')

# Sacar id
data = data.drop(columns=['Ind_ID'])

# Separar features y target
X = data.drop(columns=['label'])
y = data['label']

# One-hot encoding 
X = pd.get_dummies(X)

# Dividir en train, validation y test 
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Imputar valores faltantes con la media
imputer = SimpleImputer(strategy='mean')
X_train = imputer.fit_transform(X_train)
X_val = imputer.transform(X_val)
X_test = imputer.transform(X_test)

print(f"Train shape: {X_train.shape}")
print(f"Validation shape: {X_val.shape}")
print(f"Test shape: {X_test.shape}")

Train shape: (1083, 53)
Validation shape: (232, 53)
Test shape: (233, 53)


## Network 1

In [83]:
# 2 capas (32 y 16 neuronas)
num_classes = 1  
input_shape = X_train.shape[1]  

model1 = keras.Sequential(
    [
        keras.Input(shape=(input_shape,)),
        layers.Dense(32, activation="relu"),
        layers.Dense(16, activation="relu"),
        layers.Dense(num_classes, activation="sigmoid"),
    ]
)
model1.summary()

In [84]:

model1.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
history1 = model1.fit(X_train, y_train, batch_size=32, epochs=50, validation_data=(X_val, y_val))

Epoch 1/50
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.4848 - loss: 12654.7129 - val_accuracy: 0.9095 - val_loss: 1229.6683
Epoch 2/50
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 968us/step - accuracy: 0.8726 - loss: 1722.7191 - val_accuracy: 0.8922 - val_loss: 312.6680
Epoch 3/50
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 921us/step - accuracy: 0.7368 - loss: 353.8226 - val_accuracy: 0.8491 - val_loss: 107.0119
Epoch 4/50
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 881us/step - accuracy: 0.7572 - loss: 142.5706 - val_accuracy: 0.8190 - val_loss: 35.4240
Epoch 5/50
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 876us/step - accuracy: 0.7378 - loss: 77.8633 - val_accuracy: 0.9052 - val_loss: 39.2551
Epoch 6/50
[1m34/34[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 879us/step - accuracy: 0.7886 - loss: 124.3811 - val_accuracy: 0.9052 - val_loss: 134.6349
Epoc

In [85]:

score1 = model1.evaluate(X_val, y_val, batch_size=32, verbose=0)
print("Validation loss:", score1[0])
print("Validation accuracy:", score1[1])

Validation loss: 60.196407318115234
Validation accuracy: 0.9094827771186829


## Network 2

In [86]:
# neuronas (128, 64, 32), más capas y dropout 
# Dropout: 0.3 y 0.2 para evitar overfitting
# Optimizador: rmsprop
# Batch size: 64
model2 = keras.Sequential(
    [
        keras.Input(shape=(input_shape,)),
        layers.Dense(128, activation="relu"),
        layers.Dropout(0.3),
        layers.Dense(64, activation="relu"),
        layers.Dropout(0.2),
        layers.Dense(32, activation="relu"),
        layers.Dense(num_classes, activation="sigmoid"),
    ]
)
model2.summary()

In [87]:
model2.compile(loss="binary_crossentropy", optimizer="rmsprop", metrics=["accuracy"])
history2 = model2.fit(X_train, y_train, batch_size=64, epochs=50, validation_data=(X_val, y_val))

Epoch 1/50
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.7276 - loss: 3201.9639 - val_accuracy: 0.9095 - val_loss: 691.5573
Epoch 2/50
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7692 - loss: 1692.0570 - val_accuracy: 0.9095 - val_loss: 336.2500
Epoch 3/50
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7821 - loss: 1129.9766 - val_accuracy: 0.9095 - val_loss: 338.3461
Epoch 4/50
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7812 - loss: 908.5715 - val_accuracy: 0.9095 - val_loss: 114.3849
Epoch 5/50
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7747 - loss: 681.7516 - val_accuracy: 0.9095 - val_loss: 161.7653
Epoch 6/50
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.7812 - loss: 601.7053 - val_accuracy: 0.8750 - val_loss: 97.7059
Epoch 7/50
[1

In [88]:
score2 = model2.evaluate(X_val, y_val, batch_size=32, verbose=0)
print("Validation loss:", score2[0])
print("Validation accuracy:", score2[1])

Validation loss: 0.3233559727668762
Validation accuracy: 0.9094827771186829


## Network 3

In [89]:
# 1 capa 24 neuronas
# Función de activación: tanh 
# Optimizador: SGD con learning rate 0.01
# Batch size: 16
model3 = keras.Sequential(
    [
        keras.Input(shape=(input_shape,)),
        layers.Dense(24, activation="tanh"),
        layers.Dense(num_classes, activation="sigmoid"),
    ]
)
model3.summary()

In [90]:
model3.compile(loss="binary_crossentropy", optimizer=keras.optimizers.SGD(learning_rate=0.01), metrics=["accuracy"])
history3 = model3.fit(X_train, y_train, batch_size=16, epochs=50, validation_data=(X_val, y_val))

Epoch 1/50
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8070 - loss: 0.4513 - val_accuracy: 0.8319 - val_loss: 0.3539
Epoch 2/50
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 611us/step - accuracy: 0.8633 - loss: 0.3941 - val_accuracy: 0.9095 - val_loss: 0.3319
Epoch 3/50
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 586us/step - accuracy: 0.8763 - loss: 0.3822 - val_accuracy: 0.9095 - val_loss: 0.3192
Epoch 4/50
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 615us/step - accuracy: 0.8763 - loss: 0.3776 - val_accuracy: 0.9095 - val_loss: 0.3171
Epoch 5/50
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 589us/step - accuracy: 0.8763 - loss: 0.3747 - val_accuracy: 0.9095 - val_loss: 0.3158
Epoch 6/50
[1m68/68[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 600us/step - accuracy: 0.8763 - loss: 0.3729 - val_accuracy: 0.9095 - val_loss: 0.3142
Epoch 7/50
[1m68/68[0m [32m

In [91]:
score3 = model3.evaluate(X_val, y_val, batch_size=32, verbose=0)
print("Validation loss:", score3[0])
print("Validation accuracy:", score3[1])

Validation loss: 0.309378981590271
Validation accuracy: 0.9094827771186829


## Selección del mejor modelo y evaluación en test

In [92]:
print("Network 1 - Validation accuracy:", score1[1])
print("Network 2 - Validation accuracy:", score2[1])
print("Network 3 - Validation accuracy:", score3[1])

best_score = max(score1[1], score2[1], score3[1])
if best_score == score1[1]:
    best_model = model1
    print("\nMejor modelo: Network 1")
elif best_score == score2[1]:
    best_model = model2
    print("\nMejor modelo: Network 2")
else:
    best_model = model3
    print("\nMejor modelo: Network 3")

test_score = best_model.evaluate(X_test, y_test, batch_size=32, verbose=0)
print("Test loss:", test_score[0])
print("Test accuracy:", test_score[1])

Network 1 - Validation accuracy: 0.9094827771186829
Network 2 - Validation accuracy: 0.9094827771186829
Network 3 - Validation accuracy: 0.9094827771186829

Mejor modelo: Network 1
Test loss: 62.255069732666016
Test accuracy: 0.9055793881416321


## Comparación con TP1

In [93]:
print("Resultados TP1:")
print("Regresión Logística - Accuracy: 0.9054")
print("Árbol de Decisión (GridSearchCV) - Accuracy: 0.8860")
print("Random Forest (GridSearchCV) - Accuracy: 0.9204")
print("\nResultado mejor red neuronal (TP2):")
print(f"Test accuracy: {test_score[1]:.4f}")
if test_score[1] > 0.9204:
    print("- La red neuronal superó al mejor modelo del TP1.")
else:
    print("- Random Forest del TP1 tenía el mejor accuracy (0.9204)")
print("\nObservación: Las redes 2 y 3 tienen el mismo validation accuracy porque las dos predicen siempre la clase mayoritaria (clase 0) porque el")
print(" dataset esta desbalanceado (88.7% clase 0 vs 11.3% clase 1). Se podría usar class_weight en el fit para darle más importancia a la clase minoritaria en el entrenamiento.")

Resultados TP1:
Regresión Logística - Accuracy: 0.9054
Árbol de Decisión (GridSearchCV) - Accuracy: 0.8860
Random Forest (GridSearchCV) - Accuracy: 0.9204

Resultado mejor red neuronal (TP2):
Test accuracy: 0.9056
- Random Forest del TP1 tenía el mejor accuracy (0.9204)

Observación: Las redes 2 y 3 tienen el mismo validation accuracy porque las dos predicen siempre la clase mayoritaria (clase 0) porque el
 dataset esta desbalanceado (88.7% clase 0 vs 11.3% clase 1). Se podría usar class_weight en el fit para darle más importancia a la clase minoritaria en el entrenamiento.
