In [1]:
import pickle
import numpy as np
import time
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, confusion_matrix, classification_report, f1_score,
    precision_score, recall_score, top_k_accuracy_score
)
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical

# Cargar datos
data_dict = pickle.load(open('../model/data.pickle', 'rb'))
data = np.asarray(data_dict['data'])
labels = np.asarray(data_dict['labels'])

# Codificar etiquetas
le = LabelEncoder()
labels_encoded = le.fit_transform(labels)
num_classes = len(le.classes_)

# Separar datos
x_train, x_test, y_train, y_test = train_test_split(
    data, labels_encoded, test_size=0.2, shuffle=True, stratify=labels_encoded
)

results = {}

# 1. XGBoost
start = time.time()
xgb = XGBClassifier(use_label_encoder=False, eval_metric='mlogloss')
xgb.fit(x_train, y_train)
train_time = time.time() - start
y_pred_xgb = xgb.predict(x_test)
y_pred_proba_xgb = xgb.predict_proba(x_test)
results['XGBoost'] = {
    'accuracy': accuracy_score(y_test, y_pred_xgb),
    'f1_macro': f1_score(y_test, y_pred_xgb, average='macro'),
    'precision_macro': precision_score(y_test, y_pred_xgb, average='macro'),
    'recall_macro': recall_score(y_test, y_pred_xgb, average='macro'),
    'confusion_matrix': confusion_matrix(y_test, y_pred_xgb),
    'classification_report': classification_report(y_test, y_pred_xgb, target_names=le.classes_),
    'top3_accuracy': top_k_accuracy_score(y_test, y_pred_proba_xgb, k=3),
    'train_time': train_time
}

# 2. Regresión Logística
start = time.time()
lr = LogisticRegression(max_iter=1000, multi_class='multinomial')
lr.fit(x_train, y_train)
train_time = time.time() - start
y_pred_lr = lr.predict(x_test)
y_pred_proba_lr = lr.predict_proba(x_test)
results['LogisticRegression'] = {
    'accuracy': accuracy_score(y_test, y_pred_lr),
    'f1_macro': f1_score(y_test, y_pred_lr, average='macro'),
    'precision_macro': precision_score(y_test, y_pred_lr, average='macro'),
    'recall_macro': recall_score(y_test, y_pred_lr, average='macro'),
    'confusion_matrix': confusion_matrix(y_test, y_pred_lr),
    'classification_report': classification_report(y_test, y_pred_lr, target_names=le.classes_),
    'top3_accuracy': top_k_accuracy_score(y_test, y_pred_proba_lr, k=3),
    'train_time': train_time
}

# 3. Red Neuronal (Keras)
y_train_cat = to_categorical(y_train, num_classes)
y_test_cat = to_categorical(y_test, num_classes)
input_dim = x_train.shape[1]

model_nn = Sequential([
    Dense(128, activation='relu', input_shape=(input_dim,)),
    Dropout(0.3),
    Dense(64, activation='relu'),
    Dropout(0.3),
    Dense(num_classes, activation='softmax')
])

model_nn.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

start = time.time()
model_nn.fit(x_train, y_train_cat, epochs=20, batch_size=32, verbose=0)
train_time = time.time() - start

y_pred_nn = np.argmax(model_nn.predict(x_test), axis=1)
y_pred_proba_nn = model_nn.predict(x_test)

results['NeuralNetwork'] = {
    'accuracy': accuracy_score(y_test, y_pred_nn),
    'f1_macro': f1_score(y_test, y_pred_nn, average='macro'),
    'precision_macro': precision_score(y_test, y_pred_nn, average='macro'),
    'recall_macro': recall_score(y_test, y_pred_nn, average='macro'),
    'confusion_matrix': confusion_matrix(y_test, y_pred_nn),
    'classification_report': classification_report(y_test, y_pred_nn, target_names=le.classes_),
    'top3_accuracy': top_k_accuracy_score(y_test, y_pred_proba_nn, k=3),
    'train_time': train_time
}

# Mostrar resultados
for model_name, metrics in results.items():
    print(f"\n=== {model_name} ===")
    print(f"Accuracy: {metrics['accuracy']:.4f}")
    print(f"F1 Macro: {metrics['f1_macro']:.4f}")
    print(f"Precision Macro: {metrics['precision_macro']:.4f}")
    print(f"Recall Macro: {metrics['recall_macro']:.4f}")
    print(f"Top-3 Accuracy: {metrics['top3_accuracy']:.4f}")
    print(f"Tiempo de entrenamiento: {metrics['train_time']:.2f} segundos")
    print("Reporte de clasificación:")
    print(metrics['classification_report'])
    # Si quieres ver la matriz de confusión:
    print("Matriz de confusión:\n", metrics['confusion_matrix'])

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 883us/step
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 668us/step

=== XGBoost ===
Accuracy: 0.9657
F1 Macro: 0.9634
Precision Macro: 0.9637
Recall Macro: 0.9638
Top-3 Accuracy: 0.9891
Tiempo de entrenamiento: 3.34 segundos
Reporte de clasificación:
              precision    recall  f1-score   support

           0       0.90      0.98      0.94        56
           1       0.99      0.99      0.99        94
           2       0.99      0.98      0.98        85
           3       1.00      0.99      0.99        99
           4       0.98      1.00      0.99        99
           5       0.99      1.00      1.00       100
           6       0.99      0.98      0.98        83
           7       0.99      1.00      0.99        96
           8       0.99      0.97      0.98        90
           9       1.00      1.00      1.00       100
           A       0.95      0.98      0.97        86
           B     