| Modelo                | MAPE     | RMSE      | R²       | Ljung-Box p-value | Jarque-Bera p-value |
|-----------------------|----------|-----------|----------|-------------------|---------------------|
| K-NN                  | 0.037967 | 0.143212  | 0.082747 | 0.224391          | 0.0                 |
| Linear Regression     | 1.989183 | 13.468950 | 0.657471 | 0.999805          | 0.0                 |
| Ridge Regression      | 1.989183 | 13.468953 | 0.657470 | 0.999805          | 0.0                 |
| Lasso Regression      | 1.925378 | 13.480709 | 0.656872 | 0.999793          | 0.0                 |
| Random Forest Regressor | —        | —         | —        | —                 | —                   |
| XGBoost Regressor     | —        | —         | —        | —                 | —                   |
| SVR                   | —        | —         | —        | —                 | —                   |                  |       |       |          |                         |                         |


KNN

In [None]:
import numpy as np
import pandas as pd
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.stats.stattools import jarque_bera
from sklearn.metrics import r2_score, mean_absolute_error, explained_variance_score, mean_squared_error, mean_squared_log_error

# Seleccionar las características y la variable objetivo
X = tornados_target[['mag', 'slat', 'slon', 'elat', 'elon', 'len', 'wid','fat','f1', 'f2', 'f3', 'f4']]
y = df['mortality']

# Dividir los datos en conjunto de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=66)

# Ahora puedes crear el modelo sin el error
knn = KNeighborsRegressor(n_neighbors=81)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
score = knn.score(X_test, y_test)

# Cálculo de métricas
r2_valid = r2_score(y_test, y_pred)
mae_valid = mean_absolute_error(y_test, y_pred)
evs_valid = explained_variance_score(y_test, y_pred, multioutput='uniform_average')
rmse_valid = np.sqrt(mean_squared_error(y_test, y_pred))
rmsle_valid = np.sqrt(mean_squared_log_error(y_test, np.abs(y_pred)))  # Evitar valores negativos

# Prueba de Ljung-Box (residuos deben estar disponibles)
residuos = y_test - y_pred
ljung_box_test = acorr_ljungbox(residuos, lags=[10], return_df=True)

# Prueba de Jarque-Bera
jb_test = jarque_bera(residuos)

# Mostrar los resultados
print('R2 Valid:', r2_valid)
print('EVS Valid:', evs_valid)
print('MAE Valid:', mae_valid)
print('RMSE Valid:', rmse_valid)
print('RMSLE Valid:', rmsle_valid)
print('\nLjung-Box Test:')
print(ljung_box_test)
print('\nJarque-Bera Test:')
print(f'Estadístico: {jb_test[0]}, p-valor: {jb_test[1]}')

R2 Valid: 0.08274669092769293
EVS Valid: 0.08327231375666555
MAE Valid: 0.037966521313441075
RMSE Valid: 0.1432116600452836
RMSLE Valid: 0.09951451645787435

Ljung-Box Test:
     lb_stat  lb_pvalue
10  12.98715   0.224391

Jarque-Bera Test:
Estadístico: 973530.9803433096, p-valor: 0.

Regresión Linela Ridge and Lasso

In [None]:
# Definir X y y (asegúrate de que ya tienes estas variables previamente definidas)
X = df[['mag', 'slat', 'slon', 'elat', 'elon', 'len', 'wid','fat','f1', 'f2', 'f3', 'f4','loss']]
y = df['inj']

# Dividir los datos en conjunto de entrenamiento y prueba (80% entrenamiento, 20% prueba)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Crear y ajustar el modelo de regresión lineal
lr = LinearRegression().fit(X_train, y_train)
lasso = Lasso().fit(X_train, y_train)
ridge = Ridge(alpha=0.1).fit(X_train, y_train)

In [None]:
import numpy as np
import pandas as pd
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.stats.stattools import jarque_bera
from sklearn.linear_model import LinearRegression, Lasso, Ridge
from sklearn.metrics import r2_score, mean_absolute_error, explained_variance_score, mean_squared_error, mean_squared_log_error
from sklearn.model_selection import train_test_split


# Dividir en conjunto de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Definir modelos
models = {
    'Linear Regression': LinearRegression(),
    'Lasso Regression': Lasso(alpha=0.1),
    'Ridge Regression': Ridge(alpha=0.1)
}

# DataFrame para almacenar resultados
results = []

# Evaluar cada modelo
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    
    # Cálculo de métricas
    r2_valid = r2_score(y_test, y_pred)
    mae_valid = mean_absolute_error(y_test, y_pred)
    evs_valid = explained_variance_score(y_test, y_pred, multioutput='uniform_average')
    rmse_valid = np.sqrt(mean_squared_error(y_test, y_pred))
    rmsle_valid = np.sqrt(mean_squared_log_error(y_test, np.abs(y_pred)))  # Evitar valores negativos
    
    # Prueba de Ljung-Box
    residuos = y_test - y_pred
    ljung_box_test = acorr_ljungbox(residuos, lags=[10], return_df=True)
    
    # Prueba de Jarque-Bera
    jb_test = jarque_bera(residuos)
    
    # Guardar resultados
    results.append([
        name, mae_valid, rmse_valid, r2_valid,
        ljung_box_test['lb_pvalue'].values[0],
        jb_test[1]
    ])

# Convertir resultados a DataFrame
results_df = pd.DataFrame(results, columns=[
    'Modelo', 'MAPE', 'RMSE', 'R^2', 'Ljung-Box p-value', 'Jarque-Bera p-value'
])

# Mostrar resultados
print(results_df)

Modelo      MAPE       RMSE       R^2  Ljung-Box p-value  \
0  Linear Regression  1.989183  13.468950  0.657471           0.999805   
1   Lasso Regression  1.925378  13.480709  0.656872           0.999793   
2   Ridge Regression  1.989183  13.468953  0.657470           0.999805   

   Jarque-Bera p-value  
0                  0.0  
1                  0.0  
2                  0.0

| Modelo                         | precision | recall  | accuracy | f₁-score | AUC     |
|--------------------------------|-----------|---------|----------|----------|---------|
| Clasificación Bayesiana        | 0.22      | 0.66    | 0.9094   | 0.33     | —       |
| K-NN                           | 1.00      | 0.31    | 0.9842   | 0.47     | —       |
| L1/L2 Penalty Logistic Regression | 0.69 (L1) | 0.33 (L1) | 0.9092 (L1) | 0.44 (L1) | —       |
|                                | 0.53 (L2) | 0.10 (L2) | 0.8906 (L2) | 0.16 (L2) | —       |
| Random Forest                  | 1.00      | 1.00    | 1.0000   | 1.00     | —       |
| XGBoost                        | 1.00      | 1.00    | 1.0000   | 1.00     | —       |
| SVM                            | 0.96      | 0.12    | 0.9700   | 0.21     | —       |

XGboost and Ramdon Forest

In [None]:
import xgboost as xgb
from sklearn.model_selection import train_test_split
import numpy as np

ruta = r'C:/Users/wmanj/OneDrive/Escritorio/MACHINELEARNING/tornados.csv'  
df = pd.read_csv(ruta) 
df['loss'] = df['loss'].replace(0, pd.NA)
df['loss'] = df['loss'].interpolate(method='linear')
df['mag'] = df['mag'].fillna(df['mag'].mean())
df.isnull().sum()

# Crear la columna 'mortality' en el DataFrame original
df['mortality'] = df['fat'].apply(lambda x: 0 if x == 0 else 1)

# Renombrar el DataFrame a 'mortality_target'
mortality_target = df
import numpy as np

# Crear la columna 'mortality' con 0 si 'fat' es 0, y 1 si 'fat' es mayor que 0
df['mortality'] = np.where(df['fat'] == 0, 0, 1)

# Contar la cantidad de ceros y unos
print("Cantidad de ceros:", (df['mortality'] == 0).sum())
print("Cantidad de unos:", (df['mortality'] == 1).sum())

# Asignar el DataFrame modificado a 'tornados.target'
tornados_target = df
from sklearn.model_selection import train_test_split
X = tornados_target[['om', 'yr', 'mo', 'dy', 'stf', 'mag', 'inj', 'fat', 'loss', 'slat', 'slon', 'elat', 'elon', 'len', 'wid', 'ns', 'sn', 'f1', 'f2', 'f3', 'f4']]
y = df['mortality']
# Dividir los datos en conjunto de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

# Crear el modelo XGBoost
xgboost_model = xgb.XGBClassifier(random_state=0)
forest = RandomForestClassifier().fit(X_train, y_train)

In [None]:
import numpy as np
import pandas as pd
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.stats.stattools import jarque_bera
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.metrics import (
    accuracy_score, 
    log_loss, 
    roc_auc_score, 
    confusion_matrix, 
    classification_report
)
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler

def generar_datos(n_samples=500, n_features=5, ruido=0.3, seed=42):
    """Genera datos sintéticos para clasificación"""
    np.random.seed(seed)
    X = np.random.rand(n_samples, n_features)
    pesos = np.array([1.5, -2, 0.5, 3, -1])
    y = (X @ pesos + np.random.normal(0, ruido, n_samples) > 0).astype(int)
    return X, y

def evaluar_modelo(model, X_train, X_test, y_train, y_test):
    """Evalúa un modelo de clasificación con múltiples métricas"""
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    y_pred_proba = model.predict_proba(X_test)[:, 1]
    
    metricas = {
        'Accuracy': accuracy_score(y_test, y_pred),
        'LogLoss': log_loss(y_test, y_pred_proba),
        'ROC AUC': roc_auc_score(y_test, y_pred_proba),
        'Confusion Matrix': confusion_matrix(y_test, y_pred).tolist(),
        'Cross-Val Score': np.mean(cross_val_score(model, X_train, y_train, cv=5))
    }
    
    # Pruebas estadísticas de residuos
    residuos = y_test - y_pred
    
    try:
        metricas['Ljung-Box p-value'] = acorr_ljungbox(residuos, lags=[10], return_df=True)['lb_pvalue'].values[0]
    except:
        metricas['Ljung-Box p-value'] = np.nan
    
    try:
        metricas['Jarque-Bera p-value'] = jarque_bera(residuos)[1]
    except:
        metricas['Jarque-Bera p-value'] = np.nan
    
    return metricas

def comparar_modelos(X, y, test_size=0.3, seed=42):
    """Compara múltiples modelos de clasificación"""
    # Normalizar datos
    scaler = StandardScaler()
    X_escalado = scaler.fit_transform(X)
    
    # Dividir datos
    X_train, X_test, y_train, y_test = train_test_split(
        X_escalado, y, test_size=test_size, random_state=seed
    )
    
    # Definir modelos
    modelos = {
        'Random Forest': RandomForestClassifier(random_state=seed),
        'XGBoost': xgb.XGBClassifier(
            use_label_encoder=False, 
            eval_metric='logloss', 
            random_state=seed
        )
    }
    
    # Resultados
    resultados = {}
    for nombre, modelo in modelos.items():
        resultados[nombre] = evaluar_modelo(modelo, X_train, X_test, y_train, y_test)
    
    # Convertir a DataFrame
    df_resultados = pd.DataFrame.from_dict(resultados, orient='index')
    
    return df_resultados

# Ejecutar análisis
X, y = generar_datos()
resultados = comparar_modelos(X, y)
print("Resultados de Comparación de Modelos:\n")
print(resultados)

# Exportar resultados
resultados.to_csv('resultados_modelos.csv')

Resultados de Comparación de Modelos:

               Accuracy   LogLoss   ROC AUC      Confusion Matrix  \
Random Forest  0.893333  0.235760  0.962262  [[17, 15], [1, 117]]   
XGBoost        0.893333  0.241258  0.961335  [[19, 13], [3, 115]]   

               Cross-Val Score  Ljung-Box p-value  Jarque-Bera p-value  
Random Forest         0.891429           0.270149         1.047062e-59  
XGBoost               0.900000           0.878839         1.114084e-56

Naive Bayes

In [None]:
import time
import pandas as pd
from sklearn.naive_bayes import BernoulliNB
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, confusion_matrix, classification_report
)
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split

# 1. Repetimos la separación si no está hecha
X = df_ml.drop('mortality', axis=1)
y = df_ml['mortality']

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=101, stratify=y
)

# 2. Aplicar SMOTE solo al conjunto de entrenamiento
sm = SMOTE(random_state=42)
X_train_sm, y_train_sm = sm.fit_resample(X_train, y_train)

print("Distribución después de SMOTE:")
print(y_train_sm.value_counts(normalize=True))

# 3. Escalar los datos
scaler = StandardScaler()
X_train_sm_scaled = scaler.fit_transform(X_train_sm)
X_test_scaled = scaler.transform(X_test)

# 4. Entrenar modelo con cronómetro ⏱️
start_time = time.time()
model = BernoulliNB()
model.fit(X_train_sm_scaled, y_train_sm)
tiempo_entrenamiento = time.time() - start_time

# 5. Predecir sobre el conjunto de prueba (sin SMOTE)
y_pred = model.predict(X_test_scaled)
y_prob = model.predict_proba(X_test_scaled)[:, 1]

# 6. Evaluar resultados
print("\n🔍 Accuracy:", accuracy_score(y_test, y_pred))
print("\n📊 Matriz de confusión:\n", confusion_matrix(y_test, y_pred))
print("\n📋 Reporte de clasificación:\n", classification_report(y_test, y_pred))

# 7. Guardar métricas en DataFrame
resultados_bayes_smote = pd.DataFrame({
    'Modelo': ['Naive Bayes (con SMOTE)'],
    'Accuracy': [round(accuracy_score(y_test, y_pred), 4)],
    'Precision': [round(precision_score(y_test, y_pred), 4)],
    'Recall': [round(recall_score(y_test, y_pred), 4)],
    'F1-score': [round(f1_score(y_test, y_pred), 4)],
    'AUC': [round(roc_auc_score(y_test, y_prob), 4)],
    'CPU time (s)': [round(tiempo_entrenamiento, 4)]
})

# 8. Mostrar resultados
display(resultados_bayes_smote)

Distribución después de SMOTE:
mortality
0    0.5
1    0.5
Name: proportion, dtype: float64

🔍 Accuracy: 0.9094412331406551

📊 Matriz de confusión:
 [[11044   982]
 [  146   284]]

📋 Reporte de clasificación:
               precision    recall  f1-score   support

           0       0.99      0.92      0.95     12026
           1       0.22      0.66      0.33       430

    accuracy                           0.91     12456
   macro avg       0.61      0.79      0.64     12456
weighted avg       0.96      0.91      0.93     12456

SVM

In [None]:
from sklearn.svm import LinearSVC
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score,
    f1_score, roc_auc_score, confusion_matrix, classification_report
)
from sklearn.calibration import CalibratedClassifierCV
import pandas as pd
import time

# Medir tiempo de entrenamiento
inicio = time.time()

# Entrenamos con LinearSVC (no tiene predict_proba, así que usamos CalibratedClassifierCV)
linear_svc_base = LinearSVC(random_state=42, max_iter=10000)
calibrated_svc = CalibratedClassifierCV(linear_svc_base, cv=5)
calibrated_svc.fit(X_train_scaled, y_train)

fin = time.time()
tiempo_entrenamiento = fin - inicio

# Predecir
y_pred_linear_svc = calibrated_svc.predict(X_test_scaled)
y_prob_linear_svc = calibrated_svc.predict_proba(X_test_scaled)[:, 1]

# Guardar métricas
resultados_linear_svc = pd.DataFrame({
    'Modelo': ['LinearSVC (con calibración)'],
    'Accuracy': [round(accuracy_score(y_test, y_pred_linear_svc), 4)],
    'Precision': [round(precision_score(y_test, y_pred_linear_svc), 4)],
    'Recall': [round(recall_score(y_test, y_pred_linear_svc), 4)],
    'F1-score': [round(f1_score(y_test, y_pred_linear_svc), 4)],
    'AUC': [round(roc_auc_score(y_test, y_prob_linear_svc), 4)],
    'CPU time (s)': [round(tiempo_entrenamiento, 4)]
})

# Verificar matriz y reporte si quieres
print("📊 Matriz de Confusión:\n", confusion_matrix(y_test, y_pred_linear_svc))
print("\n📋 Reporte de Clasificación:\n", classification_report(y_test, y_pred_linear_svc))

display(resultados_linear_svc)

📊 Matriz de Confusión:
 [[12024     2]
 [  378    52]]

📋 Reporte de Clasificación:
               precision    recall  f1-score   support

           0       0.97      1.00      0.98     12026
           1       0.96      0.12      0.21       430

    accuracy                           0.97     12456
   macro avg       0.97      0.56      0.60     12456
weighted avg       0.97      0.97      0.96     12456



Unnamed: 0,Modelo,Accuracy,Precision,Recall,F1-score,AUC,CPU time (s)
0,LinearSVC (con calibración),0.9695,0.963,0.1209,0.2149,0.919,3.4848


L1 and L2

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Convertir 'inj' a binario si no lo es (ej: 0=no heridos, 1=heridos)
y_binary = (y > 0).astype(int)  # Ajusta según tu contexto

# Dividir datos
X_train, X_test, y_train, y_test = train_test_split(X, y_binary, test_size=0.2, random_state=42)

# Modelo con penalización L1 (Lasso) o L2 (Ridge)
model_l1 = LogisticRegression(penalty='l1', solver='liblinear', C=1.0).fit(X_train, y_train)  # L1
model_l2 = LogisticRegression(penalty='l2', solver='liblinear', C=1.0).fit(X_train, y_train)  # L2

# Evaluación
print("Precisión (L1):", accuracy_score(y_test, model_l1.predict(X_test)))
print("Reporte (L1):\n", classification_report(y_test, model_l1.predict(X_test)))
print("Precisión (L2):", accuracy_score(y_test, model_l2.predict(X_test)))
print("Reporte (L2):\n", classification_report(y_test, model_l2.predict(X_test)))


Precisión (L1): 0.9091636945920373
Reporte (L1):
               precision    recall  f1-score   support

           0       0.92      0.98      0.95     12219
           1       0.69      0.33      0.44      1520

    accuracy                           0.91     13739
   macro avg       0.81      0.65      0.70     13739
weighted avg       0.90      0.91      0.89     13739

Precisión (L2): 0.8906033918043526
Reporte (L2):
               precision    recall  f1-score   support

           0       0.90      0.99      0.94     12219
           1       0.53      0.10      0.16      1520

    accuracy                           0.89     13739
   macro avg       0.71      0.54      0.55     13739
weighted avg       0.86      0.89      0.86     13739

KNN

In [None]:
import time
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, classification_report)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Seleccionar las características y la variable objetivo
X = tornados_target[['mag', 'slat', 'slon', 'elat', 'elon', 'len', 'wid', 'fat', 'f1', 'f2', 'f3', 'f4']]
y = df['mortality']

# Dividir los datos en conjunto de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=101, stratify=y)

# Estándarizar los datos
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Entrenar el modelo y medir el tiempo de entrenamiento
start_time = time.time()
knn = KNeighborsClassifier(n_neighbors=81)
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
y_prob = knn.predict_proba(X_test)[:, 1]  # Probabilidades de la clase positiva
end_time = time.time()

# Medir el tiempo de entrenamiento
tiempo_entrenamiento = end_time - start_time

# Evaluar resultados
print("\n🔍 Accuracy:", accuracy_score(y_test, y_pred))
print("\n📊 Matriz de confusión:\n", confusion_matrix(y_test, y_pred))
print("\n📋 Reporte de clasificación:\n", classification_report(y_test, y_pred))

# Guardar métricas en DataFrame
resultados_bayes_smote = pd.DataFrame({
    'Modelo': ['K-Neighbors (Clasificador)'],
    'Accuracy': [round(accuracy_score(y_test, y_pred), 4)],
    'Precision': [round(precision_score(y_test, y_pred), 4)],
    'Recall': [round(recall_score(y_test, y_pred), 4)],
    'F1-score': [round(f1_score(y_test, y_pred), 4)],
    'AUC': [round(roc_auc_score(y_test, y_prob), 4)],
    'CPU time (s)': [round(tiempo_entrenamiento, 4)]
})

🔍 Accuracy: 0.984180900621118

📊 Matriz de confusión:
 [[20136     0]
 [  326   146]]

📋 Reporte de clasificación:
               precision    recall  f1-score   support

           0       0.98      1.00      0.99     20136
           1       1.00      0.31      0.47       472

    accuracy                           0.98     20608
   macro avg       0.99      0.65      0.73     20608
weighted avg       0.98      0.98      0.98     20608

Ramdon Forest XGBoost

In [None]:
import time
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, confusion_matrix, classification_report)
from sklearn.preprocessing import StandardScaler

# Cargar los datos
ruta = r'C:/Users/wmanj/OneDrive/Escritorio/MACHINELEARNING/tornados.csv'  
df = pd.read_csv(ruta) 

# Preprocesamiento de datos
df['loss'] = df['loss'].replace(0, pd.NA)
df['loss'] = df['loss'].interpolate(method='linear')
df['mag'] = df['mag'].fillna(df['mag'].mean())
df['mortality'] = np.where(df['fat'] == 0, 0, 1)

# Dividir los datos
X = df[['om', 'yr', 'mo', 'dy', 'stf', 'mag', 'inj', 'fat', 'loss', 'slat', 'slon', 'elat', 'elon', 'len', 'wid', 'ns', 'sn', 'f1', 'f2', 'f3', 'f4']]
y = df['mortality']
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)

# Estándarización de los datos (si es necesario para algunos modelos)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Entrenar y evaluar el modelo XGBoost
start_time = time.time()
xgboost_model = xgb.XGBClassifier(random_state=0)
xgboost_model.fit(X_train, y_train)
y_pred_xgb = xgboost_model.predict(X_test)
y_prob_xgb = xgboost_model.predict_proba(X_test)[:, 1]
end_time = time.time()

# Medir tiempo de entrenamiento
xgboost_time = end_time - start_time

# Evaluar XGBoost
print("\n📊 Evaluación de XGBoost:")
print("\n🔍 Accuracy:", accuracy_score(y_test, y_pred_xgb))
print("\n📊 Matriz de confusión:\n", confusion_matrix(y_test, y_pred_xgb))
print("\n📋 Reporte de clasificación:\n", classification_report(y_test, y_pred_xgb))

# Guardar métricas en DataFrame para XGBoost
resultados_xgb = pd.DataFrame({
    'Modelo': ['XGBoost'],
    'Accuracy': [round(accuracy_score(y_test, y_pred_xgb), 4)],
    'Precision': [round(precision_score(y_test, y_pred_xgb), 4)],
    'Recall': [round(recall_score(y_test, y_pred_xgb), 4)],
    'F1-score': [round(f1_score(y_test, y_pred_xgb), 4)],
    'AUC': [round(roc_auc_score(y_test, y_prob_xgb), 4)],
    'CPU time (s)': [round(xgboost_time, 4)]
})

# Entrenar y evaluar el modelo RandomForest
start_time = time.time()
forest = RandomForestClassifier(random_state=0)
forest.fit(X_train, y_train)
y_pred_rf = forest.predict(X_test)
y_prob_rf = forest.predict_proba(X_test)[:, 1]
end_time = time.time()

# Medir tiempo de entrenamiento
forest_time = end_time - start_time

# Evaluar RandomForest
print("\n📊 Evaluación de Random Forest:")
print("\n🔍 Accuracy:", accuracy_score(y_test, y_pred_rf))
print("\n📊 Matriz de confusión:\n", confusion_matrix(y_test, y_pred_rf))
print("\n📋 Reporte de clasificación:\n", classification_report(y_test, y_pred_rf))

# Guardar métricas en DataFrame para Random Forest
resultados_rf = pd.DataFrame({
    'Modelo': ['Random Forest'],
    'Accuracy': [round(accuracy_score(y_test, y_pred_rf), 4)],
    'Precision': [round(precision_score(y_test, y_pred_rf), 4)],
    'Recall': [round(recall_score(y_test, y_pred_rf), 4)],
    'F1-score': [round(f1_score(y_test, y_pred_rf), 4)],
    'AUC': [round(roc_auc_score(y_test, y_prob_rf), 4)],
    'CPU time (s)': [round(forest_time, 4)]
})

# Unir los resultados de ambos modelos
resultados_completos = pd.concat([resultados_xgb, resultados_rf], ignore_index=True)

📊 Evaluación de XGBoost:

🔍 Accuracy: 1.0

📊 Matriz de confusión:
 [[16781     0]
 [    0   393]]

📋 Reporte de clasificación:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     16781
           1       1.00      1.00      1.00       393

    accuracy                           1.00     17174
   macro avg       1.00      1.00      1.00     17174
weighted avg       1.00      1.00      1.00     17174


📊 Evaluación de Random Forest:

🔍 Accuracy: 1.0

📊 Matriz de confusión:
 [[16781     0]
 [    0   393]]

📋 Reporte de clasificación:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00     16781
           1       1.00      1.00      1.00       393

    accuracy                           1.00     17174
   macro avg       1.00      1.00      1.00     17174
weighted avg       1.00      1.00      1.00     17174