In [17]:
# Cargar librerías
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, cross_val_score, RepeatedStratifiedKFold
from sklearn.preprocessing import OrdinalEncoder, StandardScaler, OneHotEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from imblearn.under_sampling import RandomUnderSampler, NearMiss
from imblearn.pipeline import Pipeline
from imblearn.under_sampling import CondensedNearestNeighbour
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from ctgan import CTGAN
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# Cargamos los datos de un fichero excel
file_path = 'D:/MÁSTER DATA SCIENCE/KSCHOOL/1.TFM/PARTTE 2 TFM/df_final.xlsx'
df = pd.read_excel(file_path)

# DEFINIR FUNCIONES

In [18]:
# Identificar características numéricas y categóricas
numeric_features = df.select_dtypes(include=[float, int]).columns.tolist()
categorical_features = df.select_dtypes(include=['object', 'category']).columns.tolist()

df[numeric_features]= df[numeric_features].fillna(df[numeric_features].mean().round(1), inplace=False)
# Preprocesamiento para las características numéricas
numeric_transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='median')),  # Imputación con la mediana
    ('scaler', StandardScaler())  # Estandarización
])

# Preprocesamiento para las características categóricas ordinales
ordinal_features = ['AJCC Stage']
ordinal_transformer = Pipeline(steps=[
    ('label', OrdinalEncoder(dtype=int))  # Codificación Ordinal
])

# Preprocesamiento para las características categóricas nominales (binarias)
nominal_features = ['Sex']
nominal_transformer = Pipeline(steps=[
    ('onehot', OneHotEncoder(drop='if_binary', dtype=int))  # Codificación binaria
])

# Combinación de los transformadores en un preprocesador
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, numeric_features),
        ('ord', ordinal_transformer, ordinal_features),
        ('nom', nominal_transformer, nominal_features)
    ])

# Función para entrenar y evaluar modelos
def entrenar_y_evaluar_modelo(X_train, y_train, X_test, y_test, model, model_name):
    try:
        # Cross validation
        cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
        scores = cross_val_score(model, X_train, y_train, scoring='accuracy', cv=cv, n_jobs=1)
        print(f"Cross-validated accuracy: {scores.mean():.4f}")

        # Entrenar el modelo
        model.fit(X_train, y_train)

        # Evaluar en el conjunto de entrenamiento
        y_train_pred = model.predict(X_train)
        mostrar_estadisticas_guardar_tabla(y_train, y_train_pred, "Training", model_name)

        # Evaluar en el conjunto de prueba
        y_test_pred = model.predict(X_test)
        mostrar_estadisticas_guardar_tabla(y_test, y_test_pred, "Test", model_name)

    except Exception as e:
        print(f"Error al entrenar o evaluar el modelo {model_name}: {e}")
        
# Definir funciones para evaluación y resultados
def mostrar_estadisticas_guardar_tabla(y_val, y_pred, set_name, model_name):
    global tabla_results_df
    accuracy = accuracy_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred, average='weighted')
    recall = recall_score(y_val, y_pred, average='weighted')
    f1 = f1_score(y_val, y_pred, average='weighted')

    print(f'Model performance for {set_name} - {model_name}')
    print("- Accuracy: {:.4f}".format(accuracy))
    print('- F1 score: {:.4f}'.format(f1))
    print('- Precision: {:.4f}'.format(precision))
    print('- Recall: {:.4f}'.format(recall))
    print("Confusion Matrix:\n", confusion_matrix(y_val, y_pred))

    global_score = calcular_puntuacion_global(accuracy, precision, recall, f1)

    new_row = pd.DataFrame({
        'Model': [model_name],
        'Set': [set_name],
        'Accuracy': [accuracy],
        'Precision': [precision],
        'Recall': [recall],
        'F1-Score': [f1],
        'Global Score' : [global_score]
    })

    tabla_results_df = pd.concat([tabla_results_df, new_row], ignore_index=True)

    return tabla_results_df


def calcular_puntuacion_global(accuracy, precision, recall, f1):
    # Definir ponderaciones para cada métrica
    weights = {
        'accuracy': 0.25,
        'precision': 0.25,
        'recall': 0.25,
        'f1': 0.25,
    }

    # Calcular la puntuación global
    global_score = (accuracy * weights['accuracy'] +
                    precision * weights['precision'] +
                    recall * weights['recall'] +
                    f1 * weights['f1'])

    return round(global_score * 100, 2)

In [19]:
# Filtrar el DataFrame
df_breast_colorectum = df[df['Tumor type'].isin(['Breast', 'Colorectum'])]
df_otros = df[df['Tumor type'].isin(['Esophagus', 'Liver', 'Lung', 'Ovary', 'Pancreas', 'Stomach'])]

# Imprimir resultados
print("Parte 1:")
print(df_breast_colorectum)
print("\nParte 2:")
print(df_otros)

Parte 1:
     Unnamed: 0  Tumor type AJCC Stage  AFP (pg/ml)  Angiopoietin-2 (pg/ml)  \
0             0  Colorectum          I     1583.450                 5598.50   
1             1  Colorectum          I      715.308                20936.35   
2             2  Colorectum         II     4365.530                 2350.93   
3             3  Colorectum         II      715.308                 1604.34   
4             4  Colorectum         II      801.300                 2087.57   
..          ...         ...        ...          ...                     ...   
874         874      Breast         II     1200.090                 2997.73   
876         876      Breast        III      792.540                 1727.85   
877         877      Breast         II      792.540                  988.43   
878         878      Breast         II     3311.130                 6524.83   
888         896  Colorectum          I      913.980                  159.97   

     AXL (pg/ml)  CA-125 (U/ml)  CA 15-3 (

In [20]:
#Crear los 2 dataframes

df_breast_colorectum.to_excel('df_breast_colorectum.xlsx', index=False) 
df_otros.to_excel('df_otros.xlsx', index=False)            


In [21]:
df_breast_colorectum.isnull().sum()

Unnamed: 0                     0
Tumor type                     0
AJCC Stage                     0
AFP (pg/ml)                    0
Angiopoietin-2 (pg/ml)         0
AXL (pg/ml)                    0
CA-125 (U/ml)                  0
CA 15-3 (U/ml)                 0
CA19-9 (U/ml)                  0
CD44 (ng/ml)                   0
CEA (pg/ml)                    0
CYFRA 21-1 (pg/ml)             0
DKK1 (ng/ml)                   0
Endoglin (pg/ml)               0
FGF2 (pg/ml)                   0
Follistatin (pg/ml)            0
Galectin-3 (ng/ml)             0
G-CSF (pg/ml)                  0
GDF15 (ng/ml)                  0
HE4 (pg/ml)                    0
HGF (pg/ml)                    0
IL-6 (pg/ml)                   0
IL-8 (pg/ml)                   0
Kallikrein-6 (pg/ml)           0
Leptin (pg/ml)                 0
Mesothelin (ng/ml)             0
Midkine (pg/ml)                0
Myeloperoxidase (ng/ml)        0
NSE (ng/ml)                    0
OPG (ng/ml)                    0
OPN (pg/ml

In [22]:
# Separar las características (X) y la variable objetivo (y)
X = df_breast_colorectum.drop(columns=['Tumor type'])
y = df_breast_colorectum['Tumor type']

# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Procesar características categóricas
ordinal_encoder = OrdinalEncoder()
categorical_features = X.select_dtypes(include=['object', 'category']).columns.tolist()
X_train[categorical_features] = ordinal_encoder.fit_transform(X_train[categorical_features])
X_test[categorical_features] = ordinal_encoder.transform(X_test[categorical_features])

# Escalar los datos
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convertir X_train y y_train en un DataFrame para CTGAN
X_train_df = pd.DataFrame(X_train, columns=[f"feature_{i}" for i in range(X_train.shape[1])])
X_train_df['Tumor type'] = y_train.values

# Definir los modelos y estrategias de balanceo
modelos = {
    'Logistic Regression': LogisticRegression(max_iter=5000),  # Aumentar max_iter
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'KNN': KNeighborsClassifier(),
    'AdaBoost': AdaBoostClassifier(algorithm='SAMME'),  # Usar algoritmo SAMME
    'Gradient Boosting': GradientBoostingClassifier()
}

estrategias_balanceo = {
    'RandomUnderSampler': RandomUnderSampler(random_state=42),
    'NearMiss': NearMiss(),
    'CondensedNearestNeighbour': CondensedNearestNeighbour(random_state=42),
    'CTGAN': CTGAN(),
    'CTGAN + RandomUnderSampler': Pipeline([
        ('ctgan', CTGAN()),
        ('under', RandomUnderSampler(random_state=42))
    ])
}

# Inicializar el DataFrame de resultados
tabla_results_df = pd.DataFrame(columns=['Model', 'Set', 'Accuracy', 'Precision', 'Recall', 'F1-Score', 'Global Score'])

def entrenar_y_evaluar_modelo(X_train_balanced, y_train_balanced, X_test, y_test, modelo, model_name):
    try:
        # Entrenar el modelo
        modelo.fit(X_train_balanced, y_train_balanced)

        # Evaluar en el conjunto de prueba
        y_test_pred = modelo.predict(X_test)
        mostrar_estadisticas_guardar_tabla(y_test, y_test_pred, "Test", model_name)

    except Exception as e:
        print(f"Error al entrenar o evaluar el modelo {model_name}: {e}")

def mostrar_estadisticas_guardar_tabla(y_val, y_pred, set_name, model_name):
    global tabla_results_df
    accuracy = accuracy_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred, average='weighted')
    recall = recall_score(y_val, y_pred, average='weighted')
    f1 = f1_score(y_val, y_pred, average='weighted')

    print(f'Model performance for {set_name} - {model_name}')
    print("- Accuracy: {:.4f}".format(accuracy))
    print('- F1 score: {:.4f}'.format(f1))
    print('- Precision: {:.4f}'.format(precision))
    print('- Recall: {:.4f}'.format(recall))
    print("Confusion Matrix:\n", confusion_matrix(y_val, y_pred))

    global_score = calcular_puntuacion_global(accuracy, precision, recall, f1)

    new_row = pd.DataFrame({
        'Model df_breast_colorectum': [model_name],
        'Set df_breast_colorectum': [set_name],
        'Accuracy df_breast_colorectum': [accuracy],
        'Precision df_breast_colorectum': [precision],
        'Recall df_breast_colorectum': [recall],
        'F1-Score df_breast_colorectum': [f1],
        'Global Score df_breast_colorectum': [global_score]
    })

    tabla_results_df = pd.concat([tabla_results_df, new_row], ignore_index=True)

def calcular_puntuacion_global(accuracy, precision, recall, f1):
    # Definir ponderaciones para cada métrica
    weights = {
        'accuracy': 0.25,
        'precision': 0.25,
        'recall': 0.25,
        'f1': 0.25,
    }

    # Calcular la puntuación global
    global_score = (accuracy * weights['accuracy'] +
                    precision * weights['precision'] +
                    recall * weights['recall'] +
                    f1 * weights['f1'])

    return round(global_score * 100, 2)

# Entrenar y evaluar modelos para cada estrategia de balanceo
for estrategia_nombre, estrategia in estrategias_balanceo.items():
    print(f"\nEstrategia de balanceo: {estrategia_nombre}")

    if estrategia_nombre == 'CTGAN':
        # Entrenar CTGAN y generar datos sintéticos
        ctgan = CTGAN()
        ctgan.fit(X_train_df, discrete_columns=['Tumor type'])
        samples = ctgan.sample(len(X_train_df))
        X_train_balanced = samples.drop(columns=['Tumor type']).values
        y_train_balanced = samples['Tumor type'].values
    elif estrategia_nombre == 'CTGAN + RandomUnderSampler':
        # Entrenar CTGAN y generar datos sintéticos
        ctgan = CTGAN()
        ctgan.fit(X_train_df, discrete_columns=['Tumor type'])
        samples = ctgan.sample(len(X_train_df))
        X_synthetic = samples.drop(columns=['Tumor type']).values
        y_synthetic = samples['Tumor type'].values
        # Combinar datos sintéticos con datos reales
        X_combined = np.vstack([X_train, X_synthetic])
        y_combined = np.hstack([y_train, y_synthetic])
        X_train_balanced, y_train_balanced = RandomUnderSampler(random_state=42).fit_resample(X_combined, y_combined)
    else:
        X_train_balanced, y_train_balanced = estrategia.fit_resample(X_train, y_train)

    for modelo_nombre, modelo in modelos.items():
        print(f"\nModelo: {modelo_nombre}")
        model_name = f"{modelo_nombre} ({estrategia_nombre})"
        entrenar_y_evaluar_modelo(X_train_balanced, y_train_balanced, X_test, y_test, modelo, model_name)

# Guardar los resultados en un archivo Excel
tabla_results_df.to_excel('tabla_results_df_breast_colorectum.xlsx', index=False)



Estrategia de balanceo: RandomUnderSampler

Modelo: Logistic Regression
Model performance for Test - Logistic Regression (RandomUnderSampler)
- Accuracy: 0.8667
- F1 score: 0.8679
- Precision: 0.8701
- Recall: 0.8667
Confusion Matrix:
 [[ 45  10]
 [ 14 111]]

Modelo: Decision Tree
Model performance for Test - Decision Tree (RandomUnderSampler)
- Accuracy: 0.8556
- F1 score: 0.8586
- Precision: 0.8671
- Recall: 0.8556
Confusion Matrix:
 [[ 47   8]
 [ 18 107]]

Modelo: Random Forest
Model performance for Test - Random Forest (RandomUnderSampler)
- Accuracy: 0.9000
- F1 score: 0.9021
- Precision: 0.9100
- Recall: 0.9000
Confusion Matrix:
 [[ 51   4]
 [ 14 111]]

Modelo: KNN
Model performance for Test - KNN (RandomUnderSampler)
- Accuracy: 0.8056
- F1 score: 0.8119
- Precision: 0.8361
- Recall: 0.8056
Confusion Matrix:
 [[47  8]
 [27 98]]

Modelo: AdaBoost
Model performance for Test - AdaBoost (RandomUnderSampler)
- Accuracy: 0.8944
- F1 score: 0.8961
- Precision: 0.9006
- Recall: 0.8944


In [23]:
df_otros.isnull().sum()

Unnamed: 0                     0
Tumor type                     0
AJCC Stage                     0
AFP (pg/ml)                    0
Angiopoietin-2 (pg/ml)         0
AXL (pg/ml)                    0
CA-125 (U/ml)                  0
CA 15-3 (U/ml)                 0
CA19-9 (U/ml)                  0
CD44 (ng/ml)                   0
CEA (pg/ml)                    0
CYFRA 21-1 (pg/ml)             0
DKK1 (ng/ml)                   0
Endoglin (pg/ml)               0
FGF2 (pg/ml)                   0
Follistatin (pg/ml)            0
Galectin-3 (ng/ml)             0
G-CSF (pg/ml)                  0
GDF15 (ng/ml)                  0
HE4 (pg/ml)                    0
HGF (pg/ml)                    0
IL-6 (pg/ml)                   0
IL-8 (pg/ml)                   0
Kallikrein-6 (pg/ml)           0
Leptin (pg/ml)                 0
Mesothelin (ng/ml)             0
Midkine (pg/ml)                0
Myeloperoxidase (ng/ml)        0
NSE (ng/ml)                    0
OPG (ng/ml)                    0
OPN (pg/ml

In [24]:
# Separar las características (X) y la variable objetivo (y)
X = df_otros.drop(columns=['Tumor type'])
y = df_otros['Tumor type']

# Dividir los datos en conjuntos de entrenamiento y prueba
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Procesar características categóricas
ordinal_encoder = OrdinalEncoder()
categorical_features = X.select_dtypes(include=['object', 'category']).columns.tolist()
X_train[categorical_features] = ordinal_encoder.fit_transform(X_train[categorical_features])
X_test[categorical_features] = ordinal_encoder.transform(X_test[categorical_features])

# Escalar los datos
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convertir X_train y y_train en un DataFrame para CTGAN
X_train_df = pd.DataFrame(X_train, columns=[f"feature_{i}" for i in range(X_train.shape[1])])
X_train_df['Tumor type'] = y_train.values

# Definir los modelos y estrategias de balanceo
modelos = {
    'Logistic Regression': LogisticRegression(max_iter=5000),  # Aumentar max_iter
    'Decision Tree': DecisionTreeClassifier(),
    'Random Forest': RandomForestClassifier(),
    'KNN': KNeighborsClassifier(),
    'AdaBoost': AdaBoostClassifier(algorithm='SAMME'),  # Usar algoritmo SAMME
    'Gradient Boosting': GradientBoostingClassifier()
}

estrategias_balanceo = {
    'RandomUnderSampler': RandomUnderSampler(random_state=42),
    'NearMiss': NearMiss(),
    'CondensedNearestNeighbour': CondensedNearestNeighbour(random_state=42),
    'CTGAN': CTGAN(),
    'CTGAN + RandomUnderSampler': Pipeline([
        ('ctgan', CTGAN()),
        ('under', RandomUnderSampler(random_state=42))
    ])
}

# Inicializar el DataFrame de resultados
tabla_results_df = pd.DataFrame(columns=['Model', 'Set', 'Accuracy', 'Precision', 'Recall', 'F1-Score', 'Global Score'])

def entrenar_y_evaluar_modelo(X_train_balanced, y_train_balanced, X_test, y_test, modelo, model_name):
    try:
        # Entrenar el modelo
        modelo.fit(X_train_balanced, y_train_balanced)

        # Evaluar en el conjunto de prueba
        y_test_pred = modelo.predict(X_test)
        mostrar_estadisticas_guardar_tabla(y_test, y_test_pred, "Test", model_name)

    except Exception as e:
        print(f"Error al entrenar o evaluar el modelo {model_name}: {e}")

def mostrar_estadisticas_guardar_tabla(y_val, y_pred, set_name, model_name):
    global tabla_results_df
    accuracy = accuracy_score(y_val, y_pred)
    precision = precision_score(y_val, y_pred, average='weighted')
    recall = recall_score(y_val, y_pred, average='weighted')
    f1 = f1_score(y_val, y_pred, average='weighted')

    print(f'Model performance for {set_name} - {model_name}')
    print("- Accuracy: {:.4f}".format(accuracy))
    print('- F1 score: {:.4f}'.format(f1))
    print('- Precision: {:.4f}'.format(precision))
    print('- Recall: {:.4f}'.format(recall))
    print("Confusion Matrix:\n", confusion_matrix(y_val, y_pred))

    global_score = calcular_puntuacion_global(accuracy, precision, recall, f1)

    new_row = pd.DataFrame({
        'Model df_otros ': [model_name],
        'Set df_otros ': [set_name],
        'Accuracy df_otros ': [accuracy],
        'Precision df_otros ': [precision],
        'Recall df_otros ': [recall],
        'F1-Score df_otros ': [f1],
        'Global Score df_otros ': [global_score]
    })

    tabla_results_df = pd.concat([tabla_results_df, new_row], ignore_index=True)

def calcular_puntuacion_global(accuracy, precision, recall, f1):
    # Definir ponderaciones para cada métrica
    weights = {
        'accuracy': 0.25,
        'precision': 0.25,
        'recall': 0.25,
        'f1': 0.25,
    }

    # Calcular la puntuación global
    global_score = (accuracy * weights['accuracy'] +
                    precision * weights['precision'] +
                    recall * weights['recall'] +
                    f1 * weights['f1'])

    return round(global_score * 100, 2)

# Entrenar y evaluar modelos para cada estrategia de balanceo
for estrategia_nombre, estrategia in estrategias_balanceo.items():
    print(f"\nEstrategia de balanceo: {estrategia_nombre}")

    if estrategia_nombre == 'CTGAN':
        # Entrenar CTGAN y generar datos sintéticos
        ctgan = CTGAN()
        ctgan.fit(X_train_df, discrete_columns=['Tumor type'])
        samples = ctgan.sample(len(X_train_df))
        X_train_balanced = samples.drop(columns=['Tumor type']).values
        y_train_balanced = samples['Tumor type'].values
    elif estrategia_nombre == 'CTGAN + RandomUnderSampler':
        # Entrenar CTGAN y generar datos sintéticos
        ctgan = CTGAN()
        ctgan.fit(X_train_df, discrete_columns=['Tumor type'])
        samples = ctgan.sample(len(X_train_df))
        X_synthetic = samples.drop(columns=['Tumor type']).values
        y_synthetic = samples['Tumor type'].values
        # Combinar datos sintéticos con datos reales
        X_combined = np.vstack([X_train, X_synthetic])
        y_combined = np.hstack([y_train, y_synthetic])
        X_train_balanced, y_train_balanced = RandomUnderSampler(random_state=42).fit_resample(X_combined, y_combined)
    else:
        X_train_balanced, y_train_balanced = estrategia.fit_resample(X_train, y_train)

    for modelo_nombre, modelo in modelos.items():
        print(f"\nModelo: {modelo_nombre}")
        model_name = f"{modelo_nombre} ({estrategia_nombre})"
        entrenar_y_evaluar_modelo(X_train_balanced, y_train_balanced, X_test, y_test, modelo, model_name)

# Guardar los resultados en un archivo Excel
tabla_results_df.to_excel('tabla_results_df_otros.xlsx', index=False)



Estrategia de balanceo: RandomUnderSampler

Modelo: Logistic Regression
Model performance for Test - Logistic Regression (RandomUnderSampler)
- Accuracy: 0.6423
- F1 score: 0.6491
- Precision: 0.6617
- Recall: 0.6423
Confusion Matrix:
 [[ 3  2  1  0  0  4]
 [ 5  9  2  0  0  1]
 [ 2  2 22  0  1  4]
 [ 1  0  0 14  2  1]
 [ 0  0  1  0 21  2]
 [ 2  3  8  0  0 10]]

Modelo: Decision Tree
Model performance for Test - Decision Tree (RandomUnderSampler)
- Accuracy: 0.6260
- F1 score: 0.6362
- Precision: 0.6542
- Recall: 0.6260
Confusion Matrix:
 [[ 5  0  3  0  0  2]
 [ 5  8  1  0  0  3]
 [ 2  4 20  1  1  3]
 [ 2  0  0 14  2  0]
 [ 2  0  2  2 18  0]
 [ 2  4  5  0  0 12]]

Modelo: Random Forest
Model performance for Test - Random Forest (RandomUnderSampler)
- Accuracy: 0.7236
- F1 score: 0.7223
- Precision: 0.7264
- Recall: 0.7236
Confusion Matrix:
 [[ 3  0  2  0  0  5]
 [ 3 10  1  0  2  1]
 [ 2  2 25  0  2  0]
 [ 0  0  1 16  1  0]
 [ 1  0  2  0 21  0]
 [ 1  3  5  0  0 14]]

Modelo: KNN
Model p

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Model performance for Test - Random Forest (CTGAN)
- Accuracy: 0.2276
- F1 score: 0.1938
- Precision: 0.3209
- Recall: 0.2276
Confusion Matrix:
 [[ 0  2  0  8  0  0]
 [ 0  2  0  7  2  6]
 [ 0  3  4  5  7 12]
 [ 0  0  0 12  1  5]
 [ 0  5  0  9  2  8]
 [ 0  2  1  9  3  8]]

Modelo: KNN
Model performance for Test - KNN (CTGAN)
- Accuracy: 0.2195
- F1 score: 0.1815
- Precision: 0.2124
- Recall: 0.2195
Confusion Matrix:
 [[ 2  0  1  5  1  1]
 [ 4  5  2  3  0  3]
 [ 1  2  3 19  0  6]
 [ 0  0  2 13  0  3]
 [ 1  1  4 18  0  0]
 [ 4  2  1 11  1  4]]

Modelo: AdaBoost
Model performance for Test - AdaBoost (CTGAN)
- Accuracy: 0.1301
- F1 score: 0.0807
- Precision: 0.0648
- Recall: 0.1301
Confusion Matrix:
 [[ 0  3  0  6  1  0]
 [ 0  5  1  9  0  2]
 [ 0  8  0 13  0 10]
 [ 0  6  0  8  0  4]
 [ 0 18  0  5  0  1]
 [ 0  7  1 12  0  3]]

Modelo: Gradient Boosting
Model performance for Test - Gradient Boosting (CTGAN)
- Accuracy: 0.2195
- F1 score: 0.1911
- Precision: 0.2809
- Recall: 0.2195
Confusion M