In [1]:
import os
import numpy as np
import random
import pandas as pd
import sys 

from PIL import Image
from skimage.transform import resize
from skimage.color import rgb2gray
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score, classification_report, confusion_matrix, ConfusionMatrixDisplay, roc_curve, auc
from sklearn.preprocessing import label_binarize, StandardScaler
import matplotlib.pyplot as plt
import kagglehub

from constants import excel_filename, dataset_name, CV
from modelos.RegresionLogistica import regresion_logistica
from modelos.CNN import cnn1, cnn2
from modelos.KNN import knn
from modelos.ArbolDeDecision import arbol_decision

## guardar datos en excel ##
def save_to_excel(datos):
    # Si el archivo ya existe, se leerá y se agregará nueva información
    metodo =  datos.pop("Método")
    if os.path.exists(excel_filename):
        results = pd.read_excel(excel_filename, index_col=0)
        df = pd.DataFrame(datos, index=metodo)
        results = pd.concat([results, df], ignore_index=False)
        results.to_excel(excel_filename)
    else:
        results = pd.DataFrame(datos)
        results.index = metodo
        results.to_excel(excel_filename)

#función para hacer la gráfica después de evaluar el rendimiento
def plot_rendimiento(exactitud, sensibilidad, precision, matriz_confusion, clases, fpr_micro, tpr_micro, roc_auc_micro, fpr, tpr, roc_auc):
    # Imprimir métricas de evaluación
    print("Exactitud    : %.2f %%" % exactitud)
    print("Sensibilidad : %.2f %%" % sensibilidad)
    print("Precisión    : %.2f %%" % precision)

    #matriz de confusion
    disp = ConfusionMatrixDisplay(confusion_matrix=matriz_confusion, display_labels=clases)
    disp.plot()
    disp.figure_.suptitle("Matriz de confusión")
    disp.figure_.set_dpi(100)
    plt.xlabel("Clase predicha")
    plt.ylabel("Clase real")
    plt.show()

    #roc y auc
    plt.figure()
    plt.plot(fpr_micro, tpr_micro, color='red', lw=2, label='Curva ROC micro-average (AUC = %0.3f)' % roc_auc_micro)
    plt.plot([0, 1], [0, 1], color='k', lw=1, linestyle='--')
    plt.xlabel('Tasa de Falsos Positivos')
    plt.ylabel('Tasa de Verdaderos Positivos')
    plt.legend(loc="lower right")
    plt.show()

    # Curvas ROC por clase
    plt.figure(figsize=(10, 8))
    colors = ['aqua', 'blue', 'violet', 'gold', 'orange', 'pink', 'tan', 'purple', 'lime', 'red']
    for i in range(len(clases)):
        plt.plot(fpr[i], tpr[i], color=colors[i], lw=1, label='ROC clase %i (area = %0.3f)' % (i, roc_auc[i]))

    plt.plot(fpr_micro, tpr_micro, color='red', lw=2, linestyle=':', label='Curva ROC micro-average (AUC = %0.3f)' % roc_auc_micro)
    plt.plot([0, 1], [0, 1], color='k', lw=2, linestyle='--')
    plt.xlabel('Tasa de Falsos Positivos')
    plt.ylabel('Tasa de Verdaderos Positivos')
    plt.title('Curva ROC por clase')
    plt.legend(loc="lower right")
    plt.show()

## evaluar rendimiento ##
def evaluar_rendimiento(model, X_test, y_test, nombre_metodo, pca):
    # Aplanar las imágenes de prueba
    X_test_flat = X_test.reshape(X_test.shape[0], -1)  # Aplanar a 2D: [n_samples, n_features]
    
    # Normalizar los datos de prueba (debe coincidir con la normalización de X_train)
    scaler = StandardScaler()
    X_test_scaled = scaler.fit_transform(X_test_flat)
    
    # Aplicar PCA al conjunto de prueba (debe ser la misma transformación que en el entrenamiento)
    if pca is not None:
        X_test_pca = pca.transform(X_test_scaled)  # Aquí usamos el PCA entrenado para transformar X_test
    else:
        X_test_pca = model.pca.transform(X_test_scaled)  # Aquí usamos el PCA entrenado para transformar X_test

    # Hacer predicciones
    y_pred = model.predict(X_test_pca)

    # Métricas de rendimiento
    precision = 100 * accuracy_score(y_test, y_pred)
    sensibility = 100 * recall_score(y_test, y_pred, average='macro')
    precision_score_value = 100 * precision_score(y_test, y_pred, average='macro')

    # Informe de clasificación
    print("Informe de evaluación del clasificador sobre el conjunto de test:\n", classification_report(y_test, y_pred))

    # Matriz de confusión
    cm = confusion_matrix(y_test, y_pred, labels=model.classes_)

    # ROC y AUC
    n_classes = len(np.unique(y_test))
    y_test_bin = label_binarize(y_test, classes=np.arange(0, n_classes, 1))
    y_score = model.predict_proba(X_test_pca)

    fpr_micro, tpr_micro, _ = roc_curve(y_test_bin.ravel(), y_score.ravel())
    roc_auc_micro = auc(fpr_micro, tpr_micro)

    # ROC por clase
    fpr = dict()
    tpr = dict()
    roc_auc = dict()

    for i in range(n_classes):
        fpr[i], tpr[i], _ = roc_curve(y_test_bin[:, i], y_score[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Aquí iría la función para guardar en Excel (asegúrate de que 'Excel_data' y 'save_to_excel' están bien implementadas)
    datos = {
        "Método": [nombre_metodo],
        "Exactitud": [100 * accuracy_score(y_test, y_pred)],
        "Precisión": [100 * precision_score(y_test, y_pred, average='macro')],
        "Matriz de confusión": [cm],
        "fpr_micro": [fpr_micro],
        "tpr_micro": [tpr_micro],
        "roc_auc_micro": [roc_auc_micro],
        "fpr": [fpr],
        "tpr": [tpr],
        "roc_auc": [roc_auc]
    }
    save_to_excel(datos)
    # plot rendimiento
    plot_rendimiento(
        precision,
        sensibility,
        precision_score_value,
        cm,
        model.classes_,
        fpr_micro,
        tpr_micro,
        roc_auc_micro,
        fpr,
        tpr,
        roc_auc
    )


## cargar imágenes ##
def cargar_imagenes(image_path, target_size=(256, 256), channel_mode="rgb"):

    img_list = []
    labels = []
    classes = os.listdir(image_path)

    for folder in classes:
        folder_path = os.path.join(image_path, folder)

        if os.path.isdir(folder_path):
            for filename in os.listdir(folder_path):
                if filename.endswith(('.jpg', '.png', '.jpeg')):
                    img = Image.open(os.path.join(folder_path, filename)).convert("RGB")
                    img_array = np.array(img)

                    # Redimensionar
                    img_resized = resize(img_array, target_size, anti_aliasing=True)

                    # Modos de canal
                    if channel_mode == "grayscale":
                        img_resized = rgb2gray(img_resized)  # Convertir a escala de grises
                    elif channel_mode == "r":  # Canal rojo
                        img_resized = img_resized[:, :, 0]
                    elif channel_mode == "g":  # Canal verde
                        img_resized = img_resized[:, :, 1]
                    elif channel_mode == "b":  # Canal azul
                        img_resized = img_resized[:, :, 2]
                    elif channel_mode == "rgb":
                        img_resized = (img_resized * 255).astype(np.uint8)  # Restaurar valores de píxeles

                    img_resized = img_resized / 255.0  # Normalizar
                    img_list.append(img_resized)
                    labels.append(folder)  # Guardar etiqueta

    return np.array(img_list), np.array(labels)        


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
path = kagglehub.dataset_download(dataset_name)

# cargar train y test
X_train_rgb, y_train = cargar_imagenes(os.path.join(path, 'flowers/flowers/flower_photos/train'), channel_mode="rgb")
X_test_rgb, y_test = cargar_imagenes(os.path.join(path, 'flowers/flowers/flower_photos/test'), channel_mode="rgb")
X_train_gray, _ = cargar_imagenes(os.path.join(path, 'flowers/flowers/flower_photos/train'), channel_mode="grayscale")
X_test_gray, _ = cargar_imagenes(os.path.join(path, 'flowers/flowers/flower_photos/test'), channel_mode="grayscale")
X_train_rgb_64, y_train = cargar_imagenes(os.path.join(path, 'flowers/flowers/flower_photos/train'), target_size=(64, 64))
X_test_rgb_64, y_test = cargar_imagenes(os.path.join(path, 'flowers/flowers/flower_photos/test'), target_size=(64, 64))

# Codificar etiquetas
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

epochs = 100
batch_size = 100
history = []

In [3]:
X_train_flattened = X_train_rgb_64.reshape(X_train_rgb_64.shape[0], -1)  # Convierte a 2D (n_samples, n_features)
X_test_flattened = X_test_rgb_64.reshape(X_test_rgb_64.shape[0], -1)      # Hace lo mismo para el test set

In [4]:
from sklearn.model_selection import cross_validate

CV = 5
scoring = ['precision_macro', 'recall_macro', 'precision_micro', 'recall_micro', 'f1_macro', 'accuracy', 'roc_auc_ovo']


In [14]:
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras import layers, datasets, models
from tensorflow.keras.utils import to_categorical
import tensorflow as tf

def cnn1():
    model = models.Sequential()
    model.add(keras.Input(shape=(64, 64, 3)))  # Nueva dimensión de entrada

    # Primera capa convolucional
    model.add(layers.Conv2D(32, (3, 3), activation='relu', padding='same'))
    model.add(layers.MaxPooling2D((2, 2)))

    # Segunda capa convolucional
    model.add(layers.Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(layers.MaxPooling2D((2, 2)))

    # Global Average Pooling para reducir dimensionalidad
    model.add(layers.GlobalAveragePooling2D())

    # Capa densa
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dropout(0.5))  # Dropout para evitar el sobreajuste

    # Capa de salida con 5 clases
    model.add(layers.Dense(5, activation='softmax'))

    # Compilación del modelo
    model.compile(optimizer='adam',
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                  metrics=['accuracy'])

    return model

def cnn2():
    model = models.Sequential()
    model.add(keras.Input(shape=(64, 64, 3)))  # Nueva dimensión de entrada

    # Bloque 1
    model.add(layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(layers.Conv2D(32, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Dropout(0.2))

    # Bloque 2
    model.add(layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(layers.Conv2D(64, (3, 3), activation='relu', kernel_initializer='he_uniform', padding='same'))
    model.add(layers.BatchNormalization())
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Dropout(0.2))

    # Global Average Pooling en lugar de Flatten
    model.add(layers.GlobalAveragePooling2D())

    # Capa densa
    model.add(layers.Dense(64, activation='relu', kernel_initializer='he_uniform'))
    model.add(layers.Dropout(0.2))

    # Capa de salida con Softmax
    model.add(layers.Dense(5, activation='softmax'))

    # Compilación del modelo
    opt = keras.optimizers.Adam(learning_rate=0.001)
    model.compile(optimizer=opt,
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
                  metrics=['accuracy'])

    return model

In [7]:
print(y_train_encoded)

[0 0 0 ... 4 4 4]


In [16]:
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, roc_auc_score
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models

# Definir el número de folds
CV = 5
kf = StratifiedKFold(n_splits=CV, shuffle=True, random_state=42)


# Para almacenar las métricas de cada fold
scores = {
    "precision_macro": [],
    "recall_macro": [],
    "precision_micro": [],
    "recall_micro": [],
    "f1_macro": [],
    "accuracy": [],
    "roc_auc": []
}

for fold, (train_index, val_index) in enumerate(kf.split(X_train_rgb_64, y_train_encoded)):
    print(f"\nFold {fold + 1}/{CV}")

    # Dividir los datos en train y validación
    X_train_fold, X_val_fold = X_train_rgb_64[train_index], X_train_rgb_64[val_index]
    y_train_fold, y_val_fold = y_train_encoded[train_index], y_train_encoded[val_index]

    # Crear un nuevo modelo en cada fold
    model = cnn1()

    # Entrenar el modelo
    model.fit(X_train_fold, y_train_fold,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(X_val_fold, y_val_fold),
              verbose=1)

    # Predecir en el conjunto de validación
    y_pred_probs = model.predict(X_val_fold)  # Probabilidades
    y_pred = np.argmax(y_pred_probs, axis=1)  # Clases predichas

    # Calcular métricas
    scores["accuracy"].append(accuracy_score(y_val_fold, y_pred))
    scores["precision_macro"].append(precision_score(y_val_fold, y_pred, average="macro"))
    scores["recall_macro"].append(recall_score(y_val_fold, y_pred, average="macro"))
    scores["precision_micro"].append(precision_score(y_val_fold, y_pred, average="micro"))
    scores["recall_micro"].append(recall_score(y_val_fold, y_pred, average="micro"))
    scores["f1_macro"].append(f1_score(y_val_fold, y_pred, average="macro"))

    # Calcular ROC AUC (solo si es clasificación multiclase con one-hot encoding)
    try:
        roc_auc = roc_auc_score(y_val_fold, y_pred_probs, multi_class="ovr")
        scores["roc_auc"].append(roc_auc)
    except ValueError:
        scores["roc_auc"].append(None)

# Mostrar los resultados de cada fold
for metric, values in scores.items():
    print(f"\n{metric}: {values}")



Fold 1/5
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 124ms/step - accuracy: 0.2426 - loss: 1.6007 - val_accuracy: 0.3347 - val_loss: 1.5547
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step

Fold 2/5


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 132ms/step - accuracy: 0.2273 - loss: 1.6003 - val_accuracy: 0.3390 - val_loss: 1.5447
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step

Fold 3/5


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 146ms/step - accuracy: 0.2594 - loss: 1.5953 - val_accuracy: 0.3206 - val_loss: 1.5498
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step

Fold 4/5


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 155ms/step - accuracy: 0.2311 - loss: 1.5966 - val_accuracy: 0.3771 - val_loss: 1.5194
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 24ms/step

Fold 5/5


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 169ms/step - accuracy: 0.2163 - loss: 1.6054 - val_accuracy: 0.3588 - val_loss: 1.5475
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step

precision_macro: [0.2847789311681259, 0.2508539155125732, 0.2700928072505196, 0.32312819883765365, 0.36230220428506843]

recall_macro: [0.31262344296827055, 0.299148795010864, 0.2712002200976683, 0.3460466712184833, 0.33169610316792825]

precision_micro: [0.3347457627118644, 0.3389830508474576, 0.3206214689265537, 0.3771186440677966, 0.3587570621468927]

recall_micro: [0.3347457627118644, 0.3389830508474576, 0.3206214689265537, 0.3771186440677966, 0.3587570621468927]

f1_macro: [0.2511499450079433, 0.24265712821694282, 0.19527660869452607, 0.2924371127350146, 0.29129874614623874]

accuracy: [0.3347457627118644, 0.3389830508474576, 0.3206214689265537, 0.3771186440677966, 0.3587570621468927]

roc_auc: [np.float64(0.7409002681110595), np.float64(0.7188553766204164), 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [17]:
for fold, (train_index, val_index) in enumerate(kf.split(X_train_rgb_64, y_train_encoded)):
    print(f"\nFold {fold + 1}/{CV}")

    # Dividir los datos en train y validación
    X_train_fold, X_val_fold = X_train_rgb_64[train_index], X_train_rgb_64[val_index]
    y_train_fold, y_val_fold = y_train_encoded[train_index], y_train_encoded[val_index]

    # Crear un nuevo modelo en cada fold
    model2 = cnn2()

    # Entrenar el modelo
    model2.fit(X_train_fold, y_train_fold,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(X_val_fold, y_val_fold),
              verbose=1)

    # Predecir en el conjunto de validación
    y_pred_probs = model2.predict(X_val_fold)  # Probabilidades
    y_pred = np.argmax(y_pred_probs, axis=1)  # Clases predichas

    # Calcular métricas
    scores["accuracy"].append(accuracy_score(y_val_fold, y_pred))
    scores["precision_macro"].append(precision_score(y_val_fold, y_pred, average="macro"))
    scores["recall_macro"].append(recall_score(y_val_fold, y_pred, average="macro"))
    scores["precision_micro"].append(precision_score(y_val_fold, y_pred, average="micro"))
    scores["recall_micro"].append(recall_score(y_val_fold, y_pred, average="micro"))
    scores["f1_macro"].append(f1_score(y_val_fold, y_pred, average="macro"))

    # Calcular ROC AUC (solo si es clasificación multiclase con one-hot encoding)
    try:
        roc_auc = roc_auc_score(y_val_fold, y_pred_probs, multi_class="ovr")
        scores["roc_auc"].append(roc_auc)
    except ValueError:
        scores["roc_auc"].append(None)

# Mostrar los resultados de cada fold
for metric, values in scores.items():
    print(f"\n{metric}: {values}")


Fold 1/5
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 659ms/step - accuracy: 0.3524 - loss: 1.5764 - val_accuracy: 0.1992 - val_loss: 1.6044
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 43ms/step

Fold 2/5


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 736ms/step - accuracy: 0.3943 - loss: 1.5934 - val_accuracy: 0.2528 - val_loss: 1.6404
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step

Fold 3/5


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 802ms/step - accuracy: 0.3700 - loss: 1.5561 - val_accuracy: 0.3390 - val_loss: 1.4927
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 59ms/step

Fold 4/5
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 715ms/step - accuracy: 0.3688 - loss: 1.5369 - val_accuracy: 0.3588 - val_loss: 1.5447
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 46ms/step

Fold 5/5


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 679ms/step - accuracy: 0.3668 - loss: 1.6592 - val_accuracy: 0.2754 - val_loss: 1.5369
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 42ms/step

precision_macro: [0.2847789311681259, 0.2508539155125732, 0.2700928072505196, 0.32312819883765365, 0.36230220428506843, 0.3395939234589541, 0.20221967963386728, 0.4634548216270704, 0.368373134569026, 0.5318170924785609]

recall_macro: [0.31262344296827055, 0.299148795010864, 0.2712002200976683, 0.3460466712184833, 0.33169610316792825, 0.22108467468695786, 0.28808384692100175, 0.3679032430397086, 0.3883558084258352, 0.307764202521521]

precision_micro: [0.3347457627118644, 0.3389830508474576, 0.3206214689265537, 0.3771186440677966, 0.3587570621468927, 0.19915254237288135, 0.2528248587570621, 0.3389830508474576, 0.3587570621468927, 0.2754237288135593]

recall_micro: [0.3347457627118644, 0.3389830508474576, 0.3206214689265537, 0.3771186440677966, 0.3587570621468927,

In [5]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
import numpy as np

def arbol_decision_vgg16(X_train, y_train, X_test, input_shape=(256,256,3)):
    # Cargar VGG16 preentrenado SIN la capa superior
    base_model = VGG16(weights="imagenet", include_top=False, input_shape=input_shape)
    feature_extractor = Model(inputs=base_model.input, outputs=base_model.output)

    # Extraer características con VGG16
    X_train_features = feature_extractor.predict(X_train)
    X_test_features = feature_extractor.predict(X_test)

    # Aplanar las características extraídas
    X_train_features_flat = X_train_features.reshape(X_train_features.shape[0], -1)
    X_test_features_flat = X_test_features.reshape(X_test_features.shape[0], -1)

    # Validación cruzada con las características extraídas
    model_tree_vgg = DecisionTreeClassifier(criterion="gini", max_depth=10, random_state=42)

    scores_tree_vgg = cross_validate(model_tree_vgg, X_train_features_flat, y_train, cv=5, scoring=scoring)
    #print(f"Precisión media de validación cruzada con VGG16: {np.mean(scores_tree_vgg):.4f}")
    # Mostrar los resultados promedio de cada métrica
    print("Resultados de Validación Cruzada:")
    for metric in scoring:
        mean_score = np.mean(scores_tree_vgg[f'test_{metric}'])
        print(f"{metric}: {mean_score:.4f}")
    # Entrenar el modelo con todo el conjunto de entrenamiento
    model_tree_vgg.fit(X_train_features_flat, y_train)

    return model_tree_vgg, scores_tree_vgg

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import cross_val_score
from sklearn.decomposition import PCA
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
import numpy as np

def arbol_vgg16_pca(X_train, y_train, X_test, input_shape=(256,256,3), n_components=500):
    # Cargar VGG16 preentrenado SIN la capa superior
    base_model = VGG16(weights="imagenet", include_top=False, input_shape=input_shape)
    feature_extractor = Model(inputs=base_model.input, outputs=base_model.output)

    # Extraer características con VGG16
    X_train_features = feature_extractor.predict(X_train)
    X_test_features = feature_extractor.predict(X_test)

    # Aplanar las características extraídas
    X_train_features_flat = X_train_features.reshape(X_train_features.shape[0], -1)
    X_test_features_flat = X_test_features.reshape(X_test_features.shape[0], -1)

    # Reducción de dimensionalidad con PCA
    pca = PCA(n_components=n_components, svd_solver='randomized')
    X_train_pca = pca.fit_transform(X_train_features_flat)
    X_test_pca = pca.transform(X_test_features_flat)

    # Validación cruzada con PCA y VGG16
    model_tree_pca = DecisionTreeClassifier(criterion="gini", max_depth=20, random_state=42)
    
    scores_tree_pca = cross_validate(model_tree_pca, X_train_pca, y_train, cv=5, scoring=scoring)
    #print(f"Precisión media de validación cruzada con VGG16 y PCA: {np.mean(scores_tree_pca):.4f}")
    print("Resultados de Validación Cruzada:")
    for metric in scoring:
        mean_score = np.mean(scores_tree_pca[f'test_{metric}'])
        print(f"{metric}: {mean_score:.4f}")
    # Entrenar el modelo con todo el conjunto de entrenamiento
    model_tree_pca.fit(X_train_pca, y_train)

    return model_tree_pca, scores_tree_pca




In [18]:
model_tree_vgg = arbol_decision_vgg16(X_train_rgb, y_train_encoded, X_test_rgb)

[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m777s[0m 7s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 5s/step
Resultados de Validación Cruzada:
precision_macro: 0.4695
recall_macro: 0.4642
precision_micro: 0.4737
recall_micro: 0.4737
f1_macro: 0.4643
accuracy: 0.4737
roc_auc_ovo: 0.6958


In [20]:
model_tree_pca = arbol_vgg16_pca(X_train_rgb, y_train, X_test_rgb)

[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m716s[0m 6s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 4s/step
Resultados de Validación Cruzada:
precision_macro: 0.5037
recall_macro: 0.5047
precision_micro: 0.5147
recall_micro: 0.5147
f1_macro: 0.5035
accuracy: 0.5147
roc_auc_ovo: 0.6883


In [2]:
from sklearn.ensemble import RandomForestClassifier
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA
from skimage.feature import hog
from sklearn.preprocessing import StandardScaler
import numpy as np

def random_forest(X_train, y_train, X_test, input_shape=(256, 256, 3)):
    # Crear y entrenar el modelo Random Forest
    
    X_train_flat = X_train.reshape(X_train.shape[0], -1)
    X_test_flat = X_test.reshape(X_test.shape[0], -1)
    
    rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
    
    scores_rf = cross_validate(rf_model, X_train_flat, y_train, cv=5)
    print(f'Accuracy scores for each fold: {scores_rf}')
    print(f'Mean cross-validation accuracy: {scores_rf["test_score"].mean()}')

    rf_model.fit(X_train_flat, y_train)

    # Hacer predicciones
    y_pred_rf = rf_model.predict(X_test_flat)

    return rf_model

def rforest_vgg16_pca(X_train, y_train, X_test, input_shape=(256, 256, 3), n_components=500):
    
    base_model = VGG16(weights="imagenet", include_top=False, input_shape=input_shape)
    feature_extractor = Model(inputs=base_model.input, outputs=base_model.output)

    # Extraer características con VGG16
    X_train_features = feature_extractor.predict(X_train)
    X_test_features = feature_extractor.predict(X_test)

    # Aplanar las características extraídas
    X_train_features_flat = X_train_features.reshape(X_train_features.shape[0], -1)
    X_test_features_flat = X_test_features.reshape(X_test_features.shape[0], -1)
    
    pca = PCA(n_components=500, svd_solver='randomized')  # Elegimos 200 características más relevantes
    X_train_pca = pca.fit_transform(X_train_features_flat)
    X_test_pca = pca.transform(X_test_features_flat)

    model_rf = RandomForestClassifier(
    n_estimators=200,  # Más árboles = mejor generalización
    max_depth=30,  # Mayor profundidad
    min_samples_split=3,  # Menos datos necesarios para dividir
    min_samples_leaf=2,  # Evita ramas muy pequeñas
    random_state=42
    )

    scores_rf_pca = cross_validate(model_rf, X_train_pca, y_train, cv=5)
    print(f'Accuracy scores for each fold: {scores_rf_pca}')
    #print(f'Mean cross-validation accuracy: {scores_rf_pca.mean()}')
    print(f'Mean cross-validation accuracy: {scores_rf_pca["test_score"].mean()}')
    model_rf.fit(X_train_pca, y_train)

    # Precisión con VGG16 + PCA
    return model_rf

def extract_hog_features(images):

    hog_features = []
    for img in images:
        feature = hog(img, pixels_per_cell=(16, 16), cells_per_block=(2, 2), feature_vector=True)
        hog_features.append(feature)

    return np.array(hog_features)

def rforest_vgg16_pca_hog(X_train, y_train, X_test, X_train_gray, X_test_gray, input_shape=(256,256,3), n_components=500):
    base_model = VGG16(weights="imagenet", include_top=False, input_shape=input_shape)
    feature_extractor = Model(inputs=base_model.input, outputs=base_model.output)

    # Extraer características con VGG16
    X_train_features = feature_extractor.predict(X_train)
    X_test_features = feature_extractor.predict(X_test)

    # Aplanar las características extraídas
    X_train_features_flat = X_train_features.reshape(X_train_features.shape[0], -1)
    X_test_features_flat = X_test_features.reshape(X_test_features.shape[0], -1)
    
    pca = PCA(n_components=500, svd_solver='randomized')  # Elegimos 200 características más relevantes
    X_train_pca = pca.fit_transform(X_train_features_flat)
    X_test_pca = pca.transform(X_test_features_flat)

    # Extraer características HOG
    X_train_hog = extract_hog_features(X_train_gray)
    X_test_hog = extract_hog_features(X_test_gray)
    # Normalizar características
    scaler = StandardScaler()
    X_train_hog_scaled = scaler.fit_transform(X_train_hog)
    X_test_hog_scaled = scaler.transform(X_test_hog)

    # Concatenar VGG16 + HOG
    X_train_combined = np.hstack((X_train_pca, X_train_hog_scaled))
    X_test_combined = np.hstack((X_test_pca, X_test_hog_scaled))

    # Entrenar Random Forest con características combinadas
    model_rf_combined = RandomForestClassifier(n_estimators=200, max_depth=30, random_state=42)
    
    cv_scores = cross_validate(model_rf_combined, X_train_combined, y_train, cv=5)
    print(f'Accuracy scores for each fold: {cv_scores}')
    #print(f'Mean cross-validation accuracy: {cv_scores.mean()}')
    print(f'Mean cross-validation accuracy: {cv_scores["test_score"].mean()}')

    model_rf_combined.fit(X_train_combined, y_train)

    return model_rf_combined, cv_scores

In [24]:
rf_model = random_forest(X_train_rgb_64, y_train, X_test_rgb_64)


Accuracy scores for each fold: {'fit_time': array([25.2925384 , 24.76381326, 24.87243223, 25.19280028, 25.19163847]), 'score_time': array([0.05431128, 0.0724647 , 0.0597024 , 0.05120516, 0.05671883]), 'test_score': array([0.52824859, 0.52118644, 0.53107345, 0.49576271, 0.52966102])}
Mean cross-validation accuracy: 0.5211864406779662


In [25]:
rf_model_vgg16_pca = rforest_vgg16_pca(X_train_rgb, y_train, X_test_rgb, input_shape=(256, 256, 3), n_components=500)


[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m662s[0m 6s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3s/step
Accuracy scores for each fold: {'fit_time': array([14.03428292, 13.8959868 , 13.93460393, 14.09746408, 13.97438383]), 'score_time': array([0.05023885, 0.04000211, 0.03836036, 0.03998375, 0.03776574]), 'test_score': array([0.67937853, 0.72175141, 0.72316384, 0.69915254, 0.69774011])}
Mean cross-validation accuracy: 0.7042372881355933


In [1]:
rf_model_vgg16_pca

NameError: name 'rf_model_vgg16_pca' is not defined

In [7]:
rf_model_vgg16_pca_hog = rforest_vgg16_pca_hog(X_train_rgb, y_train, X_test_rgb, X_train_gray, X_test_gray, input_shape=(256, 256, 3), n_components=500)

[1m111/111[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m654s[0m 6s/step
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3s/step
Accuracy scores for each fold: {'fit_time': array([55.87965798, 54.57125759, 55.30457354, 55.33034348, 55.56378818]), 'score_time': array([0.09120703, 0.08135343, 0.08197927, 0.08349848, 0.08234692]), 'test_score': array([0.52118644, 0.52683616, 0.53672316, 0.52542373, 0.53954802])}
Mean cross-validation accuracy: 0.5299435028248588
