In [None]:
import os

import numpy as np
from skimage.io import imread
from skimage.transform import resize
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, classification_report
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

# Configuration
IMG_SIZE = (64, 64)  # Size every image is resized to before being flattened

def load_images_from_folder(folder, label, img_size=IMG_SIZE):
    """Load the images found directly inside ``folder`` as flat feature vectors.

    Only files whose name ends in .png, .jpg, .jpeg or .bmp (case-insensitive)
    are considered; every other directory entry is skipped. Each image is read
    with ``as_gray=True``, resized to ``img_size`` with anti-aliasing and
    flattened to a 1-D array.

    Parameters
    ----------
    folder : str
        Directory to scan (not recursive).
    label : any
        Value appended to the label list once per loaded image.
    img_size : tuple of int, optional
        Target size passed to ``resize``. Defaults to ``IMG_SIZE``.

    Returns
    -------
    images : list
        One flattened array per loaded image, ordered by file name.
    labels : list
        ``label`` repeated ``len(images)`` times.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when ``folder`` cannot be listed.
    """
    images = []
    labels = []
    # os.listdir() returns entries in arbitrary, filesystem-dependent order.
    # Sorting makes the dataset row order deterministic, so a seeded
    # train/test split is reproducible across runs and machines.
    for filename in sorted(os.listdir(folder)):
        if filename.lower().endswith(('.png', '.jpg', '.jpeg', '.bmp')):
            img_path = os.path.join(folder, filename)
            img = imread(img_path, as_gray=True)
            img_resized = resize(img, img_size, anti_aliasing=True)
            images.append(img_resized.flatten())
            labels.append(label)
    return images, labels

# Load data
# Both class folders live under a single base directory. Set the
# OBLIGATORIO_DIR environment variable to run the notebook on another machine;
# when it is unset the original location is used.
base_dir = os.environ.get(
    'OBLIGATORIO_DIR',
    '/Users/rodrigobenitez/Documents/GitHub/ORT-AI/ML para AI/Obligatorio',
)
faces_folder = os.path.join(base_dir, 'Faces')
non_faces_folder = os.path.join(base_dir, 'Generar_Fondos', 'generated_patches')

# Label convention: 1 = face, 0 = non-face (background patch).
faces_imgs, faces_labels = load_images_from_folder(faces_folder, 1)
non_faces_imgs, non_faces_labels = load_images_from_folder(non_faces_folder, 0)

# Fail fast with an actionable message: an empty class would otherwise only
# surface later as an obscure error from the stratified split.
if not faces_imgs or not non_faces_imgs:
    raise ValueError(
        f"No se encontraron imágenes en ambas clases: "
        f"{len(faces_imgs)} en '{faces_folder}', "
        f"{len(non_faces_imgs)} en '{non_faces_folder}'"
    )

# Stack faces first, then non-faces; rows of X and entries of y stay aligned.
X = np.array(faces_imgs + non_faces_imgs)
y = np.array(faces_labels + non_faces_labels)

# Hold out 20% for testing; stratify so both splits keep the class ratio.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)




In [3]:
# Inspect the training feature matrix (each row is one flattened, resized image).
X_train

array([[0.4       , 0.56862745, 0.59607843, ..., 0.09803922, 0.09803922,
        0.10196078],
       [0.54509804, 0.5372549 , 0.57647059, ..., 0.38039216, 0.39215686,
        0.38823529],
       [0.28627451, 0.25490196, 0.25098039, ..., 0.38823529, 0.40784314,
        0.41960784],
       ...,
       [0.16078431, 0.15686275, 0.15686275, ..., 0.52156863, 0.52156863,
        0.51372549],
       [0.42352941, 0.48627451, 0.63529412, ..., 0.21176471, 0.19215686,
        0.19215686],
       [0.49411765, 0.34117647, 0.17647059, ..., 0.48627451, 0.60784314,
        0.28235294]])

In [4]:
# Inspect the training labels (1 = image from the faces folder, 0 = non-face).
y_train

array([0, 0, 0, ..., 0, 0, 0])

In [None]:
# Dimensionality reduction + model selection.
#
# PCA lives inside a Pipeline so that GridSearchCV re-fits it on the training
# part of every fold. Fitting PCA once on all of X_train before the search
# would let each validation fold influence the projection (data leakage) and
# inflate the cross-validated scores.
n_components = 100  # Number of principal components kept; tune as needed

# Candidate classifiers
models = {
    'SVM': SVC(class_weight='balanced', probability=True, random_state=42),
    'RandomForest': RandomForestClassifier(class_weight='balanced', random_state=42),
    'LogisticRegression': LogisticRegression(class_weight='balanced', max_iter=1000, random_state=42)
}

# Hyper-parameter grids. Keys carry the 'clf__' prefix because they address the
# 'clf' step of the pipeline.
param_grids = {
    # Two separate grids: gamma is ignored by the linear kernel, so crossing it
    # with kernel='linear' would only duplicate identical fits.
    'SVM': [
        {'clf__kernel': ['linear'], 'clf__C': [0.1, 1, 10]},
        {'clf__kernel': ['rbf'], 'clf__C': [0.1, 1, 10], 'clf__gamma': ['scale', 'auto']},
    ],
    'RandomForest': {
        'clf__n_estimators': [50, 100, 200],
        'clf__max_depth': [None, 10, 20]
    },
    'LogisticRegression': {
        'clf__C': [0.1, 1, 10]
    }
}

best_cv_f1 = float('-inf')  # cross-validated F1 of the current winner
best_pipeline = None        # fitted PCA + classifier pipeline of the winner
best_f1 = 0                 # test-set F1 of the winner (reporting only)
best_model = None
best_name = ""
best_report = ""

for name, model in models.items():
    print(f"Entrenando y buscando hiperparámetros para: {name}")
    pipeline = Pipeline([
        ('pca', PCA(n_components=n_components, whiten=True, random_state=42)),
        ('clf', model),
    ])
    grid = GridSearchCV(pipeline, param_grids[name], scoring='f1', cv=5, n_jobs=-1)
    grid.fit(X_train, y_train)
    print(f"Mejor F1 de validación cruzada para {name}: {grid.best_score_:.4f} con {grid.best_params_}")

    # The held-out score is printed for information only; it must not drive
    # the choice of model, otherwise the final number is optimistically biased.
    y_pred = grid.predict(X_test)
    f1 = f1_score(y_test, y_pred)
    report = classification_report(y_test, y_pred)
    print(f"F1 en test para {name}: {f1:.4f}")
    print(report)

    # Select on the cross-validated score, never on the test score.
    if grid.best_score_ > best_cv_f1:
        best_cv_f1 = grid.best_score_
        best_pipeline = grid.best_estimator_
        best_f1 = f1
        best_name = name
        best_report = report

if best_pipeline is None:
    # Happens when no search yields a comparable score (e.g. every score is NaN).
    raise RuntimeError("Ninguna búsqueda de hiperparámetros produjo un modelo válido")

# Expose the winner's pieces under the names used elsewhere in the notebook:
# `pca` was refit on the full training set by GridSearchCV (refit=True) and
# `best_model` is the classifier trained on that PCA's output.
pca = best_pipeline.named_steps['pca']
best_model = best_pipeline.named_steps['clf']
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)

print(f"\nMejor modelo: {best_name} con F1 score de {best_f1:.4f}")
print(f"F1 de validación cruzada del mejor modelo: {best_cv_f1:.4f}")
print("Reporte de clasificación del mejor modelo:")
print(best_report)