In [2]:
# Librerias
import cv2
import os
import random

In [3]:
def create_directories(base_path, person_name):
    """Create the per-person train and validation output folders.

    Both folders are placed at
    ``<base_path>/preprocessed/<split>/<person_name>`` where ``<split>`` is
    ``train`` or ``validation``. Missing parent folders are created as well,
    and folders that already exist are accepted as-is.

    Args:
        base_path: Root data directory.
        person_name: Folder name used for this person inside each split.

    Returns:
        Tuple ``(train_dir, validation_dir)`` with the two folder paths.
    """
    preprocessed_root = os.path.join(base_path, 'preprocessed')
    train_dir, validation_dir = (
        os.path.join(preprocessed_root, split, person_name)
        for split in ('train', 'validation')
    )

    for folder in (train_dir, validation_dir):
        os.makedirs(folder, exist_ok=True)

    print(f'Carpetas creadas: {train_dir} y {validation_dir}')
    return train_dir, validation_dir

def load_haar_cascade(cascade_path):
    """Load a Haar cascade classifier from a cascade definition file.

    Args:
        cascade_path: Path handed to ``cv2.CascadeClassifier``.

    Returns:
        The classifier object, or ``None`` (after printing an error message)
        when the classifier reports itself as empty, i.e. nothing was loaded.
    """
    classifier = cv2.CascadeClassifier(cascade_path)
    loaded = not classifier.empty()
    if loaded:
        return classifier

    print("Error al cargar el clasificador Haar Cascade.")
    return None

def capture_faces(video_path, preprocessed_path, face_classifier, max_images=400):
    """Detect faces in a video and return them as 224x224 crops.

    Frames are read one by one, converted to grayscale and passed to
    ``face_classifier.detectMultiScale``. Every detection is cropped from the
    untouched frame, resized to 224x224 and collected. A preview window
    titled 'Video' shows each frame with the detections outlined in green;
    pressing ``q`` stops the capture early.

    Args:
        video_path: Source handed to ``cv2.VideoCapture``.
        preprocessed_path: Not used by this function; kept so existing callers
            keep working.
        face_classifier: Object exposing ``detectMultiScale`` (e.g. a loaded
            ``cv2.CascadeClassifier``).
        max_images: Upper bound on the number of crops returned.

    Returns:
        List with at most ``max_images`` face crops. The list is empty when
        the video cannot be opened or no face is detected.
    """
    cap = cv2.VideoCapture(video_path)
    captured_faces = []

    try:
        if not cap.isOpened():
            print(f'No se pudo abrir el video: {video_path}')
            return captured_faces

        while True:
            ret, frame = cap.read()
            if not ret:
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = face_classifier.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5)

            # Draw on a copy: the crops are taken from `frame`, so the green
            # overlay must never touch it or it ends up inside the saved faces.
            preview = frame.copy()
            for (x, y, w, h) in faces:
                # Enforce the cap per face. A frame with several detections
                # could otherwise jump past `max_images` and never stop.
                if len(captured_faces) >= max_images:
                    break
                cv2.rectangle(preview, (x, y), (x + w, y + h), (0, 255, 0), 2)
                face = frame[y:y + h, x:x + w]
                face = cv2.resize(face, (224, 224), interpolation=cv2.INTER_CUBIC)
                captured_faces.append(face)

            if len(captured_faces) >= max_images:
                break

            cv2.imshow('Video', preview)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close the preview even if a frame fails.
        cap.release()
        cv2.destroyAllWindows()

    return captured_faces

def split_dataset(captured_faces, train_dir, validation_dir, train_ratio=0.8, seed=None):
    """Randomly split face images into train/validation and write them as JPEGs.

    The images are shuffled, the first ``int(len * train_ratio)`` go to
    ``train_dir`` and the rest to ``validation_dir``. Files are named
    ``face_<i>.jpg`` with ``i`` restarting at 0 in each folder, so files with
    the same name from a previous run are overwritten.

    Args:
        captured_faces: Sequence of images accepted by ``cv2.imwrite``. It is
            not modified.
        train_dir: Existing folder for the training images.
        validation_dir: Existing folder for the validation images.
        train_ratio: Fraction of images assigned to training, in ``[0, 1]``.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            module-level ``random`` generator is used.

    Returns:
        None. Progress is reported through printed messages.

    Raises:
        ValueError: If ``train_ratio`` is outside ``[0, 1]``.
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f'train_ratio must be within [0, 1], got {train_ratio!r}')

    total_images = len(captured_faces)

    if total_images == 0:
        print("No hay imágenes para dividir.")
        return

    # Shuffle a copy so the caller's list keeps its original order.
    shuffled_faces = list(captured_faces)
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(shuffled_faces)

    split_index = int(total_images * train_ratio)
    subsets = (
        (train_dir, shuffled_faces[:split_index]),
        (validation_dir, shuffled_faces[split_index:]),
    )

    # cv2.imwrite signals failure through its return value instead of raising,
    # so count the failures rather than reporting success unconditionally.
    failed = 0
    for out_dir, faces in subsets:
        for i, face in enumerate(faces):
            if not cv2.imwrite(os.path.join(out_dir, f'face_{i}.jpg'), face):
                failed += 1

    if failed:
        print(f'No se pudieron guardar {failed} de {total_images} imágenes.')
    else:
        print('Distribución de imágenes completada.')

In [None]:
def main(person_name='Prueba', base_path='../data', video_path=None,
         haar_cascade_path='../models/haarcascade_frontalface_alt.xml'):
    """Run the face-extraction pipeline for one person.

    Steps: create the train/validation folders, load the Haar cascade,
    capture face crops from the video and split them between the two
    folders. Calling ``main()`` with no arguments processes the 'Prueba'
    sample with the default paths.

    Args:
        person_name: Name of the person; used as the output folder name and,
            when ``video_path`` is not given, as the video file name.
        base_path: Root data directory.
        video_path: Video to process. Defaults to
            ``<base_path>/crudo/<person_name>.mp4``.
        haar_cascade_path: Path to the Haar cascade XML file.

    Returns:
        None. Progress is reported through printed messages.
    """
    if video_path is None:
        video_path = os.path.join(base_path, f'crudo/{person_name}.mp4')

    # Output folders where the detected faces will be stored.
    train_dir, validation_dir = create_directories(base_path, person_name)

    face_classifier = load_haar_cascade(haar_cascade_path)
    if face_classifier is None:
        # load_haar_cascade has already reported the problem.
        return

    captured_faces = capture_faces(video_path, train_dir, face_classifier)

    if len(captured_faces) > 0:
        print(f'Se han capturado {len(captured_faces)} rostros.')
        # Distribute the captured faces between train and validation.
        split_dataset(captured_faces, train_dir, validation_dir)
    else:
        print('No se han capturado rostros.')

# Run the pipeline when this cell/script is executed directly; importing the
# module elsewhere does not trigger it.
if __name__ == "__main__":
    main()

Carpetas creadas: ../data\preprocessed\train\David-prueba y ../data\preprocessed\validation\David-prueba
Se han capturado 400 rostros.
Distribución de imágenes completada.
