In [1]:
# Libraries
import cv2
import os
import imutils
import shutil
import random

In [48]:
def create_directories(base_path, person_name):
    """Ensure the per-person output folder exists and return its path.

    The folder is ``<base_path>/preprocessed/<person_name>``. A message is
    printed only when the folder had to be created.

    Args:
        base_path: Root data directory.
        person_name: Name of the sub-folder used for this person.

    Returns:
        The path of the person's folder under ``preprocessed``.
    """
    target_dir = os.path.join(base_path, 'preprocessed', person_name)
    if os.path.exists(target_dir):
        # Already present: nothing to create and nothing to report.
        return target_dir

    os.makedirs(target_dir)
    print(f'Carpeta creada: {target_dir}')
    return target_dir

def load_haar_cascade(cascade_path):
    """Build an OpenCV cascade classifier from a cascade file.

    Args:
        cascade_path: Path passed to ``cv2.CascadeClassifier``.

    Returns:
        The classifier, or ``None`` (after printing an error message) when
        the classifier reports itself as empty.
    """
    classifier = cv2.CascadeClassifier(cascade_path)
    if not classifier.empty():
        return classifier

    print("Error al cargar el clasificador Haar Cascade.")
    return None

def capture_faces(video_path, preprocessed_path, face_classifier, max_images=300):
    """Detect faces in a video and save each one as a 150x150 JPEG.

    Frames are read sequentially from ``video_path``. Every detection returned
    by ``face_classifier.detectMultiScale`` is cropped, resized to 150x150 and
    written to ``preprocessed_path`` as ``face_<n>.jpg``. Each processed frame
    is shown in a window named 'Video' with the detections outlined; pressing
    'q' stops the capture early.

    Args:
        video_path: Path handed to ``cv2.VideoCapture``.
        preprocessed_path: Directory where the face crops are written.
        face_classifier: Object exposing ``detectMultiScale`` (for example the
            value returned by ``cv2.CascadeClassifier``).
        max_images: Hard upper bound on the number of crops saved.

    Returns:
        The number of face images successfully written (0 when the video
        cannot be opened).
    """
    cap = cv2.VideoCapture(video_path)
    count = 0

    try:
        if not cap.isOpened():
            print(f'No se pudo abrir el video: {video_path}')
            return 0

        # Looping on the counter makes max_images a real limit: previously the
        # inner `break` only left the per-face loop, so every later frame
        # containing a face saved one extra image.
        while count < max_images:
            ret, frame = cap.read()
            if not ret:
                break

            gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            faces = face_classifier.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=7)

            # Draw on a copy so the green outline never ends up inside the
            # crops that are written to disk.
            preview = frame.copy()

            for (x, y, w, h) in faces:
                cv2.rectangle(preview, (x, y), (x + w, y + h), (0, 255, 0), 2)
                face = cv2.resize(frame[y:y + h, x:x + w], (150, 150), interpolation=cv2.INTER_CUBIC)
                # imwrite reports failure through its return value; only
                # count images that actually reached the disk.
                if cv2.imwrite(os.path.join(preprocessed_path, f'face_{count}.jpg'), face):
                    count += 1

                if count >= max_images:
                    break

            cv2.imshow('Video', preview)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the capture and close the window even if a frame fails.
        cap.release()
        cv2.destroyAllWindows()

    return count

def split_dataset(preprocessed_path, person_name, train_ratio=0.8, seed=None):
    """Move the ``.jpg`` files of a folder into ``train``/``validation`` sub-folders.

    The images found directly inside ``preprocessed_path`` are shuffled and
    then moved: the first ``train_ratio`` share goes to
    ``<preprocessed_path>/train`` and the rest to
    ``<preprocessed_path>/validation``. Both sub-folders are created when
    missing.

    Args:
        preprocessed_path: Folder that holds the images to distribute.
        person_name: Not used by this function; kept so existing callers
            keep working.
        train_ratio: Fraction of images (between 0 and 1) assigned to the
            training folder. Defaults to 0.8.
        seed: Optional seed for a reproducible shuffle. When ``None`` the
            module-level ``random`` generator is used.

    Raises:
        ValueError: If ``train_ratio`` is outside ``[0, 1]``.
    """
    if not 0.0 <= train_ratio <= 1.0:
        raise ValueError(f'train_ratio must be between 0 and 1, got {train_ratio}')

    train_dir = os.path.join(preprocessed_path, 'train')
    validation_dir = os.path.join(preprocessed_path, 'validation')
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(validation_dir, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sorting first makes the
    # shuffle the only source of ordering, so a seed gives a repeatable split.
    all_images = sorted(
        name for name in os.listdir(preprocessed_path)
        if name.endswith('.jpg') and os.path.isfile(os.path.join(preprocessed_path, name))
    )

    if seed is None:
        random.shuffle(all_images)
    else:
        random.Random(seed).shuffle(all_images)

    split_index = int(len(all_images) * train_ratio)
    assignments = (
        (train_dir, all_images[:split_index]),
        (validation_dir, all_images[split_index:]),
    )

    # NOTE(review): file names are kept as-is, so if this folder is refilled
    # with the same names and split again, a name could end up in both
    # sub-folders - confirm whether stale files should be cleared first.
    for destination, names in assignments:
        for img in names:
            shutil.move(os.path.join(preprocessed_path, img), os.path.join(destination, img))

    print('Distribución de imágenes completada.')

In [49]:
def main():
    """Run the pipeline for one person: prepare the folder, extract faces, split.

    The person name, data folder, video file and cascade file are fixed
    values defined at the top of the function body.
    """
    person_name = 'David'  # Change to the name of the person being processed
    base_path = '../data'  # Adjust if the data folder lives elsewhere
    haar_cascade_path = '../models/haarcascade_frontalface_alt.xml'

    video_path = os.path.join(base_path, 'crudo/prueba.mp4')  # Input video
    preprocessed_path = create_directories(base_path, person_name)  # Where faces are stored

    face_classifier = load_haar_cascade(haar_cascade_path)
    if face_classifier is None:
        # load_haar_cascade has already printed the error.
        return

    captured_count = capture_faces(video_path, preprocessed_path, face_classifier)
    if captured_count <= 0:
        print('No se han capturado rostros.')
        return

    print(f'Se han capturado {captured_count} rostros.')
    # Distribute the saved faces into train and validation folders.
    split_dataset(preprocessed_path, person_name)

# Entry point: run the pipeline only when this code is executed directly,
# not when it is imported from another module.
if __name__ == "__main__":
    main()

Se han capturado 148 rostros.
Distribución de imágenes completada.
