In [1]:
import os
import cv2
import shutil
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Charger le dataset

In [10]:
# Mount Google Drive inside the Colab runtime so that the dataset paths under
# /content/drive/MyDrive used by the following cells are reachable.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [7]:
# Folder that contains one sub-folder per celebrity (one class per sub-folder).
root_dir = '/content/drive/MyDrive/ProjetIA/Real_time_face_recognition/face_dataset/dataset'

# Keep only sub-directories: a stray file sitting in root_dir is not a class and
# would otherwise be handed to the split loop as if it were one.
# Sorted because os.listdir returns entries in arbitrary order.
classes_dir = sorted(
    entry for entry in os.listdir(root_dir)
    if os.path.isdir(os.path.join(root_dir, entry))
)

In [11]:
# Create the train / val / test destination folders if they do not exist yet.
for split_dir in (
    '/content/drive/MyDrive/ProjetIA/Real_time_face_recognition/face_dataset/data/train',
    '/content/drive/MyDrive/ProjetIA/Real_time_face_recognition/face_dataset/data/val',
    '/content/drive/MyDrive/ProjetIA/Real_time_face_recognition/face_dataset/data/test',
):
    os.makedirs(split_dir, exist_ok=True)

In [12]:
# Fractions of the data assigned to the train, test and validation subsets.
train_size, test_size, validation_size = 0.6, 0.2, 0.2

In [15]:

# Root folder that receives the train/val/test copies, one sub-folder per class.
split_root_dir = '/content/drive/MyDrive/ProjetIA/Real_time_face_recognition/face_dataset/data'

# Extensions are compared case-insensitively so files such as "IMG_01.JPG" are kept.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def copy_files(files, src_dir, dest_dir):
    """Copy every file name listed in ``files`` from ``src_dir`` into ``dest_dir``."""
    for file in files:
        shutil.copy(os.path.join(src_dir, file), os.path.join(dest_dir, file))


for cls in classes_dir:
    # Source folder of this class.
    src_dir = os.path.join(root_dir, cls)
    if not os.path.isdir(src_dir):
        # Stray file in root_dir (not a class folder): nothing to split.
        continue

    # Sorted on purpose: os.listdir order is arbitrary, and random_state only
    # reproduces the same split when the input order is the same. Without this,
    # re-running the cell could copy one image into two different subsets.
    files = sorted(
        file for file in os.listdir(src_dir)
        if file.lower().endswith(IMAGE_EXTENSIONS)
    )

    try:
        # First cut: train vs. everything else.
        train_files, holdout_files = train_test_split(
            files, train_size=train_size, random_state=42
        )
        # Second cut: share the remainder between validation and test using the
        # configured fractions (0.2 / 0.2 gives the former hard-coded 0.5).
        val_files, test_files = train_test_split(
            holdout_files,
            test_size=test_size / (test_size + validation_size),
            random_state=42,
        )
    except ValueError as err:
        # train_test_split refuses to produce an empty subset (too few images).
        print(f"Skipping class {cls!r}: cannot split {len(files)} image(s) ({err})")
        continue

    # Create the per-class sub-folder of each subset and copy its files.
    for folder, split_files in (
        ('train', train_files),
        ('val', val_files),
        ('test', test_files),
    ):
        dest_dir = os.path.join(split_root_dir, folder, cls)
        os.makedirs(dest_dir, exist_ok=True)
        copy_files(split_files, src_dir, dest_dir)

# Pré-Visualisation

# Détection de visage

In [None]:

# Load the pre-trained frontal-face Haar cascade shipped with OpenCV.
cascade_path = os.path.join(cv2.data.haarcascades, 'haarcascade_frontalface_default.xml')
face_cascade = cv2.CascadeClassifier(cascade_path)

# CascadeClassifier does not raise when the XML cannot be loaded: it stays empty
# and only fails later inside detectMultiScale. Fail here with a clear message.
if face_cascade.empty():
    raise IOError(f"Could not load Haar cascade from {cascade_path}")

In [None]:
def save_detected_faces(src_path, dst_path, file_name):
    """Detect the faces in one image and save each face crop as its own file.

    Args:
        src_path: Path of the image to read.
        dst_path: Directory receiving the crops; created only when at least
            one face is detected.
        file_name: Base file name; crop number ``i`` is written as
            ``{i}_{file_name}`` inside ``dst_path``.

    Returns:
        None. A message is printed when the image cannot be read.
    """
    image = cv2.imread(src_path)
    if image is None:
        # cv2.imread signals an unreadable file by returning None, not by raising.
        print(f" image non valide from {src_path}")
        return

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    faces = face_cascade.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
    if len(faces) == 0:
        # No face found: leave the destination untouched.
        return

    os.makedirs(dst_path, exist_ok=True)
    for i, (x, y, w, h) in enumerate(faces):
        # Crop from the colour image; detection itself ran on the grayscale copy.
        roi_color = image[y:y+h, x:x+w]
        cv2.imwrite(os.path.join(dst_path, f"{i}_{file_name}"), roi_color)

In [None]:
def process_dataset(data_type, data_root=None, output_root=None):
    """Run face detection on every image of one dataset subset.

    The folder ``<data_root>/<data_type>`` is walked recursively and each image
    is passed to ``save_detected_faces``. Crops are written under
    ``<output_root>/<data_type>`` with the same sub-folder layout as the input.

    Args:
        data_type: Subset name, e.g. ``'train'``, ``'val'`` or ``'test'``.
        data_root: Folder containing the subset folders. Defaults to the
            ``face_dataset/data`` folder that the split cell of this notebook
            fills.
        output_root: Folder receiving the detected faces. Defaults to the
            project's ``detected_faces`` folder.

    Returns:
        None.
    """
    project_dir = '/content/drive/MyDrive/ProjetIA/Real_time_face_recognition'
    if data_root is None:
        data_root = f'{project_dir}/face_dataset/data'
    if output_root is None:
        output_root = f'{project_dir}/detected_faces'

    data_dir = os.path.join(data_root, data_type)
    output_dir = os.path.join(output_root, data_type)

    if not os.path.isdir(data_dir):
        # os.walk yields nothing for a missing folder, which would hide a wrong path.
        print(f"Dataset directory not found, nothing processed: {data_dir}")
        return

    for root, _dirs, files in os.walk(data_dir):
        # Mirror the position of `root` below data_dir under output_dir.
        dst_path = os.path.normpath(
            os.path.join(output_dir, os.path.relpath(root, data_dir))
        )
        for file in files:
            if file.lower().endswith(('.png', '.jpg', '.jpeg')):
                save_detected_faces(os.path.join(root, file), dst_path, file)

In [None]:
# Run face detection on each of the three dataset subsets in turn.
for data_type in ('train', 'val', 'test'):
    process_dataset(data_type)