In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import random
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import cv2
from matplotlib import pyplot as plt
from mtcnn import MTCNN

In [None]:
# Walk the dataset tree once: report how many files sit directly in each
# visited directory and accumulate the overall folder and file totals.
dataset_url = "./dataset/"

folder_count, total_file_count = 0, 0

for current_dir, subdirs, filenames in os.walk(dataset_url):
    folder_file_count = len(filenames)
    folder_count = folder_count + len(subdirs)
    total_file_count = total_file_count + folder_file_count
    print(f"Folder: {os.path.basename(current_dir)}, Files: {folder_file_count}")

print(f"\nNumber of folders: {folder_count}")
print(f"Total number of files: {total_file_count}")

In [None]:
# Preview grid: one random .jpg from each of up to ten random identity folders,
# laid out on a 2x5 grid.
dataset_url = './dataset'

all_folders = [folder for folder in os.listdir(dataset_url) if os.path.isdir(os.path.join(dataset_url, folder))]
# random.sample raises ValueError when asked for more items than exist,
# so cap the sample size at the number of folders available.
selected_folders = random.sample(all_folders, min(10, len(all_folders)))
fig, axes = plt.subplots(2, 5, figsize=(10, 5))

# Switch every axis off up front so grid cells that end up unused stay blank.
for ax in axes.flat:
    ax.axis('off')

for i, folder in enumerate(selected_folders):
    folder_path = os.path.join(dataset_url, folder)
    all_images = [img for img in os.listdir(folder_path) if img.endswith('.jpg')]
    if not all_images:
        # random.choice raises IndexError on an empty list.
        print(f"Skipping {folder}: no .jpg images found")
        continue
    selected_image = random.choice(all_images)
    image_path = os.path.join(folder_path, selected_image)

    img = mpimg.imread(image_path)
    ax = axes[i // 5, i % 5]
    ax.imshow(img)

    # The label is the folder name with its first 5 characters dropped.
    label = folder[5:]
    ax.set_title(label)
plt.tight_layout()
plt.show()

In [None]:
import matplotlib.patches as patches
from mtcnn import MTCNN

def detect_display_faces(image_path):
    """Detect faces in an image with MTCNN and show them with red bounding boxes.

    Args:
        image_path: Path of the image file to read with OpenCV.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from ``image_path``.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    detector = MTCNN()

    # Perform face detection on the RGB image
    faces = detector.detect_faces(img_rgb)

    # Display the image and overlay one rectangle per detected face
    fig, ax = plt.subplots()
    ax.imshow(img_rgb)
    for face in faces:
        # 'box' is unpacked as top-left corner plus width and height
        x, y, w, h = face['box']
        rect = patches.Rectangle((x, y), w, h, linewidth=2, edgecolor='r', facecolor='none')
        ax.add_patch(rect)
    ax.axis('off')
    plt.show()

# Demo: choose a random identity folder, then a random .jpg inside it, and
# run the MTCNN detection/display helper on that image.
dataset_url = './dataset'
all_folders = [
    entry
    for entry in os.listdir(dataset_url)
    if os.path.isdir(os.path.join(dataset_url, entry))
]

selected_folder = random.choice(all_folders)
folder_path = os.path.join(dataset_url, selected_folder)

all_images = [name for name in os.listdir(folder_path) if name.endswith('.jpg')]
selected_image = random.choice(all_images)
image_path = os.path.join(folder_path, selected_image)

detect_display_faces(image_path)

In [None]:
import os
import shutil

train_dataset_url = './train-dataset'

# Empty the training directory so the split below starts from a clean slate.
# On a fresh checkout the directory does not exist yet; os.listdir would raise
# FileNotFoundError, so a missing directory is treated as "nothing to clear".
if os.path.isdir(train_dataset_url):
    for item in os.listdir(train_dataset_url):
        item_path = os.path.join(train_dataset_url, item)
        if os.path.isfile(item_path) or os.path.islink(item_path):
            # Files and symlinks are unlinked (a symlink's target is left alone)
            os.unlink(item_path)
        elif os.path.isdir(item_path):
            shutil.rmtree(item_path)
    print("Contents of the working directory cleared.")
else:
    print(f"{train_dataset_url} does not exist; nothing to clear.")


In [None]:
import shutil

# Source directory
source_directory = './dataset'

# Destination directory
destination_directory = './train-dataset'

# Create the train directory if it does not exist
train_directory = os.path.join(destination_directory, 'train')
os.makedirs(train_directory, exist_ok=True)

# Get a list of all folders in the source directory
all_folders = [folder for folder in os.listdir(source_directory) if os.path.isdir(os.path.join(source_directory, folder))]

for folder in all_folders:
    folder_path = os.path.join(source_directory, folder)

    # Get a list of all images in the folder
    all_images = [img for img in os.listdir(folder_path) if img.endswith('.jpg')]
    if not all_images:
        # Nothing to copy; do not leave an empty label folder behind.
        continue

    # Create a label for the folder (drop the first 5 characters)
    label = folder[5:]

    # Shuffle so the copied subset is a random sample of the folder
    random.shuffle(all_images)

    # The label folder is the same for every image, so create it once.
    label_directory = os.path.join(train_directory, label)
    os.makedirs(label_directory, exist_ok=True)

    # Copy (not move) up to 85 images. Slicing instead of range(85) avoids an
    # IndexError for folders that hold fewer than 85 images.
    for image_name in all_images[:85]:
        image_path = os.path.join(folder_path, image_name)
        destination_path = os.path.join(label_directory, image_name)
        shutil.copy(image_path, destination_path)


print("Data splitting completed.")

In [None]:
from matplotlib.patches import Rectangle
from mtcnn.mtcnn import MTCNN

# Directory paths
train_directory = './train-dataset//train'

# Get a list of all folders in the train directory
all_folders = [folder for folder in os.listdir(train_directory) if os.path.isdir(os.path.join(train_directory, folder))]

# Randomly select two folders
selected_folders = random.sample(all_folders, 2)

# Initialize MTCNN detector once and reuse it for both images
detector = MTCNN()

for folder in selected_folders:
    folder_path = os.path.join(train_directory, folder)

    # Get a list of all images in the folder and pick one at random
    all_images = [img for img in os.listdir(folder_path) if img.endswith('.jpg')]
    selected_image = random.choice(all_images)
    image_path = os.path.join(folder_path, selected_image)

    # Read the image (OpenCV loads BGR) and convert to RGB
    image = cv2.imread(image_path)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Run detection on the RGB image, the same channel order that
    # detect_display_faces feeds the detector, not on the raw BGR array.
    faces = detector.detect_faces(image_rgb)

    # One figure per image: original on the left, annotated on the right
    fig, (ax_original, ax_detected) = plt.subplots(1, 2, figsize=(8, 3))

    ax_original.imshow(image_rgb)
    ax_original.set_title('Original Image')

    ax_detected.imshow(image_rgb)
    for face in faces:
        x, y, width, height = face['box']
        ax_detected.add_patch(Rectangle((x, y), width, height, fill=False, color='red'))

        # Mark and name every keypoint reported for this face
        for key, value in face['keypoints'].items():
            ax_detected.scatter(value[0], value[1], s=30, color='blue', marker='o')
            ax_detected.text(value[0] + 5, value[1], key, color='blue')

    # Title, show and metadata all belong inside the loop so that every
    # selected image is titled, rendered and reported, not just the last one.
    ax_detected.set_title('Detected Faces with Keypoints')
    plt.show()

    # Display metadata of detected faces
    print(f"Metadata of detected faces in {folder}/{selected_image}:")
    for i, face in enumerate(faces):
        print(f"Face {i + 1}:")
        print(f"   Confidence: {face['confidence']:.2f}")
        print(f"   Bounding Box: {face['box']}")
        print(f"   Keypoints: {face['keypoints']}")
        print()


In [None]:
from facenet_pytorch import MTCNN, InceptionResnetV1, extract_face
from PIL import Image

def process_image(image_path, face_detector, face_embedder, label):
    """Detect faces in one image, display them boxed, and print each face's embedding.

    Args:
        image_path: Path of the image file, read with OpenCV.
        face_detector: Detector whose ``detect`` method returns ``(boxes, probs)``
            (a facenet_pytorch ``MTCNN`` in this notebook).
        face_embedder: Model that maps a batched face tensor to an embedding.
        label: Text shown in the figure title.

    Raises:
        FileNotFoundError: If OpenCV cannot read an image from ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    boxes, probs = face_detector.detect(image_rgb)

    if boxes is None:
        print(f"No faces detected in {image_path}")
        return

    # facenet_pytorch boxes are corner coordinates [x1, y1, x2, y2], not
    # [x, y, width, height]. Draw on a copy so the crops handed to the
    # embedder below do not contain the red outline.
    annotated = image_rgb.copy()
    for x1, y1, x2, y2 in boxes:
        cv2.rectangle(annotated, (int(x1), int(y1)), (int(x2), int(y2)), (255, 0, 0), 1)

    # Display the annotated image, titled with the folder-derived label
    plt.imshow(annotated)
    plt.title(f"Label: {label}")
    plt.axis('off')
    plt.show()

    # Crop each detected face from the clean image and embed it.
    # NOTE(review): the crops are passed to the embedder as returned by
    # extract_face, with no pixel standardization - confirm this matches what
    # the pretrained embedder expects.
    transformed_faces = [face_embedder(extract_face(image_rgb, box).unsqueeze(0)) for box in boxes]

    # Convert PyTorch tensors to NumPy arrays for printing
    transformed_faces_np = [transformed_face.squeeze().detach().numpy() for transformed_face in transformed_faces]
    for i, embeddings in enumerate(transformed_faces_np):
        print(f"Embeddings for Detected Face {i + 1}:", embeddings)

# Single-image demo: pick a random training image, build the detector and
# embedder, and hand everything to process_image.
train_directory = './train-dataset/train/'

# Random identity folder, then a random file inside it
selected_folder = random.choice(os.listdir(train_directory))
selected_folder_path = os.path.join(train_directory, selected_folder)
file = random.choice(os.listdir(selected_folder_path))
file_path = os.path.join(selected_folder_path, file)

# Face detector (MTCNN) and face embedder (InceptionResnetV1, vggface2 weights)
face_detector = MTCNN(select_largest=False, post_process=False, margin=20)
face_embedder = InceptionResnetV1(pretrained='vggface2').eval()

process_image(
    image_path=file_path,
    face_detector=face_detector,
    face_embedder=face_embedder,
    label=selected_folder,
)

In [None]:
from facenet_pytorch import MTCNN, InceptionResnetV1, extract_face
from PIL import Image
from sklearn.metrics.pairwise import euclidean_distances

def process_images(folder_path1, file1, folder_path2, file2, face_detector, face_embedder):
    """Compare the first detected face of two images by embedding distance.

    Each image is shown next to a copy with its detected faces boxed, then the
    Euclidean distance between the embeddings of the first face found in each
    image is printed. If either image has no detected face, a message is
    printed instead of a distance.

    Args:
        folder_path1: Folder of the first image; its basename is used as label.
        file1: File name of the first image inside ``folder_path1``.
        folder_path2: Folder of the second image; its basename is used as label.
        file2: File name of the second image inside ``folder_path2``.
        face_detector: Detector whose ``detect`` method returns ``(boxes, probs)``.
        face_embedder: Model that maps a batched face tensor to an embedding.

    Raises:
        FileNotFoundError: If OpenCV cannot read either image.
    """

    def load_rgb(path):
        """Read an image with OpenCV and return it in RGB channel order."""
        bgr = cv2.imread(path)
        if bgr is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {path}")
        return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

    def show_detections(image_rgb, boxes, index, label):
        """Plot the image beside a copy that has the detected boxes drawn on it."""
        # Boxes are corner coordinates [x1, y1, x2, y2]; draw on a copy so the
        # image later cropped for embedding stays free of the red outline.
        annotated = image_rgb.copy()
        for x1, y1, x2, y2 in boxes:
            cv2.rectangle(annotated, (int(x1), int(y1)), (int(x2), int(y2)), (255, 0, 0), 2)

        plt.subplot(1, 2, 1)
        plt.imshow(image_rgb)
        plt.title(f"Original Image {index}\nLabel: {label}")

        plt.subplot(1, 2, 2)
        plt.imshow(annotated)
        plt.title(f"Detected Faces {index}")
        plt.axis('off')

        plt.show()

    def embed(image_rgb, box):
        """Crop one face from the image and return its embedding as a NumPy array."""
        return face_embedder(extract_face(image_rgb, box).unsqueeze(0)).squeeze().detach().numpy()

    image1_rgb = load_rgb(os.path.join(folder_path1, file1))
    image2_rgb = load_rgb(os.path.join(folder_path2, file2))

    # Detect faces; the detector hands back None instead of an array when it
    # finds nothing, so normalize that to an empty list.
    faces1, _ = face_detector.detect(image1_rgb)
    faces2, _ = face_detector.detect(image2_rgb)
    faces1 = [] if faces1 is None else faces1
    faces2 = [] if faces2 is None else faces2

    # Labels come from the folder names
    show_detections(image1_rgb, faces1, 1, os.path.basename(folder_path1))
    show_detections(image2_rgb, faces2, 2, os.path.basename(folder_path2))

    if len(faces1) == 0 or len(faces2) == 0:
        print("No face detected in at least one image; cannot compute a distance.")
        return

    # Only the first face of each image takes part in the comparison
    embedding1 = embed(image1_rgb, faces1[0])
    embedding2 = embed(image2_rgb, faces2[0])
    distance = euclidean_distances(embedding1.reshape(1, -1), embedding2.reshape(1, -1))[0][0]

    # Display the distance between the two embeddings
    print(f"Distance between embeddings: {distance:.4f}")

# Cross-identity demo: one random image from each of two different folders.
train_directory = './train-dataset/train/'

selected_folders = random.sample(os.listdir(train_directory), 2)
first_folder_path = os.path.join(train_directory, selected_folders[0])
second_folder_path = os.path.join(train_directory, selected_folders[1])

# One random file per chosen folder
file1 = random.choice(os.listdir(first_folder_path))
file2 = random.choice(os.listdir(second_folder_path))

# Detector configured to keep every face; embedder with vggface2 weights
face_detector = MTCNN(keep_all=True)
face_embedder = InceptionResnetV1(pretrained='vggface2').eval()

# Show detections for both images and print the embedding distance
process_images(first_folder_path, file1, second_folder_path, file2, face_detector, face_embedder)

In [None]:
from facenet_pytorch import MTCNN, InceptionResnetV1, extract_face
from PIL import Image
from sklearn.metrics.pairwise import euclidean_distances

# Function to process images, detect faces, and calculate embeddings
def process_images(folder_path, face_detector, face_embedder):
    """Compare the first detected face of two random images from one folder.

    Two distinct files are sampled from ``folder_path``. Each image is shown
    next to a copy with its detected faces boxed, then the Euclidean distance
    between the embeddings of the first face found in each image is printed.
    If either image has no detected face, a message is printed instead.

    Args:
        folder_path: Folder to sample the two images from; its basename is
            used as the label in the figure titles.
        face_detector: Detector whose ``detect`` method returns ``(boxes, probs)``.
        face_embedder: Model that maps a batched face tensor to an embedding.

    Raises:
        ValueError: From ``random.sample`` if the folder holds fewer than two files.
        FileNotFoundError: If OpenCV cannot read either sampled image.
    """

    def load_rgb(path):
        """Read an image with OpenCV and return it in RGB channel order."""
        bgr = cv2.imread(path)
        if bgr is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {path}")
        return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

    def show_detections(image_rgb, boxes, index, label):
        """Plot the image beside a copy that has the detected boxes drawn on it."""
        # Boxes are corner coordinates [x1, y1, x2, y2]; draw on a copy so the
        # image later cropped for embedding stays free of the red outline.
        annotated = image_rgb.copy()
        for x1, y1, x2, y2 in boxes:
            cv2.rectangle(annotated, (int(x1), int(y1)), (int(x2), int(y2)), (255, 0, 0), 2)

        plt.subplot(1, 2, 1)
        plt.imshow(image_rgb)
        plt.title(f"Original Image {index}\nLabel: {label}")

        plt.subplot(1, 2, 2)
        plt.imshow(annotated)
        plt.title(f"Detected Faces {index}")
        plt.axis('off')

        plt.show()

    def embed(image_rgb, box):
        """Crop one face from the image and return its embedding as a NumPy array."""
        return face_embedder(extract_face(image_rgb, box).unsqueeze(0)).squeeze().detach().numpy()

    # Randomly pick two files from the selected folder
    files = random.sample(os.listdir(folder_path), 2)

    image1_rgb = load_rgb(os.path.join(folder_path, files[0]))
    image2_rgb = load_rgb(os.path.join(folder_path, files[1]))

    # Detect faces; the detector hands back None instead of an array when it
    # finds nothing, so normalize that to an empty list.
    faces1, _ = face_detector.detect(image1_rgb)
    faces2, _ = face_detector.detect(image2_rgb)
    faces1 = [] if faces1 is None else faces1
    faces2 = [] if faces2 is None else faces2

    # Both images share the label taken from the folder name
    label = os.path.basename(folder_path)
    show_detections(image1_rgb, faces1, 1, label)
    show_detections(image2_rgb, faces2, 2, label)

    if len(faces1) == 0 or len(faces2) == 0:
        print("No face detected in at least one image; cannot compute a distance.")
        return

    # Only the first face of each image takes part in the comparison
    embedding1 = embed(image1_rgb, faces1[0])
    embedding2 = embed(image2_rgb, faces2[0])
    distance = euclidean_distances(embedding1.reshape(1, -1), embedding2.reshape(1, -1))[0][0]

    # Display the distance between the two embeddings
    print(f"Distance between embeddings: {distance:.4f}")

# Same-identity demo: compare two random images taken from one random folder.
train_directory = './train-dataset/train/'

selected_folder = random.choice(os.listdir(train_directory))
selected_folder_path = os.path.join(train_directory, selected_folder)

# Detector configured to keep every face; embedder with vggface2 weights
face_detector = MTCNN(keep_all=True)
face_embedder = InceptionResnetV1(pretrained='vggface2').eval()

# Show detections for both sampled images and print the embedding distance
process_images(
    folder_path=selected_folder_path,
    face_detector=face_detector,
    face_embedder=face_embedder,
)

## Preprocessing

In [None]:
import os
import shutil

# Set the directory path
directory_path = './train_detected_faces'

# Remove all files and subdirectories in the directory. On a fresh run the
# directory does not exist yet and os.listdir would raise FileNotFoundError,
# so a missing directory is treated as "nothing to clear".
if os.path.isdir(directory_path):
    for item in os.listdir(directory_path):
        item_path = os.path.join(directory_path, item)
        if os.path.isfile(item_path) or os.path.islink(item_path):
            # Files and symlinks are unlinked (a symlink's target is left alone)
            os.unlink(item_path)
        elif os.path.isdir(item_path):
            shutil.rmtree(item_path)
    print("Contents of train_detected_face cleared.")
else:
    print(f"{directory_path} does not exist; nothing to clear.")

In [None]:
import os
import cv2
import numpy as np

data_directory = './train-dataset/train'
output_directory = './train_detected_faces/'

def detect_faces_and_save(image_paths, output_directory):
    """Crop the first Haar-cascade face of each image, resize it to 224x224, and save it.

    Each crop is written to
    ``<output_directory>/<parent folder of the image>/detected_face_<i>.jpg``,
    where ``i`` is the image's position in ``image_paths``. Files OpenCV cannot
    read are skipped and counted separately from images without a detected face.

    Args:
        image_paths: Sequence of image file paths to process.
        output_directory: Root directory under which the crops are saved.

    Returns:
        NumPy array built from the list of saved face crops.
    """
    processed_images = []
    images_without_faces = 0
    unreadable_images = 0

    # OpenCV's Haar-cascade frontal-face detector. Loading it does not depend
    # on the image, so do it once instead of on every iteration.
    face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades + 'haarcascade_frontalface_default.xml')

    for i, image_path in enumerate(image_paths):
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread returns None for files it cannot decode (e.g. stray
            # non-image files); skip them rather than crash in cvtColor.
            unreadable_images += 1
        else:
            gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            faces = face_cascade.detectMultiScale(gray, scaleFactor=1.3, minNeighbors=5)
            if len(faces) > 0:
                # Only the first detection is used, even if several are returned
                x, y, w, h = faces[0]
                face_roi = img[y:y+h, x:x+w]
                resized_face = cv2.resize(face_roi, (224, 224))

                # Name of the image's parent folder, resolved with os.path so
                # it does not depend on '/' being the path separator
                folder_name = os.path.basename(os.path.dirname(image_path))
                # Create the output folder if it doesn't exist
                output_folder = os.path.join(output_directory, folder_name)
                os.makedirs(output_folder, exist_ok=True)

                # Save the detected face with the same folder structure
                output_path = os.path.join(output_folder, f"detected_face_{i}.jpg")
                cv2.imwrite(output_path, resized_face)

                processed_images.append(resized_face)
            else:
                images_without_faces += 1

        # Overwrite a single progress line every 50 images
        if i % 50 == 0:
            print(f"{i}/{len(image_paths)} images processed", end='\r', flush=True)

    print(f"\nImages without faces detected/Total images: {images_without_faces}/{len(image_paths)}")
    if unreadable_images:
        print(f"Unreadable files skipped: {unreadable_images}")
    return np.array(processed_images)

# Make sure the destination root exists before any crop is written
os.makedirs(output_directory, exist_ok=True)

# Flatten every file of every class sub-folder into one list of paths
all_image_paths = [
    os.path.join(class_dir, image_name)
    for class_dir in (os.path.join(data_directory, entry) for entry in os.listdir(data_directory))
    if os.path.isdir(class_dir)
    for image_name in os.listdir(class_dir)
]

# Detect, crop and save a face for each collected image
X_all_processed = detect_faces_and_save(all_image_paths, output_directory)

print("face detection complete")

In [None]:
import os
import cv2
import matplotlib.pyplot as plt
import numpy as np

# Root directory holding the saved face crops (adjust if yours differs)
output_directory = './train_detected_faces'

# Sub-directories of the output directory, one per identity
folders = list(
    filter(
        lambda name: os.path.isdir(os.path.join(output_directory, name)),
        os.listdir(output_directory),
    )
)

# Five distinct folders drawn at random, one per grid row
selected_folders = np.random.choice(folders, size=5, replace=False)

# 5x5 grid: rows are folders, columns are that folder's first five listed files
fig, axes = plt.subplots(nrows=5, ncols=5, figsize=(15, 15))

for row, folder_name in enumerate(selected_folders):
    folder_path = os.path.join(output_directory, folder_name)

    for col, image_name in enumerate(os.listdir(folder_path)[:5]):
        # OpenCV loads BGR; convert to RGB for matplotlib
        img = cv2.cvtColor(cv2.imread(os.path.join(folder_path, image_name)), cv2.COLOR_BGR2RGB)

        cell = axes[row, col]
        cell.imshow(img)
        cell.set_title(f'{folder_name} - {col+1}')
        cell.axis('off')

plt.tight_layout()
plt.show()

In [None]:
import os

output_directory = './train_detected_faces'

# Every sub-directory of the output directory
all_folders = [
    entry
    for entry in os.listdir(output_directory)
    if os.path.isdir(os.path.join(output_directory, entry))
]

# Report how many entries each of those folders contains
for folder in all_folders:
    file_count = len(os.listdir(os.path.join(output_directory, folder)))
    print(f"Folder: {folder}, File Count: {file_count}")

In [None]:
from facenet_pytorch import InceptionResnetV1
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import euclidean_distances
import torch

def process_images(face1_path, face2_path, face_embedder):
    """Show two pre-cropped face images and print the distance between their embeddings.

    Args:
        face1_path: Path of the first face image; its parent folder name is
            used as the label.
        face2_path: Path of the second face image; its parent folder name is
            used as the label.
        face_embedder: Model that maps a batched image tensor to an embedding.

    Raises:
        FileNotFoundError: If OpenCV cannot read either image.
    """

    def load_rgb(path):
        """Read an image with OpenCV and return it in RGB channel order."""
        bgr = cv2.imread(path)
        if bgr is None:
            # cv2.imread reports failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {path}")
        return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

    def to_batch(image_rgb):
        """Turn an HxWxC uint8 array into a 1xCxHxW float tensor scaled to [0, 1]."""
        return torch.from_numpy(image_rgb.transpose(2, 0, 1)).unsqueeze(0).float() / 255.0

    # Read the pre-detected face images
    face1_rgb = load_rgb(face1_path)
    face2_rgb = load_rgb(face2_path)

    # Get labels from folder names
    label1 = os.path.basename(os.path.dirname(face1_path))
    label2 = os.path.basename(os.path.dirname(face2_path))

    # Display the two face images side by side
    plt.subplot(1, 2, 1)
    plt.imshow(face1_rgb)
    plt.title(f"Original Face 1\nLabel: {label1}")
    plt.axis('off')

    plt.subplot(1, 2, 2)
    plt.imshow(face2_rgb)
    plt.title(f"Original Face 2\nLabel: {label2}")
    plt.axis('off')

    plt.show()

    # Inference only: skip gradient tracking for the two forward passes
    with torch.no_grad():
        embeddings1 = face_embedder(to_batch(face1_rgb)).numpy()
        embeddings2 = face_embedder(to_batch(face2_rgb)).numpy()

    # Calculate distance between the two embeddings
    distance = euclidean_distances(embeddings1.reshape(1, -1), embeddings2.reshape(1, -1))[0][0]

    # Display the distance between the two embeddings
    print(f"Distance between embeddings: {distance:.4f}")

# Cropped-face demo: compare one random crop from each of two random folders.
train_detected_faces_directory = './train_detected_faces/'

selected_folders = random.sample(os.listdir(train_detected_faces_directory), 2)
folder_a, folder_b = selected_folders

# One random file per chosen folder
file1 = random.choice(os.listdir(os.path.join(train_detected_faces_directory, folder_a)))
file2 = random.choice(os.listdir(os.path.join(train_detected_faces_directory, folder_b)))

# Full paths of the two selected crops
face1_path = os.path.join(train_detected_faces_directory, folder_a, file1)
face2_path = os.path.join(train_detected_faces_directory, folder_b, file2)

# Embedder with vggface2 weights, in eval mode
face_embedder = InceptionResnetV1(pretrained='vggface2').eval()

# Show both crops and print the distance between their embeddings
process_images(face1_path=face1_path, face2_path=face2_path, face_embedder=face_embedder)

## Extracting embeddings

In [None]:
import shutil
import os

folder_path = './extracted_embeddings'

# Remove the embeddings folder if present, reporting which case applied
if not os.path.exists(folder_path):
    print(f"The folder {folder_path} does not exist.")
else:
    shutil.rmtree(folder_path)
    print(f"The folder {folder_path} has been deleted.")

In [49]:
import torch
from facenet_pytorch import InceptionResnetV1
from PIL import Image
from torchvision import transforms
from tqdm import tqdm

# Function to extract embeddings from a single folder with data augmentation
def extract_embeddings_from_folder(folder_path, face_embedder, device, output_directory):
    """Embed every image in a folder plus one randomly augmented copy of each.

    For each file, the embedding of the original image and the embedding of an
    augmented version (random flip, color jitter, rotation) are saved as
    ``.npy`` files in ``output_directory``. Files that fail to process are
    reported and skipped.

    Args:
        folder_path: Folder whose files are embedded; its basename is the label.
        face_embedder: Model that maps a batched image tensor to an embedding.
        device: Device the input tensors are moved to before embedding.
        output_directory: Directory the ``.npy`` files are written to; created
            if it does not exist.

    Returns:
        Tuple ``(label, embeddings)``. ``embeddings`` maps each image file name
        to its embedding and ``"<stem>_augmented_embedding.npy"`` to the
        embedding of the augmented copy.
    """
    embeddings = {}
    to_tensor = transforms.ToTensor()
    data_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
        transforms.RandomRotation(10),
        transforms.ToTensor()
    ])
    label = os.path.basename(folder_path)  # Get the label from the folder name

    # np.save does not create missing parent directories
    os.makedirs(output_directory, exist_ok=True)

    for image_name in tqdm(os.listdir(folder_path), desc=f"Processing {folder_path}"):
        image_path = os.path.join(folder_path, image_name)
        stem = os.path.splitext(image_name)[0]
        try:
            # The context manager closes the file handle; convert('RGB') loads
            # the pixels and gives non-RGB files (grayscale, RGBA, palette) the
            # same three-channel layout as the rest.
            with Image.open(image_path) as opened:
                img = opened.convert('RGB')

            # Inference only: skip gradient tracking for both forward passes
            with torch.no_grad():
                # Embedding of the original image
                img_tensor = to_tensor(img).unsqueeze(0).float().to(device)
                embedding = face_embedder(img_tensor).squeeze().cpu().numpy()

                # Embedding of one randomly augmented copy
                img_tensor_augmented = data_transform(img).unsqueeze(0).float().to(device)
                embedding_augmented = face_embedder(img_tensor_augmented).squeeze().cpu().numpy()

            # Save the embedding for the original image
            embeddings[image_name] = embedding
            output_emb_path = os.path.join(output_directory, f"{label}_{stem}_embedding.npy")
            np.save(output_emb_path, embedding)

            # Save the embedding for the augmented image
            embeddings[f"{stem}_augmented_embedding.npy"] = embedding_augmented
            output_emb_path_augmented = os.path.join(output_directory, f"{label}_{stem}_augmented_embedding.npy")
            np.save(output_emb_path_augmented, embedding_augmented)
        except Exception as e:
            # Best effort: report the failing file and carry on with the rest
            print(f"Error processing {image_name}: {str(e)}")

    return label, embeddings

input_directory = './train_detected_faces'
output_directory = './extracted_embeddings'

# Initialize InceptionResnetV1 for face embedding on the CPU.
# To use a GPU when one is available, replace the assignment with:
#   device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device = 'cpu'
face_embedder = InceptionResnetV1(pretrained='vggface2').eval().to(device)

# Loop through each folder in the input directory
for folder_name in os.listdir(input_directory):
    folder_path = os.path.join(input_directory, folder_name)

    # Skip stray non-directory entries: the extractor lists the folder before
    # its try block, so a plain file here would abort the whole run.
    if not os.path.isdir(folder_path):
        continue

    # Create subfolder in output_directory for the current label
    label_output_directory = os.path.join(output_directory, folder_name)
    os.makedirs(label_output_directory, exist_ok=True)

    # Extract embeddings from the current folder with data augmentation
    label, embeddings = extract_embeddings_from_folder(folder_path, face_embedder, device, label_output_directory)

print("Embeddings extraction complete.")


Processing ./train_detected_faces/Rebecca Ferguson: 100%|██████████| 75/75 [00:17<00:00,  4.25it/s]
Processing ./train_detected_faces/Zendaya: 100%|██████████| 67/67 [00:15<00:00,  4.46it/s]
Processing ./train_detected_faces/Lionel Messi: 100%|██████████| 65/65 [00:14<00:00,  4.35it/s]
Processing ./train_detected_faces/Henry Cavil: 100%|██████████| 60/60 [00:15<00:00,  3.86it/s]
Processing ./train_detected_faces/Madelaine Petsch: 100%|██████████| 62/62 [00:13<00:00,  4.67it/s]
Processing ./train_detected_faces/Robert Downey Jr: 100%|██████████| 65/65 [00:14<00:00,  4.63it/s]
Processing ./train_detected_faces/Jake Mcdorman: 100%|██████████| 81/81 [00:16<00:00,  5.05it/s]
Processing ./train_detected_faces/Tuppence Middleton: 100%|██████████| 73/73 [00:14<00:00,  5.00it/s]
Processing ./train_detected_faces/Josh Radnor: 100%|██████████| 71/71 [00:13<00:00,  5.19it/s]
Processing ./train_detected_faces/barbara palvin: 100%|██████████| 65/65 [00:12<00:00,  5.03it/s]
Processing ./train_detecte

Embeddings extraction complete.



