In [1]:
import os
import shutil
import numpy as np
from mtcnn import MTCNN
import tensorflow as tf
from tqdm import tqdm
from retinaface import RetinaFace

# Set the path to the VGGFace2 dataset
orig_dir = "I:/vggface2"

# Set the path to the directory where you want to copy the selected images
new_dir = "I:/balanced_vggface2"

# Set the path to the text file to save the selected file paths
txt_path = "I:/files.txt"

# Set the number of images to select per person
num_images = 40

# Seed NumPy's global RNG: the per-person sampling below uses np.random.choice,
# which draws from this global state. Without a seed every kernel restart
# selects a different subset and the run cannot be reproduced.
SEED = 42
np.random.seed(SEED)

# Create a list to store the selected file paths
selected_files = []

# Create the new directory if it doesn't exist (no-op when it is already there)
os.makedirs(new_dir, exist_ok=True)

# NOTE: faces are extracted with RetinaFace in the next cell; the MTCNN
# detector is intentionally left uninstantiated.
# detector = MTCNN()

In [2]:
# For every identity folder under orig_dir: sample up to `num_images` files,
# keep only those in which RetinaFace finds exactly one face, and write the
# aligned crop as a 112x112 JPEG under new_dir/<identity>/<original file name>.
for person_dir in tqdm(os.listdir(orig_dir)):
    person_src_dir = os.path.join(orig_dir, person_dir)
    if not os.path.isdir(person_src_dir):
        continue

    # Sorted so that, when the RNG has been seeded, the sample does not depend
    # on the arbitrary order in which the OS lists the folder.
    images = sorted(os.listdir(person_src_dir))
    if not images:
        # Nothing to sample from; np.random.choice would raise on an empty list.
        continue

    # replace=False raises ValueError when asked for more items than exist,
    # so cap the sample size at the number of files actually available.
    sample_size = min(num_images, len(images))
    selected_images = np.random.choice(images, size=sample_size, replace=False)

    # Create the new directory for this person
    person_new_dir = os.path.join(new_dir, person_dir)
    os.makedirs(person_new_dir, exist_ok=True)

    # Loop through the selected images and detect and save the faces
    for image_name in selected_images:
        image_path = os.path.join(person_src_dir, image_name)

        faces = RetinaFace.extract_faces(img_path=image_path, align=True)
        if len(faces) != 1:
            # Skip images with no face or more than one face detected
            continue

        # Resize the single detected face and store it as a JPEG
        face_image = faces[0]
        resized_image = tf.image.resize(face_image, [112, 112])
        resized_image = tf.cast(resized_image, tf.uint8)  # Convert float tensor to uint8 tensor
        face_path = os.path.join(person_new_dir, image_name)
        tf.io.write_file(face_path, tf.image.encode_jpeg(resized_image))
        selected_files.append(face_path)

# Save the selected file paths to the text file, one path per line
with open(txt_path, "w") as f:
    for path in selected_files:
        f.write(path + "\n")

100%|██████████| 9131/9131 [19:11:25<00:00,  7.57s/it]   


In [None]:
# delete ids with less than 30 images and keep only 30 images for each id
import random

# Set the minimum number of images per class
min_images_per_class = 30

# Other cells of this notebook create train/test/val/embeddings folders inside
# this same directory. They hold no image files at their top level, so without
# this guard a re-run of this cell would treat them as undersized classes and
# delete them wholesale.
reserved_dirs = {'train', 'test', 'val', 'embeddings'}

# File name suffixes that count as images (case-sensitive, as before)
image_extensions = ('.jpg', '.png', '.jpeg')

# Loop through each subdirectory (i.e., class) in the dataset directory
for class_dir in tqdm(os.listdir(new_dir)):
    if class_dir in reserved_dirs:
        continue

    class_path = os.path.join(new_dir, class_dir)

    # Only directories are classes; stray files are left untouched
    if not os.path.isdir(class_path):
        continue

    # Full paths of all image files in the current class directory
    image_files = [
        os.path.join(class_path, f)
        for f in os.listdir(class_path)
        if f.endswith(image_extensions)
    ]

    # Too few images: delete the entire class directory
    if len(image_files) < min_images_per_class:
        print('deleting' , class_dir)
        shutil.rmtree(class_path)
        continue

    # Enough images: keep a random min_images_per_class of them, delete the rest
    random.shuffle(image_files)
    for file_path in image_files[min_images_per_class:]:
        os.remove(file_path)

In [13]:
# Number of entries currently sitting directly under new_dir
remaining_entries = os.listdir(new_dir)
len(remaining_entries)

8907

In [2]:
import os
import random
import shutil
from tqdm import tqdm

# Set the paths to your image dataset folders
data_dir = "I:/balanced_vggface2"
train_dir = 'I:/balanced_vggface2/train'
test_dir = 'I:/balanced_vggface2/test'
val_dir = 'I:/balanced_vggface2/val'

# Set the number of images you want for each category
num_train = 20
num_test = 5
num_val = 5

# Fixed seed for the split. With an unseeded shuffle, re-running this cell
# copies a *different* split on top of the previous one, so the same image can
# end up in both train and test.
split_seed = 42

# The split folders (and the embeddings folder written by a later cell) live
# inside data_dir itself, so they must not be mistaken for classes on a re-run.
reserved_dirs = {'train', 'test', 'val', 'embeddings'}

# Get the list of classes in your dataset: real sub-directories only
classes = [
    name
    for name in sorted(os.listdir(data_dir))
    if name not in reserved_dirs and os.path.isdir(os.path.join(data_dir, name))
]

# Create the train, test, and val folders if they don't already exist
os.makedirs(train_dir, exist_ok=True)
os.makedirs(test_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

# (destination root, slice start, slice stop) for each split, in shuffle order
split_plan = (
    (train_dir, 0, num_train),
    (test_dir, num_train, num_train + num_test),
    (val_dir, num_train + num_test, num_train + num_test + num_val),
)

# Private RNG so the split is reproducible without touching the global one
split_rng = random.Random(split_seed)

# Loop through each class and copy the images to the appropriate folders
for class_name in tqdm(classes):
    class_path = os.path.join(data_dir, class_name)
    # Sort before shuffling: os.listdir order is arbitrary, and the seeded
    # shuffle is only repeatable when it starts from the same ordering.
    images = sorted(os.listdir(class_path))
    split_rng.shuffle(images)

    for split_dir, start, stop in split_plan:
        # Always create the per-class folder, even for an empty slice, because
        # the indexing cell lists train/test/val/<class> unconditionally.
        dst_class_dir = os.path.join(split_dir, class_name)
        os.makedirs(dst_class_dir, exist_ok=True)
        for image in images[start:stop]:
            shutil.copy(os.path.join(class_path, image), os.path.join(dst_class_dir, image))


100%|██████████| 8907/8907 [22:57<00:00,  6.47it/s] 


In [26]:
import onnx
import warnings
from onnx_tf.backend import prepare
import numpy as np
from datetime import datetime
import tensorflow as tf
import os
import cv2
from tqdm import tqdm
import PIL
from PIL import Image

# all_ids = np.load('./id_files/glint_all_ids.npz')['res']
# warnings.filterwarnings('ignore') # Ignore all the warning messages in this tutorial

# Location of the exported ONNX model on disk
onnx_model_path = 'F:/test/onnx_tensorflow/model.onnx'

# Parse the ONNX file, then wrap it with onnx-tf so it can be run through TensorFlow
onnx_model = onnx.load(onnx_model_path)
tf_rep = prepare(onnx_model)

 The versions of TensorFlow you are currently using is 2.11.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [9]:
from tqdm import tqdm
import os
# Set the path to the directory where you want to copy the selected images
new_dir = "I:/balanced_vggface2"

# Names of the split folders that sit directly under new_dir
split_names = ('train', 'test', 'val')

# Top-level folders that are pipeline outputs rather than identities. The
# embeddings folder is created by a later cell and would otherwise make a
# re-run of this cell fail when it looks for train/embeddings.
non_identity_dirs = set(split_names) | {'embeddings'}

# Maps identity -> {'train': [paths], 'test': [paths], 'val': [paths]}
all_ids_dict = dict()

# `identity` rather than `id`, so the builtin id() is not shadowed kernel-wide
for identity in tqdm(os.listdir(new_dir)):
    if identity in non_identity_dirs or not os.path.isdir(os.path.join(new_dir, identity)):
        continue

    # File names are sorted so the path lists (and anything computed from
    # them in order) do not depend on the OS listing order.
    all_ids_dict[identity] = {
        split: [
            os.path.join(new_dir, split, identity, file_name)
            for file_name in sorted(os.listdir(os.path.join(new_dir, split, identity)))
        ]
        for split in split_names
    }

100%|██████████| 8910/8910 [00:03<00:00, 2669.33it/s]


In [17]:
import json
# Write the identity -> split -> file-path index to ./vggface2/all_id_files.json.
# exist_ok=True lets the cell be re-run; a bare makedirs raises FileExistsError
# once the folder is there.
os.makedirs(os.path.join('.', 'vggface2'), exist_ok=True)
with open(os.path.join('.', 'vggface2', 'all_id_files.json'), 'w') as fp:
    json.dump(all_ids_dict, fp, indent=4)

In [21]:
# Identity names in the dict's insertion order
keys = [identity for identity in all_ids_dict]

In [35]:
import tensorflow as tf
import PIL
from PIL import Image
import numpy as np

dataset_path = 'I:/balanced_vggface2'

# Splits are processed (and their embeddings sliced back out) in this order
split_names = ('train', 'test', 'val')

for split in split_names:
    os.makedirs(os.path.join(dataset_path, 'embeddings', split), exist_ok=True)


def _load_face_tensor(img_path):
    """Load one image file and return it as a normalised float32 array.

    The image is read as 3-channel RGB, resized to 112x112 with
    nearest-neighbour sampling, scaled with (pixel - 127.5) / 128 and
    transposed from height/width/channel to channel/height/width order.

    Args:
        img_path: Path of the image file to read.

    Returns:
        A float32 NumPy array of shape (3, 112, 112).
    """
    # Context manager closes the file handle; convert('RGB') guarantees three
    # channels so the transpose below cannot fail on grayscale/RGBA input.
    with Image.open(img_path) as img:
        pixels = np.array(img.convert('RGB'))
    x = tf.image.resize(pixels, (112, 112), method="nearest")
    x = (tf.cast(x, tf.float32) - 127.5) / 128.
    return tf.transpose(x, perm=[2, 0, 1]).numpy()


for d in tqdm(keys):
    # Path lists per split, in the same order the batch is assembled
    split_paths = [all_ids_dict[d][split] for split in split_names]
    image_list = [_load_face_tensor(img_path) for paths in split_paths for img_path in paths]
    if not image_list:
        # No images recorded for this id: nothing to embed or save
        continue

    # One forward pass per identity over train + test + val images together
    id_emb = tf_rep.run(np.stack(image_list))._0

    # Slice the embeddings back out by the actual number of images in each
    # split, instead of assuming a fixed 20/5/5 layout.
    offset = 0
    for split, paths in zip(split_names, split_paths):
        np.savez_compressed(
            os.path.join(dataset_path, 'embeddings', split, d + '.npz'),
            res=id_emb[offset:offset + len(paths)],
        )
        offset += len(paths)

100%|██████████| 8907/8907 [11:38:04<00:00,  4.70s/it]  
