## Import Modules

In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import cv2
from tqdm.notebook import tqdm
warnings.filterwarnings('ignore')
%matplotlib inline

## Feature extraction

In [2]:
%pip install git+https://github.com/serengil/deepface.git

Collecting git+https://github.com/serengil/deepface.git
  Cloning https://github.com/serengil/deepface.git to /tmp/pip-req-build-pvdst3ky
  Running command git clone --filter=blob:none --quiet https://github.com/serengil/deepface.git /tmp/pip-req-build-pvdst3ky
  Resolved https://github.com/serengil/deepface.git to commit c04e51e472be354cd2e5333c3dab9a173876d5ae
  Preparing metadata (setup.py) ... [?25ldone
Collecting gdown>=3.10.1 (from deepface==0.0.91)
  Downloading gdown-5.2.0-py3-none-any.whl.metadata (5.8 kB)
Collecting mtcnn>=0.1.0 (from deepface==0.0.91)
  Downloading mtcnn-0.1.1-py3-none-any.whl.metadata (5.8 kB)
Collecting retina-face>=0.0.1 (from deepface==0.0.91)
  Downloading retina_face-0.0.17-py3-none-any.whl.metadata (10 kB)
Collecting fire>=0.4.0 (from deepface==0.0.91)
  Downloading fire-0.6.0.tar.gz (88 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m88.4/88.4 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0mta [36m0:00:01[0m
[?25h  Preparing meta

In [3]:
def get_feature(df, filename = None):
    from deepface import DeepFace
    X = []
    error_images = []
    for image in tqdm(df['image']):
        try:
            embedding_objs = DeepFace.represent(img_path = image, enforce_detection = True)
            X.append(embedding_objs[0]['embedding'])
        except ValueError:
            error_images.append(image)
    X = np.array(X)
    if filename:
        np.save(f'{filename}.npy', X)
    return X

#### Get dataset UTK Face

In [4]:
BASE_DIR = '../input/utkface-new/UTKFace/'

# labels - age, gender, ethnicity
image_paths = []
age_labels = []
gender_labels = []

for filename in tqdm(os.listdir(BASE_DIR)):
    image_path = os.path.join(BASE_DIR, filename)
    temp = filename.split('_')
    age = int(temp[0])
    gender = int(temp[1])
    image_paths.append(image_path)
    age_labels.append(age)
    gender_labels.append(gender)
    
df = pd.DataFrame()
df['image'], df['age'], df['gender'] = image_paths, age_labels, gender_labels
df.head()

  0%|          | 0/23708 [00:00<?, ?it/s]

Unnamed: 0,image,age,gender
0,../input/utkface-new/UTKFace/26_0_2_2017010402...,26,0
1,../input/utkface-new/UTKFace/22_1_1_2017011223...,22,1
2,../input/utkface-new/UTKFace/21_1_3_2017010500...,21,1
3,../input/utkface-new/UTKFace/28_0_0_2017011718...,28,0
4,../input/utkface-new/UTKFace/17_1_4_2017010322...,17,1


#### Get dataset asia afad

In [5]:
BASE_DIR = '../input/asia-afad/AFAD-Full/'
# labels - age, gender, ethnicity
image_paths = []
age_labels = []
gender_labels = []

for age_dir in os.listdir(BASE_DIR):
    if age_dir.isdigit():
        age = int(age_dir)

        for gender_dir in os.listdir(os.path.join(BASE_DIR, age_dir)):
            if gender_dir.isdigit():
                gender = int(gender_dir)

                for image_file in os.listdir(os.path.join(BASE_DIR, age_dir, gender_dir)):
                    if image_file.endswith('.db'):
                        continue
                    image_path = os.path.join(BASE_DIR, age_dir, gender_dir, image_file)
                    image_paths.append(image_path)
                    if(gender == 111):
                        gender_labels.append(0)
                    if(gender == 112):
                        gender_labels.append(1)
                    age_labels.append(age)
    
df2 = pd.DataFrame()
df2['image'], df2['age'], df2['gender'] = image_paths, age_labels, gender_labels
df2.head()

Unnamed: 0,image,age,gender
0,../input/asia-afad/AFAD-Full/47/112/855927-0.jpg,47,1
1,../input/asia-afad/AFAD-Full/47/112/855705-0.jpg,47,1
2,../input/asia-afad/AFAD-Full/47/112/854778-0.jpg,47,1
3,../input/asia-afad/AFAD-Full/47/112/854588-0.jpg,47,1
4,../input/asia-afad/AFAD-Full/47/112/857962-0.jpg,47,1


#### Extract feature

In [6]:
# utk-face
get_feature(df, filename='utkface-feature-new')

2024-05-18 16:08:54.783800: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-05-18 16:08:54.783896: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-05-18 16:08:54.934986: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


24-05-18 16:09:06 - Directory /root/.deepface created
24-05-18 16:09:06 - Directory /root/.deepface/weights created


  0%|          | 0/23708 [00:00<?, ?it/s]

24-05-18 16:09:07 - vgg_face_weights.h5 will be downloaded...


Downloading...
From: https://github.com/serengil/deepface_models/releases/download/v1.0/vgg_face_weights.h5
To: /root/.deepface/weights/vgg_face_weights.h5

  0%|          | 0.00/580M [00:00<?, ?B/s][A
  2%|▏         | 13.1M/580M [00:00<00:04, 129MB/s][A
  8%|▊         | 48.8M/580M [00:00<00:02, 262MB/s][A
 15%|█▍        | 84.4M/580M [00:00<00:01, 303MB/s][A
 21%|██        | 120M/580M [00:00<00:01, 323MB/s] [A
 27%|██▋       | 156M/580M [00:00<00:01, 335MB/s][A
 33%|███▎      | 191M/580M [00:00<00:01, 341MB/s][A
 39%|███▉      | 228M/580M [00:00<00:01, 348MB/s][A
 45%|████▌     | 263M/580M [00:00<00:00, 349MB/s][A
 51%|█████▏    | 298M/580M [00:00<00:00, 351MB/s][A
 58%|█████▊    | 334M/580M [00:01<00:00, 353MB/s][A
 64%|██████▍   | 370M/580M [00:01<00:00, 353MB/s][A
 70%|██████▉   | 406M/580M [00:01<00:00, 352MB/s][A
 76%|███████▌  | 441M/580M [00:01<00:00, 351MB/s][A
 82%|████████▏ | 477M/580M [00:01<00:00, 351MB/s][A
 88%|████████▊ | 513M/580M [00:01<00:00, 353MB/s][

KeyboardInterrupt: 

In [7]:
# asia-afad
X1 = get_feature(df2.iloc[:82750])
X2 = get_feature(df2.iloc[82750:])
X = np.concatenante((X1, X2), axis=0)
np.save('asia-afad-feature.npy', X)

  0%|          | 0/82750 [00:00<?, ?it/s]

KeyboardInterrupt: 