### Race prediction with classification method
Our race classification model is designed with inclusivity at its core, capable of distinguishing among five distinct racial categories: White, Black, Asian, Indian, and Others. This classification framework allows for a broad understanding of diversity, ensuring that our technology can recognize and appreciate the rich variety of human features across different ethnic backgrounds. By training our model on a diverse dataset, we ensure it's equipped to identify these categories with precision, making it a powerful tool for applications requiring nuanced understanding of racial characteristics.

In [1]:
import os
import numpy as np
import tensorflow as tf
from keras.callbacks import ReduceLROnPlateau, ModelCheckpoint
import cv2
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.utils import to_categorical
from keras_vggface.vggface import VGGFace
from keras.models import Model
from keras.layers import Dense, Flatten, BatchNormalization, Dropout
from keras.optimizers import Adam
from tensorflow.keras.layers import SpatialDropout2D, SeparableConv2D, MaxPooling2D

For managing large datasets efficiently and minimizing memory usage, we utilize data generators. These generators stream data in batches directly to the model during training, enabling real-time data augmentation and improving model performance without overwhelming system resources.

In [2]:
#data generator method
class DataGenerator(tf.keras.utils.Sequence):
    def __init__(self, list_IDs, load_from, batch_size=32, dim=(224,224), n_channels=3,
                 shuffle=True, preprocessing=None):
        self.dim = dim
        self.batch_size = batch_size
        self.list_IDs = list_IDs
        self.n_channels = n_channels
        self.shuffle = shuffle
        self.load_from = load_from
        self.preprocessing = preprocessing
        self.on_epoch_end()

    def __len__(self):
        # Denotes the number of batches per epoch
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        # Generate one batch of data
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        list_IDs_temp = [self.list_IDs[k] for k in indexes]
        X, y = self.__data_generation(list_IDs_temp)
        return X, y

    def on_epoch_end(self):
        # Updates indexes after each epoch
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        X = np.empty((self.batch_size, *self.dim, self.n_channels))
        y = np.empty((self.batch_size, 5), dtype=int)  # Assuming 5 classes and one-hot encoding is needed

        for i, ID in enumerate(list_IDs_temp):
            img_path = os.path.join(self.load_from, ID)
            img = load_img(img_path, target_size=self.dim)
            img = img_to_array(img)
            img = self.preprocessing(img) if self.preprocessing else img
            X[i,] = img

            # Extracting the label assuming it's in the 3rd part of the filename after splitting
            # Adjust the index according to the actual position of the label in your filenames
            label_part = ID.split('_')[2]  # This needs to match your filename structure
            try:
                label = int(label_part)  # Make sure this part is correctly convertible to an integer
            except ValueError as e:
                print(f"Error converting label to int for ID: {ID} - Error: {e}")
                continue  # Skip this sample or handle error as appropriate

            y[i] = to_categorical(label, num_classes=5)

        return X, y



In [4]:
#data gen config
load_from_train = 'data/train_race/'
load_from_val = 'data/test/'

def preprocess_input_vggface(x):
    x = np.array(x, dtype=np.float32)
    x[..., 0] -= 93.5940  # Subtract the mean red value
    x[..., 1] -= 104.7624 # Subtract the mean green value
    x[..., 2] -= 129.1863 # Subtract the mean blue value
    return x

# Parameters
batch_size = 20
params = {'dim': (224, 224),
          'batch_size': batch_size,
          'n_channels': 3,
          'shuffle': True,
          'preprocessing': preprocess_input_vggface}

ids_train = os.listdir(load_from_train) # IDs for training
ids_val = os.listdir(load_from_val) # IDs for validation

training_generator = DataGenerator(list_IDs = ids_train, load_from = load_from_train, **params)
validation_generator = DataGenerator(list_IDs = ids_val, load_from = load_from_val, **params)

### Build the model with VGGFace

In [5]:
# Load VGGFace model
base_model = VGGFace(model='senet50', include_top=False, input_shape=(224, 224, 3), pooling='avg')

# Unfreeze the last convolutional layer
base_model.layers[-2].trainable = True

# Fully connected system
x = base_model.output
x = Flatten()(x)
x = BatchNormalization()(x)
x = Dropout(0.6)(x)
x = Dense(128, activation='relu')(x)
x = Dropout(0.6)(x)
x = Dense(64, activation='relu')(x)
x = BatchNormalization()(x)

# Output layer for 4-class classification
predictions = Dense(5, activation='softmax')(x)  # Use softmax for multi-class classification

# This is the model we will train for classification
model = Model(inputs=base_model.input, outputs=predictions)

# Compile the model for classification
optimizer = Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])


In [6]:
#lr_reduce = ReduceLROnPlateau(monitor='val_accuracy', factor=0.6, patience=8, verbose=1, mode='max', min_lr=5e-5)
os.makedirs('saved_models_race', exist_ok=True)
filename = 'saved_models_race/-{epoch:02d}---{val_loss:.4f}-{val_accuracy:.4f}---{loss:.4f}-{accuracy:.4f}.h5'

checkpoint = ModelCheckpoint(filename, monitor= 'val_accuracy', mode= 'max', save_best_only = True, verbose= 1)
reduceLROnPlat = ReduceLROnPlateau(monitor='val_accuracy', factor=0.8, patience=4, verbose=1, mode='auto', min_delta=0.0001, min_lr=0.0001)
callbacks_list = [checkpoint, reduceLROnPlat]

In [8]:
# Fit the model using the Dataset
history = model.fit(
    training_generator,
    validation_data=validation_generator,
    epochs=30,
    callbacks = callbacks_list
)

Epoch 1/30
Epoch 1: val_accuracy did not improve from 0.83776
Epoch 2/30
Epoch 2: val_accuracy did not improve from 0.83776
Epoch 3/30
Epoch 3: val_accuracy did not improve from 0.83776
Epoch 4/30
Epoch 4: val_accuracy did not improve from 0.83776
Epoch 5/30
Epoch 5: val_accuracy did not improve from 0.83776
Epoch 6/30
Epoch 6: val_accuracy did not improve from 0.83776
Epoch 7/30
Epoch 7: val_accuracy did not improve from 0.83776
Epoch 8/30
Epoch 8: val_accuracy did not improve from 0.83776
Epoch 9/30
Epoch 9: val_accuracy did not improve from 0.83776
Epoch 10/30
Epoch 10: val_accuracy did not improve from 0.83776
Epoch 11/30
Epoch 11: val_accuracy did not improve from 0.83776
Epoch 12/30
Epoch 12: val_accuracy did not improve from 0.83776
Epoch 13/30
Epoch 13: val_accuracy did not improve from 0.83776
Epoch 14/30
Epoch 14: val_accuracy did not improve from 0.83776
Epoch 15/30
Epoch 15: val_accuracy did not improve from 0.83776
Epoch 16/30
Epoch 16: val_accuracy did not improve from 0.

In [None]:
k