In [None]:
# ref :  https://www.acceluniverse.com/blog/developers/2020/03/pythonefficientnet-multi-outpu.html

In [4]:
# organizing Dataset 
import os
import cv2 
import numpy as np
import matplotlib.pyplot as plt
import random 
import pandas as pd 

In [12]:
train_df = pd.read_csv("fairface_label_train.csv") # shape is (86744, 5)
test_df = pd.read_csv("fairface_label_val.csv") # shape is (10954, 5) 

In [13]:
train_df

Unnamed: 0,file,age,gender,race,service_test
0,train/1.jpg,50-59,Male,East Asian,True
1,train/2.jpg,30-39,Female,Indian,False
2,train/3.jpg,3-9,Female,Black,False
3,train/4.jpg,20-29,Female,Indian,True
4,train/5.jpg,20-29,Female,Indian,True
...,...,...,...,...,...
86739,train/86740.jpg,20-29,Male,Indian,True
86740,train/86741.jpg,10-19,Male,Indian,True
86741,train/86742.jpg,more than 70,Female,Indian,True
86742,train/86743.jpg,10-19,Female,Black,True


In [14]:
train_df["age_id"] = train_df["age"].astype("category").cat.codes
train_df["race_id"] = train_df["race"].astype("category").cat.codes
train_df["gender_id"] = train_df["gender"].astype("category").cat.codes


In [28]:
race_c = train_df["race"].astype("category").astype("category")
print(race_c)


0        East Asian
1            Indian
2             Black
3            Indian
4            Indian
            ...    
86739        Indian
86740        Indian
86741        Indian
86742         Black
86743         White
Name: race, Length: 86744, dtype: category
Categories (7, object): ['Black', 'East Asian', 'Indian', 'Latino_Hispanic', 'Middle Eastern', 'Southeast Asian', 'White']


In [29]:
gender_c = train_df["gender"].astype("category").astype("category")
print(gender_c)


0          Male
1        Female
2        Female
3        Female
4        Female
          ...  
86739      Male
86740      Male
86741    Female
86742    Female
86743      Male
Name: gender, Length: 86744, dtype: category
Categories (2, object): ['Female', 'Male']


In [30]:
age_c = train_df["age"].astype("category").astype("category")
print(age_c)


0               50-59
1               30-39
2                 3-9
3               20-29
4               20-29
             ...     
86739           20-29
86740           10-19
86741    more than 70
86742           10-19
86743           40-49
Name: age, Length: 86744, dtype: category
Categories (9, object): ['0-2', '10-19', '20-29', '3-9', ..., '40-49', '50-59', '60-69', 'more than 70']


In [22]:
train_df

Unnamed: 0,file,age,gender,race,service_test,age_id,race_id,gender_id
0,train/1.jpg,50-59,Male,East Asian,True,6,1,1
1,train/2.jpg,30-39,Female,Indian,False,4,2,0
2,train/3.jpg,3-9,Female,Black,False,3,0,0
3,train/4.jpg,20-29,Female,Indian,True,2,2,0
4,train/5.jpg,20-29,Female,Indian,True,2,2,0
...,...,...,...,...,...,...,...,...
86739,train/86740.jpg,20-29,Male,Indian,True,2,2,1
86740,train/86741.jpg,10-19,Male,Indian,True,1,2,1
86741,train/86742.jpg,more than 70,Female,Indian,True,8,2,0
86742,train/86743.jpg,10-19,Female,Black,True,1,0,0


# 学習コード

In [23]:
import glob
import math
import os
import random

import cv2
import numpy as np
import pandas as pd
import scipy
from matplotlib import pyplot as plt
from PIL import Image
from scipy.stats import multivariate_normal
from skimage import io
from sklearn.model_selection import train_test_split
from tqdm import tqdm

import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2, ResNet50
from tensorflow.keras.layers import (
    Activation,
    AveragePooling2D,
    MaxPooling2D,
    BatchNormalization,
    Concatenate,
    Conv2D,
    Conv2DTranspose,
    Dense,
    Dropout,
    Flatten,
    GlobalAveragePooling2D,
    Input,
    MaxPool2D,
    Reshape,
    UpSampling2D,
    concatenate,
)
from tensorflow.keras.applications import EfficientNetB0

from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import SGD, Adam

# import tensorflow_addons as tfa
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [45]:
seed = 42
NUM_CLASSES_GENDER=2
NUM_CLASSES_RACE=7
NUM_CLASSES_AGE=9
EPOCHS=30
INPUT_SIZE = (224, 224)
BATCH_SIZE = 16
out_root = "./results/"

In [35]:
from datetime import datetime, timezone, timedelta

tz_jst = timezone(timedelta(hours=9))
train_start_time = datetime.now(tz=tz_jst).strftime('%Y%m%d_%H%M')

out_root = out_root + train_start_time +  "/"

In [25]:
X_paths = train_df["file"].values.tolist()
y = train_df["race"].values.tolist()

In [26]:
X_train, X_val, y_train, y_val = train_test_split(
    X_paths, y, test_size=0.2, random_state=seed
)

In [39]:
train_df["is_train"] = train_df['file'].isin(X_train)

In [32]:
info = {}
info["num_classes_gender"] = NUM_CLASSES_GENDER
info["num_classes_race"] = NUM_CLASSES_RACE
info["num_classes_age"] = NUM_CLASSES_AGE
info["input_shape"] = INPUT_SIZE
info["batch_size"] = BATCH_SIZE

In [36]:
import albumentations as A

transform = A.Compose(
    [
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(
            brightness_limit=(-0.1, 0.1), contrast_limit=(-0.1, 0.1), p=0.5
        ),
        A.ShiftScaleRotate(p=0.5, shift_limit=0.0625, rotate_limit=(-5, 5)),
        A.RandomResizedCrop(
            width=info["input_shape"][1],
            height=info["input_shape"][0],
            scale=(0.9, 1.1),
        ),
    ]
)

In [98]:
class DataGenerator(tf.keras.utils.Sequence):
    def __init__(
        self, df, subset="train", shuffle=False, preprocess=None, info={}
    ):
        super().__init__()
        self.paths = df["file"].values
        self.labels_age = df["age_id"].values
        self.labels_race = df["race_id"].values
        self.labels_gender = df["gender_id"].values

        self.subset = subset
        self.shuffle = shuffle
        self.preprocess = preprocess
        self.info = info
        self.num_class_gender = info["num_classes_gender"] 
        self.num_class_race = info["num_classes_race"] 
        self.num_class_age = info["num_classes_age"] 
        self.input_shape = info["input_shape"]
        self.batch_size = info["batch_size"]
        self.on_epoch_end()

    def __len__(self):
        return int(np.floor(len(self.paths) / self.batch_size))

    def on_epoch_end(self):
        self.indexes = np.arange(len(self.paths))
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __getitem__(self, index):

        X = np.empty(
            (self.batch_size, self.input_shape[0], self.input_shape[1], 3),
            dtype=np.float32,
        )
        y_age = np.zeros((self.batch_size, self.num_class_age), dtype=np.uint8)
        y_race = np.zeros((self.batch_size, self.num_class_race), dtype=np.uint8)
        y_gender = np.zeros((self.batch_size, self.num_class_gender), dtype=np.uint8)
        indexes = self.indexes[index * self.batch_size : (index + 1) * self.batch_size]
        
        
        for i, idx in enumerate(indexes):
            # images
            img_path = self.paths[idx]
            image = (np.array(Image.open(img_path).convert("RGB")) / 255).astype(
                np.float32
            )
            if len(image.shape) == 2:
                image = np.stack([image, image, image], 2)
            
            # labels
            y_age[i, self.labels_age[idx]] = 1
            y_race[i, self.labels_race[idx]] = 1
            y_gender[i, self.labels_gender[idx]] = 1

            # ================================
            # preprocessing with albumentations
            # ================================
            if self.preprocess != None:
                transformed = self.preprocess(image=image)
                X[i,] = transformed[
                    "image"
                ].astype(np.float32)
            else:
                X[i,] = cv2.resize(
                    image,
                    dsize=(self.input_shape[1], self.input_shape[0]),
                    interpolation=cv2.INTER_CUBIC,
                ).astype(np.float32)

        
        # ================================
        # return
        # ================================
        return X.astype(np.float32), [y_age.astype(np.float32), y_race.astype(np.float32), y_gender.astype(np.float32)]

In [99]:
train_datagenerator = DataGenerator(
    train_df[train_df["is_train"]], subset="train", shuffle=True, preprocess=transform, info=info
)

valid_datagenerator = DataGenerator(
    train_df[~train_df["is_train"]], subset="valid", shuffle=False, preprocess=None, info=info
)

In [100]:
train_datagenerator[0][1][0].shape

(16, 9)

In [101]:
def myMobileNetV2(n_classes, input_size=(224, 224, 3)):
    """Input"""
    inputs = Input(input_size)

    """ Encoder """
    # model = EfficientNetB0(weights='imagenet')
    
    encoder = MobileNetV2(weights="imagenet", include_top=False, input_tensor=inputs)
    h = Flatten()(encoder.output)
    model_output_age = Dense(n_classes[0], activation="softmax", name='age_output')(h)
    model_output_race = Dense(n_classes[1], activation="softmax", name='race_output')(h)
    model_output_gender = Dense(n_classes[2], activation="softmax", name='gender_output')(h)

    model = Model(inputs, [model_output_age, model_output_race, model_output_gender])
    return model

In [104]:
model = myMobileNetV2(n_classes=[ NUM_CLASSES_AGE, NUM_CLASSES_RACE, NUM_CLASSES_GENDER])
steps_per_epoch = np.ceil((len(X_train)) / BATCH_SIZE)
steps_per_epoch_val = np.ceil((len(X_val)) / BATCH_SIZE)

if True:
    es = tf.keras.callbacks.EarlyStopping(
        monitor="val_accuracy", mode="max", verbose=1, patience=10
    )  # Early stopping (stops training when validation doesn't improve for {patience} epochs)
    save_best = tf.keras.callbacks.ModelCheckpoint(
        f"{out_root}myMobileNetV2.h5", monitor="val_race_output_accuracy", save_best_only=True, mode="max"
    )  # Saves the best version of the model to disk (as measured on the validation data set)
    learning_rate_reduction = tf.keras.callbacks.ReduceLROnPlateau(
        monitor="val_race_output_accuracy", patience=2, verbose=1, factor=0.1
    )

    # Warming up
    model.compile(optimizer=SGD(learning_rate=1e-4), 
              loss={'age_output': 'categorical_crossentropy', 'race_output': 'categorical_crossentropy', 'gender_output': 'categorical_crossentropy'},
              metrics={'age_output': 'accuracy', 'race_output': 'accuracy', 'gender_output': 'accuracy'})


    history = model.fit_generator(
        train_datagenerator,
        epochs=EPOCHS,
        validation_data=valid_datagenerator,
        callbacks=[es, save_best, learning_rate_reduction],
    )

    # summarize history for loss
    plt.plot(history.history["loss"])
    plt.plot(history.history["val_loss"])
    plt.title("model loss")
    plt.ylabel("loss")
    plt.xlabel("epoch")
    plt.legend(["train", "validation"], loc="upper left")
    plt.savefig(f"{out_root}loss.jpg")
    plt.show()
    model.load_weights(f"{out_root}myMobileNetV2.h5")
else:
    model.load_weights(f"{out_root}myMobileNetV2.h5")
    # model.evaluate(val_datagen, verbose=1)

Epoch 1/30




Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)

