# Student Names:
Rebecca Moore


1.   Task 1 - basic CNN
2.   Task 3 - VGG16 transfer Learning

Manish Reddy


1.   Task 1 - basic CNN
2.   Task 2 - Hash Filter and error function

# Setup

Imports

In [52]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np 
import pandas as pd
import os
import glob
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from google.colab import drive

from tensorflow.keras.utils import to_categorical
from PIL import Image
from tensorflow.keras.optimizers import Adam

from keras.models import Model
from tensorflow.keras.layers import BatchNormalization
from keras.layers.convolutional import Conv2D
from keras.layers.convolutional import MaxPooling2D
from keras.layers.core import Activation
from keras.layers.core import Dropout
from keras.layers.core import Lambda
from keras.layers.core import Dense
from keras.layers import Flatten
from keras.layers import Input
import tensorflow as tf
import plotly.graph_objects as go
from keras.callbacks import ModelCheckpoint

Mount Google Drive, where the UTKFace image directory is stored, and set up the label mappings for gender and race.

In [46]:
# The UTKFace images live on Google Drive, so it has to be mounted first.
drive.mount('/content/drive')

data_name = '/content/drive/MyDrive/UTKFace'

image_size = (224, 224)
batch_size = 32

# Train/test split fraction and the image size fed to the CNN.
TT_SPLIT = 0.7
IM_WIDTH = IM_HEIGHT = 198

# id -> label lookups for the categorical fields encoded in the file names.
dict_categories = {
    'race_id': dict(enumerate(['white', 'black', 'asian', 'indian', 'others'])),
    'sex_id': dict(enumerate(['male', 'female'])),
}

# Reverse lookups (label -> id), used to turn label strings back into ids.
dict_categories['sex_tmp'] = {
    label: idx for idx, label in dict_categories['sex_id'].items()
}
dict_categories['race_tmp'] = {
    label: idx for idx, label in dict_categories['race_id'].items()
}

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Parse through the directory and image files to extract the labels and create a dataframe


In [47]:
def parse(dataset_path, ext='jpg'):
    """Build a dataframe of labels from the image file names in a directory.

    Each file name (without extension) is split on '_' and must yield exactly
    four parts: age, gender id, race id and a trailing part that is ignored.
    Gender and race ids are translated to labels through ``dict_categories``.

    Args:
        dataset_path: Directory that is searched (non-recursively) for images.
        ext: File extension to match, without the leading dot.

    Returns:
        A DataFrame with columns ``age``, ``gender``, ``race`` and ``file``.
        Files whose names cannot be parsed are dropped. If nothing matches,
        an empty DataFrame with those four columns is returned.
    """
    def extract(path):
        # Returns (age, gender_label, race_label), or three Nones when the
        # file name does not follow the expected pattern.
        try:
            filename = os.path.split(path)[1]
            filename = os.path.splitext(filename)[0]
            age, gender, race, _ = filename.split('_')

            return int(age), dict_categories['sex_id'][int(gender)], dict_categories['race_id'][int(race)]
        except (ValueError, KeyError):
            # ValueError: wrong number of '_' parts or a non-numeric field.
            # KeyError: gender/race id that has no entry in dict_categories.
            return None, None, None

    files = glob.glob(os.path.join(dataset_path, "*.%s" % ext))
    records = [extract(file) for file in files]

    # Naming the columns up front keeps the frame well-formed even when no
    # files were found; renaming afterwards fails on an empty frame.
    df = pd.DataFrame(records, columns=['age', 'gender', 'race'])
    df['file'] = files
    df = df.dropna()
    return df

In [36]:
# Build the labelled dataframe from the UTKFace directory and preview it.
df = parse(dataset_path=data_name)
df.head(n=5)

Unnamed: 0,age,gender,race,file
0,56.0,male,white,/content/drive/MyDrive/UTKFace/56_0_0_20170119...
1,56.0,male,indian,/content/drive/MyDrive/UTKFace/56_0_3_20170119...
2,56.0,male,indian,/content/drive/MyDrive/UTKFace/56_0_3_20170119...
3,56.0,male,asian,/content/drive/MyDrive/UTKFace/56_0_2_20170119...
4,56.0,male,indian,/content/drive/MyDrive/UTKFace/56_0_3_20170119...


# Generator for CNN

In [53]:
class UtkGenerator():
    """Splits the labelled dataframe and yields image batches for training.

    ``split()`` must be called before ``generate_images()``: it adds the
    ``sex_id`` / ``race_id`` columns and sets ``max_age``, all of which the
    batch generator reads.
    """

    def __init__(self, df):
        """Store the dataframe; it needs ``age``, ``gender``, ``race`` and ``file`` columns."""
        self.df = df

    def split(self):
        """Shuffle row positions and split them into train / valid / test.

        ``TT_SPLIT`` of the rows go to train+valid and the rest to test; the
        first ``TT_SPLIT`` fraction of that portion is train and the remainder
        is valid. Also adds the integer ``sex_id`` and ``race_id`` columns to
        ``self.df`` and records the largest age as ``self.max_age``.

        Returns:
            Three arrays of positional row indices: (train, valid, test).
        """
        p = np.random.permutation(len(self.df))
        spliter = int(len(self.df) * TT_SPLIT)
        train = p[:spliter]
        test = p[spliter:]

        spliter = int(spliter * TT_SPLIT)
        train, valid = train[:spliter], train[spliter:]

        self.df['sex_id'] = self.df['gender'].map(lambda sex: dict_categories['sex_tmp'][sex])
        self.df['race_id'] = self.df['race'].map(lambda race: dict_categories['race_tmp'][race])

        self.max_age = self.df['age'].max()

        return train, valid, test

    def preprocess_image(self, img_path):
        """Load an image, resize it to IM_WIDTH x IM_HEIGHT and scale pixels to [0, 1]."""
        # The context manager releases the file handle once the pixels are read.
        with Image.open(img_path) as im:
            # Force 3 channels so every array in a batch has the same shape.
            im = im.convert('RGB').resize((IM_WIDTH, IM_HEIGHT))
            im = np.array(im) / 255.0

        return im

    def generate_images(self, image_idx, is_training, batch_size=16):
        """Yield ``(images, [ages, races, sexs])`` batches for the given rows.

        Args:
            image_idx: Positional row indices into ``self.df``.
            is_training: When true, loop over ``image_idx`` forever; otherwise
                stop after one pass. A trailing partial batch is never yielded
                on its own; with ``is_training`` it carries into the next pass.
            batch_size: Number of samples per yielded batch.

        Yields:
            A tuple of the image array and a list of three target arrays:
            ages divided by ``self.max_age``, one-hot races, one-hot sexes.
        """
        images, ages, races, sexs = [], [], [], []
        while True:
            for idx in image_idx:
                f = self.df.iloc[idx]

                age = f['age']
                # These are the integer id columns that split() creates.
                race = f['race_id']
                sex = f['sex_id']
                file = f['file']

                im = self.preprocess_image(file)
                ages.append(age / self.max_age)
                races.append(to_categorical(race, len(dict_categories['race_id'])))
                sexs.append(to_categorical(sex, len(dict_categories['sex_id'])))
                images.append(im)

                if len(images) >= batch_size:
                    yield np.array(images), [np.array(ages), np.array(races), np.array(sexs)]
                    images, ages, races, sexs = [], [], [], []

            if not is_training:
                break
                
# Wrap the dataframe and draw the shuffled train / valid / test index sets.
generator = UtkGenerator(df=df)
train, valid, test = generator.split()

# Set up the new model for the UTK dataframe, adapted from the cats vs. dogs CNN

In [50]:
class utkmodel():
    """Builds a three-branch CNN that predicts age, race and sex from one image.

    Each branch has its own convolutional stack (``hidden_layers`` is called
    once per branch, so no weights are shared) followed by a dense head.
    """

    def hidden_layers(self, inputs):
        """Apply three Conv -> ReLU -> BatchNorm -> MaxPool -> Dropout stages.

        The stages use 16, 32 and 32 filters; the first pools 3x3 and the
        other two pool 2x2. Returns the output tensor of the last stage.
        """
        x = Conv2D(16, (3, 3), padding="same")(inputs)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=-1)(x)
        x = MaxPooling2D(pool_size=(3, 3))(x)
        x = Dropout(0.25)(x)

        x = Conv2D(32, (3, 3), padding="same")(x)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=-1)(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)
        x = Dropout(0.25)(x)

        x = Conv2D(32, (3, 3), padding="same")(x)
        x = Activation("relu")(x)
        x = BatchNormalization(axis=-1)(x)
        x = MaxPooling2D(pool_size=(2, 2))(x)
        x = Dropout(0.25)(x)
        return x

    def race(self, inputs, num_races):
        """Race branch: conv stack plus a softmax over ``num_races`` classes.

        The output layer is named ``race_output``.
        """
        x = self.hidden_layers(inputs)
        x = Flatten()(x)
        x = Dense(128)(x)
        x = Activation("relu")(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        x = Dense(num_races)(x)
        x = Activation("softmax", name="race_output")(x)
        return x

    def sex(self, inputs, num_sexs=2):
        """Sex branch: grayscale conversion, conv stack and a sigmoid head.

        The output layer is named ``sex_output`` and has ``num_sexs`` units.
        """
        x = Lambda(lambda c: tf.image.rgb_to_grayscale(c))(inputs)
        # Feed the grayscale tensor into the conv stack. Passing `inputs`
        # here would leave the Lambda layer out of the graph entirely.
        x = self.hidden_layers(x)
        x = Flatten()(x)
        x = Dense(128)(x)
        x = Activation("relu")(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        x = Dense(num_sexs)(x)
        x = Activation("sigmoid", name="sex_output")(x)
        return x

    def age(self, inputs):
        """Age branch: conv stack plus a single linear unit for regression.

        The output layer is named ``age_output``.
        """
        x = self.hidden_layers(inputs)
        x = Flatten()(x)
        x = Dense(128)(x)
        x = Activation("relu")(x)
        x = BatchNormalization()(x)
        x = Dropout(0.5)(x)
        x = Dense(1)(x)
        x = Activation("linear", name="age_output")(x)
        return x

    def compile_model(self, width, height, num_races):
        """Assemble the multi-output model for ``(height, width, 3)`` inputs.

        Despite the name, this only builds the ``Model``; the caller still has
        to call ``model.compile``. Outputs are ordered age, race, sex.
        """
        input_shape = (height, width, 3)
        x = Input(shape=input_shape)
        a_branch = self.age(x)
        r_branch = self.race(x, num_races)
        s_branch = self.sex(x)

        model = Model(inputs=x,
                     outputs = [a_branch, r_branch, s_branch],
                     name="utk_cnn_p2")

        return model
    
# Build the three-output network for the configured image size and 5 race classes.
model_builder = utkmodel()
model = model_builder.compile_model(width=IM_WIDTH, height=IM_HEIGHT, num_races=5)

# Configure and Compile the model

In [51]:
learning_rate = 1e-4
epochs = 5
opt = Adam(learning_rate=learning_rate, decay=learning_rate / epochs)

# The dict keys must be the names of the model's output layers
# ('age_output', 'race_output', 'sex_output'); Keras rejects unknown keys.
model.compile(optimizer=opt,
              loss={
                  'age_output': 'mse',
                  'race_output': 'categorical_crossentropy',
                  'sex_output': 'binary_crossentropy'},
              # Relative weight of each output's loss in the total loss.
              loss_weights={
                  'age_output': 4.,
                  'race_output': 1.5,
                  'sex_output': 0.1},
              metrics={
                  'age_output': 'mae',
                  'race_output': 'accuracy',
                  'sex_output': 'accuracy'})

# Training

In [7]:
batch_size = 32
valid_batch_size = 32

# `generator` is the UtkGenerator instance created right after the class.
# Both generators run with is_training=True so they keep yielding across
# epochs; steps_per_epoch / validation_steps bound how much is consumed.
train_gen = generator.generate_images(train, True, batch_size=batch_size)
valid_gen = generator.generate_images(valid, True, batch_size=valid_batch_size)

callbacks = [
    ModelCheckpoint("./model_checkpoint", monitor='val_loss')
]

history = model.fit(train_gen,
                    steps_per_epoch=len(train)//batch_size,
                    epochs=epochs,
                    callbacks=callbacks,
                    validation_data=valid_gen,
                    validation_steps=len(valid)//valid_batch_size)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


# Plot for overall Loss

In [13]:
# Plot the combined (weighted) loss per epoch for the train and validation sets.
fig = go.Figure()

for trace_name, history_key in (('Train', 'loss'), ('Valid', 'val_loss')):
    curve = go.Scattergl(y=history.history[history_key], name=trace_name)
    fig.add_trace(curve)

layout_options = dict(
    height=500,
    width=700,
    title='Overall loss',
    xaxis_title='Epoch',
    yaxis_title='Loss',
)
fig.update_layout(**layout_options)

fig.show()