In [None]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
import os
from tqdm import tqdm

import tensorflow as tf

import keras
from keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint,EarlyStopping
from keras.layers import Dense, Activation, Dropout, Flatten, Input, Convolution2D, ZeroPadding2D, MaxPooling2D, Activation
from keras.layers import Conv2D, AveragePooling2D
from keras.models import Model, Sequential

from sklearn.model_selection import train_test_split

from keras import metrics

from keras.models import model_from_json
import matplotlib.pyplot as plt

In [None]:
# Read the extracted-info CSV, discard rows with missing values and keep only
# rows whose Age lies in the closed range [0, 90].
df = pd.read_csv('../input/utk-face-extracted-info/extracted_info.csv')
df = df.dropna()
age_in_range = (df['Age'] >= 0) & (df['Age'] <= 90)
df = df[age_in_range].reset_index(drop=True)
df = df.drop(columns=['DateTime', 'Unnamed: 0'])

# Turn the bare file names into paths under the image directory.
df['Name'] = '../input/utkface-new/UTKFace/' + df['Name'].astype(str)

# One output class per distinct age value that survived the filtering.
num_classes = df['Age'].nunique()
df.head()

In [None]:
# Draw one histogram per numeric column of the filtered metadata frame.
df.hist()

In [None]:
# The bare string below is a leftover note; it is evaluated and discarded.
'''
len(df[df['Age']>93]['Age']) ------- just 46 people
classes = 101 #(0, 100])
print("number of output classes: ",classes)
'''

# Shift every age down by one before it is used as a class label.
# NOTE(review): this cell is not idempotent - re-running it subtracts 1 again.
# NOTE(review): the earlier filter keeps Age == 0, which would become -1 here;
# confirm the dataset contains no such rows.
df['Age'] = df['Age'] - 1

In [None]:
# Hold out 20% of the rows as the test set; the remainder is split again into
# train/validation further down.
df_data = df.Name
y_data = df.Age
y2_data = df.Gender
X_train, X_test, y_train, y_test = train_test_split(
    df_data,
    y_data,
    test_size=0.20,
    random_state=40,
)

# Re-assemble the non-test portion into a two-column frame (path, age label).
df_train = pd.DataFrame({'Name': X_train, 'Age': y_train})
df_train.head(3)

In [None]:
# Carve 10% of the non-test rows off as the validation set.
df_data = df_train.Name
y_data = df_train.Age
y2_data = df.Gender
X_train, X_val, y_train, y_val = train_test_split(
    df_data,
    y_data,
    test_size=0.1,
    random_state=42,
)

# Final training frame: image path plus age label.
train = pd.DataFrame({'Name': X_train, 'Age': y_train})
train.head(3)

In [None]:
# Validation frame: image path plus age label.
val = pd.DataFrame({'Name': X_val, 'Age': y_val})
print(val.head(3))

# Test frame, built the same way from the 20% hold-out.
df_test = pd.DataFrame({'Name': X_test, 'Age': y_test})
print(df_test.head(3))

In [None]:
# The data-frame image flows are configured with class_mode='sparse', which
# needs string labels, so cast the Age column of every split to str.
for split_frame in (train, df_test, val):
    split_frame['Age'] = split_frame['Age'].astype('str')

In [None]:
# Quick diagnostics: distinct labels in the training split (plus the validation
# labels), training-set size, and how many rows carry a label of 80 or more.
print(train['Age'].nunique(),"\n", val['Age'])
print(train.shape[0])
print((df['Age'] >= 80).sum())

In [None]:
batch = 512

# One label vocabulary shared by all three flows, sorted NUMERICALLY.
# Without an explicit `classes` list each flow infers its own classes and sorts
# the string labels lexicographically ('0', '1', '10', '11', ...), so
#   * class index i would not be the i-th smallest age, and
#   * a split that lacks some age would number its classes differently from
#     the training split.
age_classes = sorted(
    set(train['Age']) | set(val['Age']) | set(df_test['Age']),
    key=float,
)

# Both generators only rescale pixel values to [0, 1]; no augmentation.
train_gen = ImageDataGenerator(rescale=1./255)
test_gen = ImageDataGenerator(rescale=1./255)


def make_age_flow(generator, frame, shuffle):
    """Build a batched image flow over `frame` with integer age-class labels.

    Parameters
    ----------
    generator : ImageDataGenerator
        Generator that defines the preprocessing applied to each image.
    frame : pandas.DataFrame
        Must hold the image path in 'Name' and the string label in 'Age'.
    shuffle : bool
        Whether the row order is shuffled between epochs.

    Returns
    -------
    The iterator returned by `flow_from_dataframe`, yielding 224x224 images and
    sparse labels indexed according to `age_classes`.
    """
    return generator.flow_from_dataframe(
        dataframe=frame,
        x_col='Name',
        y_col='Age',
        classes=age_classes,
        class_mode='sparse',
        target_size=(224, 224),
        batch_size=batch,
        shuffle=shuffle,
        seed=42,
    )


train_data = make_age_flow(train_gen, train, shuffle=True)

# The test flow must NOT be shuffled: predictions made from it are written back
# onto df_test row by row, so the batch order has to follow the frame order.
test_data = make_age_flow(test_gen, df_test, shuffle=False)

# Validation metrics do not depend on row order, so shuffling is unnecessary.
val_data = make_age_flow(train_gen, val, shuffle=False)

In [None]:
# VGG-Face network, rebuilt layer by layer so the pretrained weight file can be
# loaded into it. Each entry is (filters, number of 3x3 conv layers) for one
# convolutional stage; every stage ends with a 2x2 max-pool of stride 2.
vgg_stages = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

model = Sequential()
is_first_layer = True
for stage_filters, stage_depth in vgg_stages:
    for _ in range(stage_depth):
        # Pad by one pixel so the 3x3 convolution keeps the spatial size.
        if is_first_layer:
            model.add(ZeroPadding2D((1,1),input_shape=(224,224, 3)))
            is_first_layer = False
        else:
            model.add(ZeroPadding2D((1,1)))
        model.add(Convolution2D(stage_filters, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2,2), strides=(2,2)))

# Classifier head expressed as convolutions: 7x7 -> 1x1 -> 1x1 (2622 outputs).
for head_kernel in ((7, 7), (1, 1)):
    model.add(Convolution2D(4096, head_kernel, activation='relu'))
    model.add(Dropout(0.5))
model.add(Convolution2D(2622, (1, 1)))
model.add(Flatten())
model.add(Activation('softmax'))

# Load the pretrained weights into the layers defined above.
model.load_weights('../input/vggfaceweights/vgg_face_weights.h5')

In [None]:
# Sanity check: display the output shape of the pretrained network.
model.output_shape

In [None]:
# Replace the original classifier: branch off the fourth-from-last layer of the
# pretrained network and attach a fresh 1x1 convolution with one channel per
# age class, followed by flatten + softmax.
trunk_features = model.layers[-4].output
age_scores = Convolution2D(num_classes, (1, 1), name='predictions')(trunk_features)
age_scores = Flatten()(age_scores)
base_model_output = Activation('softmax')(age_scores)

base_model_output

In [None]:
# Age classifier: the pretrained network's input wired to the new age head.
age_model = Model(inputs=model.input, outputs=base_model_output)

# Freeze all but the last six layers of the original network. The layer objects
# are shared with age_model, so this also freezes them there.
for layer in model.layers[:-6]:
    layer.trainable = False

# The model output already passes through a softmax activation, so the loss has
# to be told it receives probabilities (from_logits=False). With
# from_logits=True the loss would apply softmax a second time, flattening the
# distribution and weakening the training signal.
age_model.compile(
    loss=tf.losses.SparseCategoricalCrossentropy(from_logits=False),
    optimizer=keras.optimizers.Adam(),
    metrics=['accuracy'],
)

# Keep only the weights of the epoch with the best validation loss.
checkpointer = ModelCheckpoint(
    filepath='classification_age_model_utk.hdf5',
    monitor="val_loss",
    verbose=1,
    save_best_only=True,
    mode='auto',
)

# One-hot copy of the labels. NOTE(review): nothing visible in this notebook
# reads `target_classes` (training uses sparse labels from the generators);
# confirm whether it is still needed.
target = df['Age'].values
target_classes = keras.utils.to_categorical(target, num_classes)

# Early-stopping callback on validation accuracy (three epochs of patience).
callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=3)

In [None]:
# Train the age classifier on the training flow and evaluate on the validation
# flow after every epoch; `checkpointer` is the only callback attached.
# NOTE(review): the EarlyStopping object `callback` is not in this callbacks
# list, so training is never stopped early - confirm that is intended.
history_2 = age_model.fit(
    train_data,
    validation_data=val_data,
    epochs= 10,
    callbacks = [checkpointer],
    shuffle=False
)
# Number of epochs actually run, taken from the recorded loss history.
eff_epochs_2 = len(history_2.history['loss'])

In [None]:
# Print the layer summary of `model`, the original pretrained network.
# NOTE(review): the network that was trained is `age_model`; confirm whether
# `age_model.summary()` was intended here.
model.summary()

In [None]:
# Per-class probabilities for every test image, shape (n_images, num_classes).
B = age_model.predict(test_data)
output_indexes = np.arange(num_classes)

# Expected age = sum_k P(class k) * age_label(k).
# Output unit k does not necessarily stand for label k: the data-frame flows
# assign class indices to the *string* labels, so the label behind each index
# is looked up in the mapping the model was trained with. Output units with no
# training label keep weight 0.
output_ages = np.zeros(num_classes, dtype=float)
for age_label, class_index in train_data.class_indices.items():
    output_ages[class_index] = float(age_label)

apparent_predictions = np.sum(B * output_ages, axis = 1)

In [None]:
# Attach both prediction flavours to the test frame and write it to disk:
#   Weighted_Avg - probability-weighted prediction computed earlier
#   ArgMax       - index of the most probable output class for each image
df_test['Weighted_Avg'] = apparent_predictions
argmax = [np.argmax(class_probs) for class_probs in B]
df_test['ArgMax'] = argmax
df_test.to_csv('Final__wtd_avg')

In [None]:
# Display the test frame together with the prediction columns added to it.
df_test

In [None]:
# Mean absolute error of the weighted-average prediction against the TRUE age
# label of each test image.
# The reference value must be the ground truth stored in df_test['Age'];
# comparing against the model's own argmax only measures how far the two
# prediction flavours disagree, not how wrong the model is.
# Predictions are truncated to whole numbers, as before. Both sides are
# expected to be in the same units (the shifted age labels).
predicted_ages = np.asarray(apparent_predictions, dtype=float).astype(int)
actual_ages = df_test['Age'].astype(float).to_numpy()

abs_errors = np.abs(predicted_ages - actual_ages)
# An empty test set has no defined error; report NaN instead of dividing by 0.
mae = float(abs_errors.mean()) if abs_errors.size else float('nan')

print("mae: ",mae)
print("instances: ",apparent_predictions.shape[0])