<a href="https://www.kaggle.com/code/nicksv03/age-gender-detection?scriptVersionId=134788621" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# Importing Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from PIL import Image
import os
import warnings
warnings.filterwarnings('ignore')

# Importing Data

Converting our dataset into TFRecord files. 

In [None]:
# Root folder of the UTKFace images on Kaggle.
maindir = "/kaggle/input/utkface-new/UTKFace"
# os.listdir returns entries in arbitrary order; sorting first makes the
# seeded shuffle of `files` (and therefore the train/valid/test split)
# reproducible across runs and machines.
files = sorted(os.listdir(maindir))
size = len(files)
print('total files:', size)

In [None]:
# Fix the TensorFlow and NumPy global seeds so random operations are repeatable.
tf.random.set_seed(51)
np.random.seed(51)

In [None]:
# Shuffle the file list in place using NumPy's global random state.
np.random.shuffle(files)

In [None]:
# Display the first entry of `files` as a quick sanity check.
files[0]

In [None]:
# Preview the first ten entries of `files` in a 2x5 grid (row-major order).
fig, ax = plt.subplots(2, 5, figsize=(12, 6), sharex=True, sharey=True)
for image_idx, axis in enumerate(ax.flat):
    picture = Image.open(os.path.join(maindir, files[image_idx]))
    axis.imshow(picture)
    axis.axis('off')
plt.show()

In [None]:
# Short aliases for the tf.train classes used to build serialized Examples.
BytesList = tf.train.BytesList
FloatList = tf.train.FloatList
Int64List = tf.train.Int64List
Feature = tf.train.Feature
Features = tf.train.Features
Example = tf.train.Example

In [None]:
def create_example(folder_path, filepath):
    """Build a ``tf.train.Example`` for one image file.

    The labels are read from the file name, whose first two
    ``_``-separated fields are parsed as age and gender.

    Args:
        folder_path: Directory containing the image.
        filepath: File name of the image inside ``folder_path``.

    Returns:
        An ``Example`` with two features: ``'image'`` (the pixel array
        serialized with ``tf.io.serialize_tensor``) and ``'age'`` (int64).

    Raises:
        ValueError, IndexError: If the file name does not start with two
            integer fields separated by ``_``.
    """
    full_path = os.path.join(folder_path, filepath)
    # Open through a context manager so the file handle is released
    # deterministically; np.array() copies the pixels out before it closes.
    with Image.open(full_path) as img:
        pixels = np.array(img)
    image = tf.io.serialize_tensor(pixels)

    split = filepath.split('_')
    age = int(split[0])
    # NOTE(review): gender is parsed (which validates the name) but is not
    # written to the record -- confirm whether it should be stored as well.
    gender = int(split[1])

    example = Example(
        features=Features(
            feature={
                'image': Feature(bytes_list=BytesList(value=[image.numpy()])),
                'age': Feature(int64_list=Int64List(value=[age])),
            }
        )
    )
    return example

In [None]:
def create_tf_record(set_, filename):
    """Write one serialized Example per file in ``set_`` to ``<filename>.tfrecord``.

    Args:
        set_: Iterable of file names located in ``maindir``.
        filename: Output path without the ``.tfrecord`` extension.
    """
    record_path = '%s.tfrecord' % filename
    with tf.io.TFRecordWriter(record_path) as writer:
        for name in set_:
            # 'utkcropped' is skipped -- presumably a nested folder rather
            # than an image file; verify against the dataset layout.
            if name == 'utkcropped':
                continue
            record = create_example(maindir, name)
            writer.write(record.SerializeToString())

In [None]:
# Index ranges of the three splits. The boundaries match the slices of `files`
# written to the TFRecord files: files[:16597], files[16597:18968] and
# files[18968:]. Every index belongs to exactly one split and none is out of
# range.
train_range = list(range(0, 16597))
valid_range = list(range(16597, 18968))
test_range = list(range(18968, len(files)))

In [None]:
# Write the first 16597 shuffled files to train_set.tfrecord.
create_tf_record(files[:16597], 'train_set')

In [None]:
# Write files 16597..18967 to valid_set.tfrecord.
create_tf_record(files[16597:18968], 'valid_set')

In [None]:
# Write the remaining files (index 18968 onwards) to test_set.tfrecord.
create_tf_record(files[18968:], 'test_set')

In [None]:
@tf.function
def preprocess(tfrecord):
    """Decode one serialized Example into an ``(image, age)`` pair.

    Args:
        tfrecord: Scalar string tensor holding a serialized ``Example`` with
            ``'image'`` and ``'age'`` features.

    Returns:
        A tuple of the image, resized to 224x224 and passed through the
        Xception ``preprocess_input``, and the int64 age label.
    """
    schema = {
        'image': tf.io.FixedLenFeature([], tf.string, default_value=''),
        'age': tf.io.FixedLenFeature([], tf.int64, default_value=-1),
    }
    parsed = tf.io.parse_single_example(tfrecord, schema)
    pixels = tf.io.parse_tensor(parsed['image'], out_type=tf.uint8)
    # Pin the shape explicitly; the records are assumed to hold 200x200
    # three-channel images.
    pixels = tf.reshape(pixels, shape=[200, 200, 3])
    # Resize (not reshape) to the 224x224 input size used for the model.
    resized = tf.image.resize(pixels, [224, 224])
    scaled = keras.applications.xception.preprocess_input(resized)
    return scaled, parsed['age']
    

In [None]:
# NOTE(review): the tf.function decorator looks unnecessary for code that only
# assembles a tf.data pipeline -- confirm whether it can be dropped.
@tf.function
def utkface_dataset(filepaths, n_read_threads=4, shuffle_buffer_size=None,
                   n_parse_threads=4, batch_size=32, cache=True):
    """Build a batched ``tf.data`` pipeline from TFRecord file(s).

    Args:
        filepaths: Path(s) of the TFRecord file(s) to read.
        n_read_threads: Value passed as ``num_parallel_reads``.
        shuffle_buffer_size: Shuffle buffer size; falsy values disable shuffling.
        n_parse_threads: Value passed as ``num_parallel_calls`` for ``preprocess``.
        batch_size: Number of examples per batch.
        cache: Whether to cache the raw records (before shuffling and parsing).

    Returns:
        A dataset of ``(image, age)`` batches with a prefetch depth of 1.
    """
    pipeline = tf.data.TFRecordDataset(
        filepaths, num_parallel_reads=n_read_threads
    )
    if cache:
        pipeline = pipeline.cache()
    if shuffle_buffer_size:
        pipeline = pipeline.shuffle(shuffle_buffer_size)
    pipeline = pipeline.map(preprocess, num_parallel_calls=n_parse_threads)
    return pipeline.batch(batch_size).prefetch(1)

In [None]:
# Only the training pipeline is shuffled, so batches differ between epochs;
# validation and test data keep a fixed order for comparable metrics.
train_data = utkface_dataset('/kaggle/working/train_set.tfrecord',
                             shuffle_buffer_size=1000)
valid_data = utkface_dataset('/kaggle/working/valid_set.tfrecord')
test_data = utkface_dataset('/kaggle/working/test_set.tfrecord')

The data is now converted into TFRecord format.

Let's use a pretrained model to predict gender and age.

In [None]:
# Show the first image of two training batches together with its age label.
for image, age in train_data.take(2):
    # The Xception preprocess_input scales pixels to [-1, 1]; map them back to
    # [0, 1] so imshow displays the real colours instead of clipping.
    plt.imshow((image[0] + 1.0) / 2.0)
    plt.axis('off')
    plt.show()
    print('I am', age[0].numpy(), 'years old!')

# Transfer Learning with Xception

In [None]:
# Xception backbone with ImageNet weights and without its classification head.
base_model = keras.applications.xception.Xception(weights ='imagenet',
                                                  include_top = False)
# Regression head: global average pooling followed by a single output unit.
avg = keras.layers.GlobalAveragePooling2D()(base_model.output)
# NOTE(review): a ReLU output keeps predictions non-negative, but the unit can
# get stuck at zero -- consider whether a linear output is preferable.
output = keras.layers.Dense(1, activation='relu')(avg)
model = keras.models.Model(inputs=base_model.input, outputs=output)

In [None]:
# Stage 1: train only the new head while the pretrained base stays frozen.
for layer in base_model.layers:
    layer.trainable = False

optimizer = keras.optimizers.Nadam(learning_rate=1e-4)
model.compile(optimizer=optimizer, loss='mean_squared_error')
history = model.fit(
    train_data,
    epochs=15,
    validation_data=valid_data,
)

In [None]:
# Stage 2: unfreeze the base model and fine-tune the whole network.
for layer in base_model.layers:
    layer.trainable = True

# Changes to `trainable` only take effect once the model is compiled again,
# so recompile with a fresh optimizer instance before continuing training.
model.compile(loss='mean_squared_error',
              optimizer=keras.optimizers.Nadam(learning_rate=1e-4))

# Keep the best weights on disk and stop once the monitored metric has not
# improved for 5 epochs.
checkpoint_callback = keras.callbacks.ModelCheckpoint('model_1.h5', save_best_only=True)
early_stopping_callback = keras.callbacks.EarlyStopping(patience=5,restore_best_weights=True)

history = model.fit(train_data,
                   validation_data=valid_data,
                   epochs=300,
                   callbacks=[checkpoint_callback, early_stopping_callback])

In [None]:
# Reload the model saved to 'model_1.h5' by the checkpoint callback.
model = keras.models.load_model('model_1.h5')

In [None]:
# evaluate() returns the model's loss on the test set; taking the square root
# turns a mean-squared-error loss into a root-mean-squared error.
test_loss = model.evaluate(test_data)
rmsq = tf.sqrt(test_loss)
print("RMSQ for Test Data: %.3f" % rmsq.numpy())