In [1]:
from os import listdir, mkdir
from os.path import dirname, isdir
from pathlib import Path
import datetime
import numpy as np
import pandas as pd
from random import sample
import zipfile as z
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from PIL import Image
import cv2

import tensorflow as tf
from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.applications.vgg19 import VGG19
from tensorflow.keras.applications.densenet import DenseNet121
from tensorflow.keras import Sequential, layers, Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, \
                                      LearningRateScheduler

In [3]:
# Report how many GPU devices TensorFlow detects on this machine.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

In [4]:
# Image shape fed to the network: 144x144 spatial size with 3 channels.
# The first two entries are also used as the resize target when loading images.
IMG_SIZE = (144, 144, 3)
# Dataset root. Later cells build sub-paths by plain string concatenation,
# so the trailing slash is significant.
path = "/kaggle/input/age-detection/"

In [5]:
# Make sure the extraction target directories exist. `exist_ok=True` replaces
# the manual is_dir() check (no check-then-create race) and `parents=True`
# also creates any missing intermediate directories.
# NOTE(review): /kaggle/input is normally mounted read-only, so creation can
# only succeed when the directories are already there — confirm the location.
for subdir in ("train", "test"):
    Path(path + subdir).mkdir(parents=True, exist_ok=True)

# with z.ZipFile(path + "train.zip") as zr:
#     zr.extractall(path + "train")

# with z.ZipFile(path + "test.zip") as zr:
#     zr.extractall(path + "test")

In [6]:
# Directory holding the extracted training archive (train.csv and Train/ images).
images_path = path + "train/"

# Build the CSV location from `images_path`; joining `path` (which already ends
# with "/") to "/train/train.csv" produced a doubled separator.
train_df = pd.read_csv(images_path + "train.csv")
# labels = train_df.Class.astype("category").cat.codes.to_list()

In [7]:
# train_df = train_df.sample(8000)

# Fixed seed so the train/validation split is identical after a kernel restart.
SPLIT_SEED = 42

# One-hot encode the class column. The explicit float dtype keeps the targets
# numeric regardless of the default dummy dtype of the installed pandas version.
labels_df = pd.get_dummies(train_df.Class, dtype="float32")
label_names = labels_df.columns.to_list()
# One 1-D one-hot vector per row, stored alongside the row it belongs to.
labels = list(labels_df.values)

train_df = train_df.assign(labels=labels)

# Stratify on the class so both partitions keep the same class proportions.
x_train, x_val = train_test_split(
    train_df,
    test_size=0.25,
    random_state=SPLIT_SEED,
    stratify=train_df.Class,
)
x_train.shape, x_val.shape

In [8]:
# Folder containing the individual training image files.
basedir = images_path + "Train/"

# Training images, resized to the network input size, in x_train row order.
train_images = []

for img_name in x_train.ID:
    img_path = basedir + img_name
    img = cv2.imread(img_path)
    # cv2.imread reports a missing/unreadable file by returning None rather
    # than raising; fail here with the offending path instead of letting
    # cv2.resize crash with an opaque assertion.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    # NOTE(review): OpenCV decodes to BGR channel order, while
    # resnet50.preprocess_input documents RGB input — confirm whether a
    # colour conversion is wanted (it must be applied to train/val/test alike).
    train_images.append(cv2.resize(img, dsize=IMG_SIZE[:-1]))

In [9]:
# Validation images, resized to the network input size, in x_val row order.
val_images = []

for img_name in x_val.ID:
    img_path = basedir + img_name
    img = cv2.imread(img_path)
    # cv2.imread reports a missing/unreadable file by returning None rather
    # than raising; fail here with the offending path instead of letting
    # cv2.resize crash with an opaque assertion.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    val_images.append(cv2.resize(img, dsize=IMG_SIZE[:-1]))

In [10]:
# Pair every training image with its one-hot label vector.
with tf.device("/device:GPU:0"):
    train_targets = x_train.labels.to_list()
    train_dataset = tf.data.Dataset.from_tensor_slices((train_images, train_targets))

In [11]:
# Pair every validation image with its one-hot label vector.
with tf.device("/device:GPU:0"):
    val_targets = x_val.labels.to_list()
    val_dataset = tf.data.Dataset.from_tensor_slices((val_images, val_targets))

In [12]:
# Sanity check: look at the first (image, label) pair of the training dataset.
image, label = next(iter(train_dataset))
print(label.numpy())
# The images were decoded with cv2.imread, which yields BGR channel order,
# whereas matplotlib interprets the last axis as RGB. Reverse the channel axis
# so the preview shows the true colours.
imshow(image.numpy()[..., ::-1].astype(int))

In [20]:
BATCH_SIZE = 64

# Keras ignores `fit(shuffle=...)` when the input is a tf.data.Dataset, so the
# shuffling has to be part of the pipeline. A buffer as large as the training
# set gives a full shuffle that is redrawn every epoch (this keeps an extra
# copy of the elements in memory). Validation order is left untouched.
train_dataset = train_dataset.shuffle(buffer_size=len(train_images)).batch(BATCH_SIZE)
val_dataset = val_dataset.batch(BATCH_SIZE)

In [21]:
# Augmentation pipeline: zero-pad the borders, then apply small random
# geometric perturbations (flip, shift, zoom, rotation).
data_aug = Sequential([
    layers.ZeroPadding2D(padding=3),
    layers.experimental.preprocessing.RandomFlip(),
    layers.experimental.preprocessing.RandomTranslation(
        height_factor=(-0.01, 0.01),
        width_factor=(-0.01, 0.01),
        fill_mode="constant",
    ),
    layers.experimental.preprocessing.RandomZoom(height_factor=(-.05, .05)),
    layers.experimental.preprocessing.RandomRotation(factor=(-0.05, 0.05)),
    # layers.experimental.preprocessing.Rescaling(1./255),
    # layers.experimental.preprocessing.RandomContrast(0.90),
])

# Input normalisation function belonging to the ResNet50 application module.
preprocess = tf.keras.applications.resnet50.preprocess_input
# preprocess = tf.keras.applications.densenet.preprocess_input
# preprocess = preprocess_input

In [22]:
# base_model = DenseNet121(include_top=False, input_shape=(256, 256, 3), weights="imagenet")
# base_model = VGG19(include_top=False, input_shape=(256, 256, 3), weights="imagenet")
# ImageNet-pretrained ResNet50 without its classification top, used as the
# convolutional feature extractor at the notebook's image size.
base_model = ResNet50(
    weights="imagenet",
    include_top=False,
    input_shape=IMG_SIZE,
)


In [23]:
# Freeze the pretrained backbone so that only the new head is trained at first.
base_model.trainable = False

# Classification head placed on top of the backbone's feature maps.
clf_layers = Sequential([
    layers.GlobalMaxPooling2D(),
    layers.Dropout(0.2),
    layers.Dense(512, activation="relu"),
    layers.Dropout(0.5),
    # This layer needs a non-linearity: a purely linear Dense followed by the
    # output Dense composes into a single linear map and adds no capacity.
    layers.Dense(256, activation="relu"),
    layers.Dropout(0.3),
    # NOTE(review): 3 output units are hard-coded — presumably the number of
    # one-hot label columns; confirm against the label encoding.
    layers.Dense(3, activation="softmax")
])

In [24]:
# Assemble the end-to-end model: augmentation -> preprocessing -> backbone -> head.
inputs = tf.keras.Input(shape=IMG_SIZE)
x = data_aug(inputs)
# The augmentation stack zero-pads the image, so its output is larger than the
# fixed input size the backbone was built with; resize back before going on.
x = layers.experimental.preprocessing.Resizing(IMG_SIZE[0], IMG_SIZE[1])(x)
# Feed the augmented tensor forward. Passing `inputs` here (as before) silently
# dropped the augmentation branch from the graph.
x = preprocess(x)
x = base_model(x)
outputs = clf_layers(x)

model = Model(inputs, outputs)

# Learning rate for the head-training stage; the fine-tuning stage scales it down.
base_lr = 0.001

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=base_lr),
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=["accuracy"])

model.summary()

In [39]:
def scheduler(epochs, lr):
    """Learning-rate schedule for the `LearningRateScheduler` callback.

    The rate is held constant for the first 15 epochs and multiplied by
    exp(-0.1) on every epoch after that.

    Args:
        epochs: Index of the epoch that is about to start.
        lr: Learning rate currently in effect.

    Returns:
        The learning rate to use for the epoch. The decayed value is returned
        as a plain Python float (not a tf.Tensor), which is the type the
        callback is documented to expect.
    """
    if epochs < 15:
        return lr
    return float(lr * np.exp(-0.1))
  

# Checkpoint target for the best weights. Wrapping this in dirname() stripped
# the file name, leaving "./models" itself as the checkpoint prefix so the
# files landed next to — not inside — the models directory.
cp_path = "./models/cp.ckpt"
Path(cp_path).parent.mkdir(parents=True, exist_ok=True)
cp_callback = ModelCheckpoint(filepath=cp_path, save_weights_only=True,
                              save_best_only=True, monitor="val_accuracy")

# Stop when validation accuracy has not improved for 7 epochs (or has not
# exceeded the 0.60 baseline within that window).
es_callback = EarlyStopping(monitor="val_accuracy", patience=7, verbose=1, baseline=0.60)
# verbose is a 0/1 switch; 1 prints the rate chosen for each epoch.
lr_callback = LearningRateScheduler(schedule=scheduler, verbose=1)

callbacks = [cp_callback, es_callback, lr_callback]

# log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
# tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

In [28]:
# Stage 1: train the classification head on top of the frozen backbone.
epochs = 50
with tf.device("/device:GPU:0"):
    # `callbacks` is already a list, so pass it directly instead of nesting it
    # in another list. `shuffle` is not passed because Keras ignores it for
    # tf.data.Dataset inputs; ordering is determined by the dataset pipeline.
    model.fit(train_dataset, epochs=epochs,
              validation_data=val_dataset, callbacks=callbacks)

In [41]:
# Index of the first backbone layer that becomes trainable for fine-tuning.
fine_tune_at = 160
base_model.trainable = True

# Keep everything below `fine_tune_at` frozen, and keep layers whose name ends
# in "bn" frozen everywhere. The threshold now comes from `fine_tune_at`
# instead of a duplicated literal, so changing it above actually takes effect.
for (idx, layer) in enumerate(base_model.layers):
    if (idx < fine_tune_at) or layer.name.endswith("bn"):
        layer.trainable = False

In [42]:
# Compile again for the fine-tuning stage, using one tenth of the base learning rate.
fine_tune_optimizer = tf.keras.optimizers.Adam(learning_rate=base_lr / 10)
fine_tune_loss = tf.keras.losses.CategoricalCrossentropy()

model.compile(
    optimizer=fine_tune_optimizer,
    loss=fine_tune_loss,
    metrics=["accuracy"],
)

model.summary()

In [43]:
# Stage 2: fine-tune with the upper backbone layers unfrozen.
epochs = 50
with tf.device("/device:GPU:0"):
    # `callbacks` is already a list, so pass it directly instead of nesting it
    # in another list. `shuffle` is not passed because Keras ignores it for
    # tf.data.Dataset inputs; ordering is determined by the dataset pipeline.
    model.fit(train_dataset, epochs=epochs,
              validation_data=val_dataset, callbacks=callbacks)

In [45]:
# Restore the weights stored at the checkpoint path.
model.load_weights(filepath=cp_path)

In [47]:
# Loss and accuracy on the validation set first, then on the training set.
val_metrics = model.evaluate(val_dataset)
train_metrics = model.evaluate(train_dataset)
val_metrics, train_metrics

In [50]:
test_df = pd.read_csv(path+"test/test.csv")
# Test images, resized to the network input size, in test_df row order.
test_images = []
# Dedicated name for the test image folder: reusing `basedir` would overwrite
# the training folder and break a re-run of the train/validation loading cells.
test_basedir = path + "test/Test/"

for img_name in test_df.ID:
    img_path = test_basedir + img_name
    img = cv2.imread(img_path)
    # cv2.imread reports a missing/unreadable file by returning None rather
    # than raising; fail here with the offending path instead of letting
    # cv2.resize crash with an opaque assertion.
    if img is None:
        raise FileNotFoundError(f"Could not read image: {img_path}")
    test_images.append(cv2.resize(img, dsize=IMG_SIZE[:-1]))

In [51]:
# Unlabelled test pipeline: one element per image, grouped into batches of 64.
test_dataset = tf.data.Dataset.from_tensor_slices(test_images).batch(64)

In [None]:
# Model outputs for every test image, in test_dataset order.
preds = model.predict(x=test_dataset)

In [52]:
# Load the provided submission template and preview its layout.
submission_template = path + "sample_submission.csv"
submission = pd.read_csv(submission_template)
submission.head()

In [64]:
# Convert each prediction row to the name of its highest-scoring class.
predicted_idx = np.argmax(preds, axis=1)
predicted_names = np.array(label_names)[predicted_idx]
# NOTE(review): this assumes the template rows are in the same order as the
# rows of test.csv used to build the predictions — confirm.
submission = submission.assign(Class=predicted_names)

In [65]:
# Write the submission file without the DataFrame index column.
submission.to_csv("submission.csv", index=False)