# One dataset

In [None]:
!nvidia-smi

Thu Oct 14 10:30:39 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.74       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   35C    P0    27W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

import cv2
import pickle as pkl
import os
import glob


In [None]:
!pip install mlflow
!databricks configure --host https://community.cloud.databricks.com/

In [None]:
!databricks configure --host https://community.cloud.databricks.com/

In [None]:
import mlflow
mlflow.set_tracking_uri("databricks")
mlflow.set_experiment("/Users/mauroscianca98@gmail.com/TestSE")

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!nvidia-smi

In [None]:
imdb_utk = "dataset_all_clean_3"

In [None]:
!unzip "/content/drive/MyDrive/AI Playground/cv_age_estimation/dataset/{imdb_utk}.zip" -d "{imdb_utk}"

In [None]:
import glob

# Gather the JPEG paths of each split; files sit two levels below the dataset
# root: <root>/<split>/<sub-directory>/<file>.jpg
train_imgs_1 = glob.glob(imdb_utk + "/train_aug/*/*.jpg")
test_imgs_1 = glob.glob(imdb_utk + "/test/*/*.jpg")
valid_imgs_1 = glob.glob(imdb_utk + "/validation/*/*.jpg")

# Split sizes are reused later as shuffle-buffer sizes.
len_train_1, len_test_1, len_valid_1 = (
    len(paths) for paths in (train_imgs_1, test_imgs_1, valid_imgs_1)
)

for split_size in (len_train_1, len_test_1, len_valid_1):
    print(split_size)

In [None]:
#images_path = "/content/drive/MyDrive/AI Playground/cv_age_estimation/dataset/finetuning/images/"
# Root folder of each split inside the extracted dataset. The trailing slash is
# kept because later cells append a glob pattern directly to these strings.
train_path_1, test_path_1, valid_path_1 = (
    imdb_utk + split_suffix
    for split_suffix in ("/train_aug/", "/test/", "/validation/")
)

# Colab form fields: network input resolution and mini-batch size.
img_height = 224 #@param {type:'integer'}
img_width = 224 #@param {type:'integer'}
batch_size = 128 #@param {type:'integer'}

## Setup dataset

In [None]:

# One dataset of file paths per split (list_files already yields them shuffled).
train_list_ds_1, test_list_ds_1, valid_list_ds_1 = (
    tf.data.Dataset.list_files(split_dir + "*/*.jpg", shuffle=True)
    for split_dir in (train_path_1, test_path_1, valid_path_1)
)

# Shuffle again with a buffer as large as the split, i.e. over the whole path list.
train_ds = train_list_ds_1.shuffle(len_train_1)
test_ds = test_list_ds_1.shuffle(len_test_1)
valid_ds = valid_list_ds_1.shuffle(len_valid_1)


In [None]:
AUTOTUNE = tf.data.AUTOTUNE

def get_label(file_path):
  """Return the integer label encoded in the parent directory of ``file_path``.

  The path is split on ``os.path.sep`` and the second-to-last component (the
  directory that directly contains the image) is parsed as a base-10 integer.

  Args:
    file_path: scalar string tensor holding the image path.

  Returns:
    A scalar ``tf.int32`` tensor with the parsed value.
  """
  # convert the path to a list of path components
  parts = tf.strings.split(file_path, os.path.sep)

  # Parse with the explicit string op instead of the ``int()`` builtin: ``int()``
  # on a string tensor only produces a tensor when AutoGraph rewrites the call,
  # whereas this returns a tensor in graph and eager mode alike.
  return tf.strings.to_number(parts[-2], out_type=tf.int32)

def decode_img(img):
  """Decode JPEG bytes into a 3-channel image resized to the network input size.

  Args:
    img: scalar string tensor with the raw JPEG file content.

  Returns:
    The image resized to ``[img_height, img_width]`` (module-level settings).
  """
  target_size = [img_height, img_width]
  # channels=3 forces an RGB result whatever the file's own colour layout is
  rgb = tf.image.decode_jpeg(img, channels=3)
  return tf.image.resize(rgb, target_size)

def process_path(file_path):
  """Turn an image path into an ``(image, label)`` training pair.

  The file is read as raw bytes and decoded with ``decode_img``; the label is
  derived from the path by ``get_label``.
  """
  raw_bytes = tf.io.read_file(file_path)
  return decode_img(raw_bytes), get_label(file_path)


def configure_for_performance(ds, shuffle_buffer_size=1000):
  """Shuffle, batch and prefetch a dataset.

  Args:
    ds: dataset to configure; it must expose ``shuffle``, ``batch`` and
      ``prefetch``.
    shuffle_buffer_size: size of the shuffle buffer. The default (1000) keeps
      the previous behaviour. Pass ``None`` or ``0`` to skip shuffling, e.g.
      for validation/test data where order does not matter and the buffer of
      already-decoded elements only costs memory.

  Returns:
    The dataset batched with the module-level ``batch_size`` and prefetched
    with ``AUTOTUNE``.
  """
  if shuffle_buffer_size:
    ds = ds.shuffle(buffer_size=shuffle_buffer_size)
  ds = ds.batch(batch_size)
  ds = ds.prefetch(buffer_size=AUTOTUNE)
  return ds

In [None]:
train_ds = train_ds.map(process_path, num_parallel_calls=AUTOTUNE)
test_ds = test_ds.map(process_path, num_parallel_calls=AUTOTUNE)
valid_ds = valid_ds.map(process_path, num_parallel_calls=AUTOTUNE)

In [None]:
train_ds = configure_for_performance(train_ds)
test_ds = configure_for_performance(test_ds)
valid_ds = configure_for_performance(valid_ds)

## Preview some data

In [None]:
import matplotlib.pyplot as plt
# Pull one batch and show up to nine samples with their age label.
image_batch, label_batch = next(iter(train_ds))

# A batch can hold fewer than nine images (small split or small batch_size);
# indexing past its end would raise, so cap the number of panels.
n_preview = min(9, int(image_batch.shape[0]))

plt.figure(figsize=(10, 10))
for i in range(n_preview):
  ax = plt.subplot(3, 3, i + 1)
  # cast to uint8 so imshow interprets the pixel values on the 0-255 scale
  plt.imshow(image_batch[i].numpy().astype("uint8"))
  label = label_batch[i]
  plt.title("AGE: %d"% label)
  plt.axis("off")

## Config network

In [None]:
# Per-bin accumulators (8 bins, each with its own record) and global totals,
# all starting from zero.
errors = {bin_idx: {"err": 0, "num": 0, "mae": 0} for bin_idx in range(8)}
num_tot = 0
err = 0

In [None]:
from tensorflow.keras.applications import EfficientNetB0,DenseNet121
from tensorflow.keras import layers, callbacks

# Name of this run and the Drive folder under which every run keeps its weights.
model_name = "mauro_all_effnetv1_test_2"
weights_path = "/content/drive/MyDrive/AI Playground/cv_age_estimation/dataset/finetuning/runs/"

# exist_ok=True makes the cell idempotent and removes the check-then-create race
# of the manual os.path.exists() test.
os.makedirs(os.path.join(weights_path, model_name), exist_ok=True)


In [None]:
class CustomCallback(tf.keras.callbacks.Callback):
    """Forward Keras logs to MLflow and manage the per-bin ``errors_loss`` records.

    The module-level ``errors_loss`` dict (one ``{"err", "num", "mae"}`` record
    for each of the 8 age bins) is zeroed before and after every training batch
    and at the start of every evaluation pass. When an evaluation pass ends the
    records are printed and stored as an MLflow param.

    ``mlflow`` and ``errors_loss`` must exist at module level before any hook
    fires. The hooks with ``test`` in their name run for the validation pass of
    ``fit`` and also when this callback is passed to ``evaluate``.
    """

    # Counter used to key the per-pass MLflow param. Starts at 1 and is bumped at
    # the end of every training epoch (the first increment shadows this class
    # attribute with an instance attribute).
    ep_ = 1

    def _reset_bin_errors(self):
        """Replace every per-bin record in ``errors_loss`` with a zeroed one."""
        for i in range(8):
            errors_loss[i] = { "err": 0, "num":0, "mae":0 }

    def _log_metrics(self, logs, suffix):
        """Log ``mae``, ``regularity`` and ``loss`` to MLflow as ``<name>_<suffix>``.

        Keras may call a hook with ``logs=None`` and the model may be compiled
        without one of the metrics, so only the entries that are present are
        logged instead of failing on a missing key.
        """
        if not logs:
            return
        for key in ("mae", "regularity", "loss"):
            if key in logs:
                mlflow.log_metric("%s_%s" % (key, suffix), logs[key])

    def on_epoch_begin(self, epoch, logs=None):
        print("epoch started")

    def on_epoch_end(self, epoch, logs=None):
        print("epoch ended")

        self.ep_ += 1

    def on_test_begin(self, logs=None):
        print("test started")
        # start the evaluation pass from clean per-bin accumulators
        self._reset_bin_errors()

    def on_test_end(self, logs=None):
        print("test ended")
        print(errors_loss)
        self._log_metrics(logs, "valid")

        # Params are stored as strings, so the per-bin numbers are formatted first.
        dict_ = { }
        for i in range(8):
            dict_[i] = {
                "err": "%.2f" % errors_loss[i]["err"],
                "num": "%d" % errors_loss[i]["num"],
                "mae": "%.2f" % errors_loss[i]["mae"],
            }
        mlflow.log_param("per_class_errors_valid_%d" % ( self.ep_,) ,dict_)

    def on_train_batch_begin(self, batch, logs=None):
        self._reset_bin_errors()

    def on_train_batch_end(self, batch, logs=None):
        # log every 100th batch only, to keep the number of MLflow calls down
        if batch % 100 == 0:
            self._log_metrics(logs, "train")
        self._reset_bin_errors()


def scheduler(epoch, lr):
  """Learning-rate schedule used by ``callbacks.LearningRateScheduler``.

  The rate is left untouched for epochs 0 and 1 and multiplied by ``exp(-0.15)``
  on every later epoch. The value chosen for the epoch is also recorded as the
  MLflow param ``lr_<epoch>``.

  Args:
    epoch: zero-based epoch index supplied by Keras.
    lr: learning rate currently set on the optimizer.

  Returns:
    The learning rate to use for this epoch, as a plain Python float.
  """
  lr_ = float(lr)
  if epoch > 1:
    # Stay in plain floats: tf.math.exp would turn the product into a tf.Tensor,
    # and MLflow stringifies param values, so the tensor repr would be stored
    # instead of the number.
    lr_ = lr_ * float(np.exp(-0.15))
  mlflow.log_param("lr_%d" % ( epoch,) , lr_)

  return lr_

# Folder that receives this run's checkpoints. os.path.join avoids the doubled
# separator produced by concatenating onto weights_path (which already ends in
# "/") and matches how the run directory itself is built.
_run_dir = os.path.join(weights_path, model_name)

cs = [
      # weights of the epoch with the lowest validation loss
      callbacks.ModelCheckpoint(filepath=os.path.join(_run_dir, "best_val_loss.h5"),
                                      monitor='val_loss',
                                      mode='min', save_weights_only=True, save_best_only=True),
      # weights of the epoch with the lowest training loss
      callbacks.ModelCheckpoint(filepath=os.path.join(_run_dir, "best_loss.h5"),
                                      monitor='loss',
                                      mode='min', save_weights_only=True, save_best_only=True),
      callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min'),
      callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=2, verbose=1, min_delta=1e-4, min_lr=0.0001),
      CustomCallback(),
      callbacks.LearningRateScheduler(scheduler, verbose=1)
  ]

In [None]:
# train top layer (output)
from tensorflow.keras import layers
from tensorflow.keras import models
from tensorflow.keras.layers.experimental import preprocessing

def build_model():
    """Build and compile the regression network.

    An ImageNet-initialised EfficientNetB0 without its classification top is
    frozen and extended with global average pooling, batch normalisation,
    dropout (rate 0.2) and a single linear output unit. The model is compiled
    with Adam (learning rate 1e-3) and MAE loss, without extra metrics.

    Returns:
        The compiled ``tf.keras.Model`` named "EfficientNet".
    """
    image_input = layers.Input(shape=(img_height, img_width, 3))

    backbone = EfficientNetB0(include_top=False, input_tensor=image_input, weights="imagenet")
    print("Number of layers in the base model: ", len(backbone.layers))

    # freeze the whole backbone; only the layers added below start out trainable
    backbone.trainable = False

    # Regression head
    head = layers.GlobalAveragePooling2D(name="avg_pool")(backbone.output)
    head = layers.BatchNormalization(name="our_bn")(head)
    head = layers.Dropout(0.2, name="top_dropout")(head)
    prediction = layers.Dense(1, activation="linear", name="pred")(head)

    regressor = tf.keras.Model(image_input, prediction, name="EfficientNet")
    regressor.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3), loss="mae", metrics=[]
    )
    return regressor

In [None]:
def unfreeze_model(model, num_layers=20, freeze_batchnorm=False):
    """Mark the last ``num_layers`` layers of ``model`` as trainable, in place.

    Args:
        model: object exposing a ``layers`` list whose items have ``trainable``
            and ``name`` attributes.
        num_layers: how many layers, counted from the output side, to unfreeze.
            Values <= 0 leave the model untouched.
        freeze_batchnorm: when True, ``BatchNormalization`` layers inside that
            range are skipped, except the head layer named "our_bn". The default
            (False) keeps the existing behaviour, which unfreezes every layer in
            the range, BatchNorm included.

    Returns:
        None.
    """
    if num_layers <= 0:
        # model.layers[-0:] is the whole list and would unfreeze everything
        return

    for layer in model.layers[-num_layers:]:
        if (freeze_batchnorm
                and isinstance(layer, layers.BatchNormalization)
                and layer.name != "our_bn"):
            continue
        layer.trainable = True

In [None]:
import gc
gc.collect()

1181

In [None]:
model = build_model()

Number of layers in the base model:  237


In [None]:
unfreeze_model(model)

In [None]:
for layer in model.layers[:]:
  print(layer.name, str(layer.trainable))

In [None]:
model.summary()

In [None]:
model.load_weights(weights_path + model_name + "/best_loss.h5", )

In [None]:
# NOTE(review): "best_val_loss_2.h5" is relative to the working directory and no
# visible cell creates it; running this cell also replaces the weights loaded by
# the previous cell. Confirm which checkpoint is intended before relying on a
# clean Restart & Run All.
model.load_weights("best_val_loss_2.h5", )

## Custom metric (regularity)

In [None]:
def get_bin(age):
    """Map an age to the index (0-7) of its ten-year bin.

    Bin 0 takes every age <= 10, bin k (1 <= k <= 6) takes 10*k < age <= 10*(k+1)
    and bin 7 takes every age > 70. ``None`` is returned when no comparison
    holds (for instance when ``age`` is NaN).
    """
    upper_bounds = (10, 20, 30, 40, 50, 60, 70)
    for bin_index, upper in enumerate(upper_bounds):
        # bounds are scanned in ascending order, so the first match is the bin
        if age <= upper:
            return bin_index
    if age > 70:
        return 7
    return None

In [None]:
def compute_reg(mae):
  """Spread of the per-bin MAE values around ``mae``.

  Sums the squared difference between ``mae`` and the stored "mae" of every
  bin in the module-level ``errors_loss`` whose "num" is non-zero, divides the
  sum by 8 (the divisor is fixed, empty bins are not discounted) and returns
  the square root. The per-bin "mae" values are read as they are; this function
  does not compute them.
  """
  squared_deviations = (
      (stats["mae"] - mae) ** 2
      for stats in errors_loss.values()
      if stats["num"] != 0
  )
  return np.sqrt(sum(squared_deviations) / 8)

In [None]:
def compute_reg32(mae):
  """Refresh the per-bin MAE values and return their spread around ``mae``.

  For every bin of the module-level ``errors_loss`` with a non-zero "num", the
  stored "mae" is overwritten with ``err / num``. The squared differences
  between those values and ``mae`` are summed, divided by 8 and square-rooted.

  Returns:
    The result as ``np.float32`` (0 when the populated-bin counter is not
    positive).
  """
  populated = []
  n = 8
  for stats in errors_loss.values():
    if stats["num"] == 0:
      # empty bin: only lowers the counter, its stored "mae" is left alone
      n -= 1
    else:
      stats["mae"] = stats["err"] / stats["num"]
      populated.append(stats)

  sum_diff = sum(pow(stats["mae"] - mae, 2) for stats in populated)

  # the counter only gates the result; the divisor itself stays fixed at 8
  regularity = sum_diff / 8 if n > 0 else 0
  return np.sqrt(regularity).astype(np.float32)

In [None]:
def update_errors_dict(y_true, y_pred, err, num_tot):
  """Add a batch's absolute errors to the per-bin records in ``errors_loss``.

  Args:
    y_true: targets; must expose ``.numpy()``.
    y_pred: predictions; must expose ``.numpy()``.
    err: running error total. It is only updated through the local name, so
      callers passing an immutable number will not see the change.
    num_tot: running sample count, updated the same local-only way.

  Returns:
    Always ``0.0``.
  """
  targets = y_true.numpy().reshape(-1)
  predictions = y_pred.numpy().reshape(-1)

  for idx, target in enumerate(targets):
    abs_error = abs(target - predictions[idx])
    # the bin is selected by the true age, not by the prediction
    bin_stats = errors_loss[get_bin(target)]
    bin_stats["err"] += abs_error
    bin_stats["num"] += 1
    err += abs_error
  num_tot += len(targets)

  return 0.0

In [None]:
def metric_reg(errors, num_tot, err):
  """Build the Keras metric function ``regularity``.

  The previous wiring only called ``compute_reg``. Nothing ever fed the per-bin
  records in ``errors_loss`` or computed their "mae", so every bin stayed empty
  and the metric was always 0. The metric now first accumulates the batch into
  the per-bin records and then scores them.

  Args:
    errors: unused; kept for call compatibility (the helpers work on the
      module-level ``errors_loss``).
    num_tot: initial sample count forwarded to ``update_errors_dict``.
    err: initial error total forwarded to ``update_errors_dict``.

  Returns:
    A function ``regularity(y_true, y_pred)`` usable in ``model.compile``.
  """

  def regularity(y_true, y_pred):
    mae = tf.keras.losses.MeanAbsoluteError()
    mae_ = mae(y_true, y_pred)

    def _update_and_score(true_batch, pred_batch, batch_mae):
      # Both steps share one py_function so the update always runs before the
      # score. err / num_tot stay the plain Python values given to metric_reg
      # and the MAE is unwrapped to a float, which keeps the helpers' arithmetic
      # in NumPy/Python numbers.
      update_errors_dict(true_batch, pred_batch, err, num_tot)
      return compute_reg32(float(batch_mae))

    return tf.py_function(_update_and_score, [y_true, y_pred, mae_], tf.float32)
  return regularity

In [None]:
# Per-bin records shared by the metric helpers and the MLflow callback
# (8 bins, each with its own dict), plus the global totals, all zeroed.
errors_loss = {bin_idx: {"err": 0, "num": 0, "mae": 0} for bin_idx in range(8)}
num_tot_loss = 0
err_loss = 0

## Train model

In [None]:
mlflow.end_run()

In [None]:
mlflow.start_run()

<ActiveRun: >

In [None]:
mlflow.log_param("optimizer", "Adam")

In [None]:
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
model.compile(
    optimizer=optimizer, loss="mae", metrics=[metric_reg(errors_loss, num_tot_loss, err_loss), "mae"]
)

In [None]:
model.fit(
  train_ds,
  callbacks=cs,
  validation_data=valid_ds,
  epochs=20
)

Epoch 1/20
epoch started

Epoch 00001: LearningRateScheduler setting learning rate to 0.0010000000474974513.
test ended
{0: {'err': 813.3555509150028, 'num': 240, 'mae': 3.388981462145845}, 1: {'err': 1838.188660621643, 'num': 360, 'mae': 5.106079612837897}, 2: {'err': 2820.041097640991, 'num': 660, 'mae': 4.2727895418802895}, 3: {'err': 3956.9253721237183, 'num': 900, 'mae': 4.396583746804131}, 4: {'err': 4163.1449909210205, 'num': 900, 'mae': 4.625716656578912}, 5: {'err': 3881.8549728393555, 'num': 900, 'mae': 4.313172192043728}, 6: {'err': 1741.2029495239258, 'num': 360, 'mae': 4.836674859788682}, 7: {'err': 1779.9946517944336, 'num': 240, 'mae': 7.4166443824768065}}
epoch ended
Epoch 2/20
epoch started

Epoch 00002: LearningRateScheduler setting learning rate to 0.0010000000474974513.
test ended
{0: {'err': 1187.7497276067734, 'num': 240, 'mae': 4.948957198361556}, 1: {'err': 2046.4124174118042, 'num': 360, 'mae': 5.684478937255012}, 2: {'err': 2362.737462043762, 'num': 660, 'mae'

KeyboardInterrupt: ignored

In [None]:
model.save_weights(weights_path + model_name + "/best_loss_.h5")

In [None]:
model.load_weights(weights_path + model_name + "/best_val_loss.h5")

In [None]:
# NOTE(review): the training callback list is reused here, so CustomCallback
# records the test-set results in MLflow under the "*_valid" names, and the saved
# output of this cell ends in a TypeError. Consider evaluating without the
# training callbacks; TODO confirm the intent.
model.evaluate(test_ds,callbacks=cs,)

test started
test ended
{0: {'err': 12061.75424003601, 'num': 480, 'mae': 25.12865466674169}, 1: {'err': 14936.957523345947, 'num': 720, 'mae': 20.74577433798048}, 2: {'err': 17739.013346672058, 'num': 1320, 'mae': 13.43864647475156}, 3: {'err': 17141.89578819275, 'num': 1800, 'mae': 9.52327543788486}, 4: {'err': 12858.924434661865, 'num': 1800, 'mae': 7.1438469081454805}, 5: {'err': 11839.961585998535, 'num': 1800, 'mae': 6.577756436665853}, 6: {'err': 4863.2419509887695, 'num': 720, 'mae': 6.754502709706625}, 7: {'err': 4521.795753479004, 'num': 480, 'mae': 9.420407819747926}}


TypeError: ignored