<a href="https://colab.research.google.com/github/mmaruthi/Assignment5/blob/master/ResNet50v1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Mount Google Drive at /content/gdrive and unpack the dataset archive into the
# current working directory (-q keeps unzip quiet).
from google.colab import drive
drive.mount('/content/gdrive')
!unzip -q "/content/gdrive/My Drive/hvc_data.zip"
# List the working directory: the `hvc_annotations.csv` file and the `resized`
# dir should be present after unzipping.
%ls 

In [0]:
%tensorflow_version 1.x

import cv2
import json

import numpy as np
import pandas as pd

from functools import partial
from pathlib import Path 
from tqdm import tqdm

from google.colab.patches import cv2_imshow

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder


from keras.applications import VGG16
from keras.layers.core import Dropout
from keras.layers.core import Flatten
from keras.layers.core import Dense
from keras.layers import Input
from keras.models import Model
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.applications.resnet50 import ResNet50


In [0]:
# Read the annotation table and discard the `filename` column, which is not
# used anywhere below.
df = pd.read_csv("hvc_annotations.csv").drop(columns=["filename"])
df.head(2)

In [0]:
# One-hot encode every annotated attribute; the image path is kept as the
# first column. The tuple order below fixes the column order of `one_hot_df`.
_label_attributes = (
    "gender",
    "imagequality",
    "age",
    "weight",
    "carryingbag",
    "footwear",
    "emotion",
    "bodypose",
)

one_hot_df = pd.concat(
    [df[["image_path"]]]
    + [pd.get_dummies(df[attr], prefix=attr) for attr in _label_attributes],
    axis=1,
)

# Transposed preview: one row per column of the encoded frame.
one_hot_df.head(2).T

In [0]:
import keras
import numpy as np

# Label columns per attribute, selected from `one_hot_df` by column-name prefix.
def _columns_with_prefix(prefix):
    """Return the column names of ``one_hot_df`` that start with ``prefix``."""
    return [name for name in one_hot_df.columns if name.startswith(prefix)]


_gender_cols_ = _columns_with_prefix("gender")
_imagequality_cols_ = _columns_with_prefix("imagequality")
_age_cols_ = _columns_with_prefix("age")
_weight_cols_ = _columns_with_prefix("weight")
_carryingbag_cols_ = _columns_with_prefix("carryingbag")
_footwear_cols_ = _columns_with_prefix("footwear")
_emotion_cols_ = _columns_with_prefix("emotion")
_bodypose_cols_ = _columns_with_prefix("bodypose")

class PersonDataGenerator(keras.utils.Sequence):
    """Ground truth data generator.

    Serves batches of images read from the ``image_path`` column together with
    a dict of one-hot targets keyed by model output name.

    Args:
        df: frame holding an ``image_path`` column plus the one-hot label
            columns listed in the module-level ``_*_cols_`` lists.
        batch_size: number of rows per batch.
        shuffle: when true, rows are reshuffled on construction and after
            every epoch.
        augmentation: optional object exposing ``random_transform(img)`` and
            ``standardize(img)`` (e.g. a Keras ``ImageDataGenerator``). When
            given, both are applied to every image of a batch; ``None``
            yields the raw images as read by OpenCV.
    """

    def __init__(self, df, batch_size=32, shuffle=True,augmentation=None):
        self.df = df
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.augmentation = augmentation
        self.on_epoch_end()

    def __len__(self):
        """Number of full batches per epoch (a trailing partial batch is dropped)."""
        return self.df.shape[0] // self.batch_size

    def _load_image(self, path):
        """Read one image and run it through the augmentation pipeline, if any.

        Raises:
            ValueError: if OpenCV cannot read ``path`` (``cv2.imread`` returns
                ``None`` instead of raising).
        """
        img = cv2.imread(path)
        if img is None:
            raise ValueError(f"cv2.imread could not read image: {path}")
        if self.augmentation is not None:
            # Work on floats: standardization rescales values in place, which
            # is not possible on the uint8 array returned by OpenCV.
            img = img.astype("float32")
            img = self.augmentation.random_transform(img)
            img = self.augmentation.standardize(img)
        return img

    def __getitem__(self, index):
        """Fetch the ``index``-th batch as ``(images, targets)``.

        ``images`` is the stack of the batch's images; ``targets`` maps each
        output name (e.g. ``"gender_output"``) to the matching one-hot columns
        of the batch rows.
        """
        start = index * self.batch_size
        items = self.df.iloc[start:start + self.batch_size]
        image = np.stack([self._load_image(path) for path in items["image_path"]])
        target = {
            "gender_output": items[_gender_cols_].values,
            "image_quality_output": items[_imagequality_cols_].values,
            "age_output": items[_age_cols_].values,
            "weight_output": items[_weight_cols_].values,
            "bag_output": items[_carryingbag_cols_].values,
            "pose_output": items[_bodypose_cols_].values,
            "footwear_output": items[_footwear_cols_].values,
            "emotion_output": items[_emotion_cols_].values,
        }
        return image, target

    def on_epoch_end(self):
        """Reshuffle the rows after each epoch when shuffling is enabled."""
        if self.shuffle:
            self.df = self.df.sample(frac=1).reset_index(drop=True)


In [0]:
def get_random_eraser(p=0.5, s_l=0.02, s_h=0.4, r_1=0.3, r_2=1/0.3, v_l=0, v_h=255, pixel_level=False):
    """Build a function that overwrites a random rectangle of an image.

    Args:
        p: probability that a call erases anything; otherwise the image is
            returned untouched.
        s_l, s_h: bounds of the erased area, as a fraction of the image area.
        r_1, r_2: bounds of the height/width ratio of the erased rectangle.
        v_l, v_h: bounds of the uniformly drawn fill value.
        pixel_level: when true, every element of the rectangle gets its own
            random value; otherwise one value fills the whole rectangle.

    Returns:
        ``eraser(input_img)``, which accepts an ``(H, W, C)`` or ``(H, W)``
        array, modifies it in place and returns it.
    """
    def eraser(input_img):
        # Accept colour (H, W, C) as well as single-channel (H, W) images.
        ndim = len(input_img.shape)
        if ndim == 3:
            img_h, img_w, img_c = input_img.shape
        elif ndim == 2:
            img_h, img_w = input_img.shape
            img_c = None
        else:
            raise ValueError(
                f"expected a 2-D or 3-D image, got shape {input_img.shape}"
            )

        p_1 = np.random.rand()

        if p_1 > p:
            return input_img

        # Draw rectangles until one fits entirely inside the image.
        while True:
            s = np.random.uniform(s_l, s_h) * img_h * img_w
            r = np.random.uniform(r_1, r_2)
            w = int(np.sqrt(s / r))
            h = int(np.sqrt(s * r))
            left = np.random.randint(0, img_w)
            top = np.random.randint(0, img_h)

            if left + w <= img_w and top + h <= img_h:
                break

        if pixel_level:
            patch_shape = (h, w) if img_c is None else (h, w, img_c)
            c = np.random.uniform(v_l, v_h, patch_shape)
        else:
            c = np.random.uniform(v_l, v_h)

        # Slicing only the two spatial axes covers all channels of a 3-D image
        # and works unchanged for a 2-D one.
        input_img[top:top + h, left:left + w] = c

        return input_img

    return eraser

In [0]:
from sklearn.model_selection import train_test_split
# Hold out 15% of the rows for validation. The fixed random_state makes the
# split identical on every run, so results stay comparable after a kernel restart.
train_df, val_df = train_test_split(one_hot_df, test_size=0.15, random_state=42)
train_df.shape, val_df.shape

In [0]:
# Preview the first two rows of the training split.
train_df.head(2)

In [0]:
# Create the train and validation data generators.
# NOTE(review): the featurewise_* options depend on statistics computed by
# ImageDataGenerator.fit(); no fit() call is visible in this notebook — confirm.

# Training: rescaling plus horizontal flips, rotations up to 20 degrees and
# random erasing with a constant fill value of 1.
train_augmentation = ImageDataGenerator(
    rescale=1./255,
    featurewise_center=True,
    featurewise_std_normalization=True,
    horizontal_flip=True,
    vertical_flip=False,
    rotation_range=20,
    preprocessing_function=get_random_eraser(v_l=1, v_h=1),
)
train_gen = PersonDataGenerator(
    train_df,
    batch_size=32,
    augmentation=train_augmentation,
)

# Validation: same rescaling/normalization options, no random transforms,
# and a fixed row order.
valid_augmentation = ImageDataGenerator(
    rescale=1./255,
    featurewise_center=True,
    featurewise_std_normalization=True,
)
valid_gen = PersonDataGenerator(
    val_df,
    batch_size=32,
    shuffle=False,
    augmentation=valid_augmentation,
)

In [0]:
# Derive the number of output units per attribute from one training batch.
# `targets` is the dictionary produced by PersonDataGenerator: each key looks
# like '<name>_output' and each value holds the one-hot columns of that
# attribute, so the key with '_output' stripped maps to the column count
# (value.shape[1]) — e.g. one entry per gender class for 'gender_output'.
images, targets = next(iter(train_gen))
num_units = dict(
    (output_name.partition("_output")[0], one_hot.shape[1])
    for output_name, one_hot in targets.items()
)
num_units

In [0]:
# Sanity check that images are read correctly: load one sample, display it
# inline and print the Python type returned by OpenCV.
from google.colab.patches import cv2_imshow
# The flag 0 makes OpenCV load the file as a single-channel grayscale image.
img = cv2.imread('resized/1.jpg',0)
cv2_imshow(img)
print('type::',type(img))

In [0]:
# ResNet50 feature extractor without its classification top, created with
# weights=None (no pretrained weights loaded) on a 224x224 3-channel input.
image_input = Input(shape=(224, 224, 3))
backbone = ResNet50(weights=None, include_top=False, input_tensor=image_input)

In [0]:
# Shared "neck" on top of the ResNet50 backbone: its output feature map is
# flattened and passed through a 512-unit fully connected layer with ReLU.
backbone_features = backbone.output
flattened_features = Flatten(name="flattener")(backbone_features)
neck = Dense(512, activation="relu")(flattened_features)

In [0]:
def build_tower(in_layer):
    """Stack dropout -> dense(256) -> dropout -> dense(256) on ``in_layer``.

    Args:
        in_layer: tensor the tower is attached to.

    Returns:
        Output tensor of the second 256-unit ReLU dense layer.
    """
    head = Dropout(0.3)(in_layer)
    head = Dense(256, activation="relu")(head)
    # Chain the second dropout onto the first dense layer. Feeding it
    # ``in_layer`` again would leave the first dropout/dense pair disconnected
    # from the returned tensor.
    head = Dropout(0.3)(head)
    head = Dense(256, activation="relu")(head)
    return head

In [0]:
def build_head(name, activator,in_layer):
    """Attach the output layer for attribute ``name`` to ``in_layer``.

    The layer is a dense layer named ``"<name>_output"`` with
    ``num_units[name]`` units and the activation given by ``activator``.
    Returns the output tensor of that layer.
    """
    output_layer = Dense(
        units=num_units[name],
        activation=activator,
        name=f"{name}_output",
    )
    return output_layer(in_layer)

In [0]:
# heads
# Each attribute gets its own branch on top of the shared `neck`.
# build_tower(neck) is meant to add dropout -> fully connected layer -> dropout -> fully connected layer on top of the neck
# and return the resulting tensor.
# build_head then applies a final dense layer with the requested activation on top of that tower to give the output for that particular attribute, e.g. gender, age etc.
# The Model API connects the single 'backbone' (ResNet50) and 'neck' with the 8 'heads' below, which are built using build_head & build_tower

# gender = build_head("gender", "sigmoid", build_tower(neck))  #gender is binary classification
# image_quality = build_head("image_quality", "softmax",build_tower(neck)) # image is multi-class single label, can be either good, avg or bad
# age = build_head("age", "softmax",build_tower(neck))  # Age is regression to arbitrary value but here we are didving to groups like 15-25, 25-35, hence multi-class single label
# weight = build_head("weight", "softmax",build_tower(neck)) # weight is multi-class, single label as it can healthy, over or under
# bag = build_head("bag", "softmax",build_tower(neck))  # bag is multi-class, single label 
# footwear = build_head("footwear","softmax", build_tower(neck))  # multi-class, single label 
# emotion = build_head("emotion", "softmax", build_tower(neck))  # multi-class, single label 
# pose = build_head("pose", "softmax",build_tower(neck)) # multi-class, single label 

def _sigmoid_head(attribute):
    """Build a fresh tower on the shared neck and top it with a sigmoid output."""
    return build_head(attribute, "sigmoid", build_tower(neck))


# One output per attribute; every head currently uses a sigmoid activation.
# NOTE(review): all attributes except gender are described as multi-class,
# single-label — confirm that sigmoid rather than softmax is intended.
gender = _sigmoid_head("gender")  # binary classification
image_quality = _sigmoid_head("image_quality")  # good, average or bad
age = _sigmoid_head("age")  # age is bucketed into groups such as 15-25, 25-35
weight = _sigmoid_head("weight")  # healthy, over or under
bag = _sigmoid_head("bag")
footwear = _sigmoid_head("footwear")
emotion = _sigmoid_head("emotion")
pose = _sigmoid_head("pose")

In [0]:
# Assemble the multi-output model: the backbone input feeds all eight heads.
head_outputs = [gender, image_quality, age, weight, bag, footwear, pose, emotion]
model = Model(inputs=backbone.input, outputs=head_outputs)

In [0]:
# freeze backbone
#for layer in backbone.layers:
#	print(layer)
#	layer.trainable = False

In [0]:
from keras.utils import plot_model
# Render the model graph with plot_model's default options.
# Alternative kept for reference: include tensor shapes and choose the output file.
#plot_model(model, show_shapes=True,to_file='model.png')
plot_model(model)

In [0]:
# losses = {
# 	"gender_output": "binary_crossentropy",
# 	"image_quality_output": "categorical_crossentropy",
# 	"age_output": "categorical_crossentropy",
# 	"weight_output": "categorical_crossentropy",

# }
# loss_weights = {"gender_output": 1.0, "image_quality_output": 1.0, "age_output": 1.0}
from keras.optimizers import Adam
from keras.callbacks import LearningRateScheduler
def scheduler(epoch, lr):
    """Return the learning rate for ``epoch``.

    The rate is ``0.004 / (1 + 0.319 * epoch)`` rounded to 10 decimals. The
    ``lr`` argument is not used.
    """
    decay = 1 + 0.319 * epoch
    return round(0.004 / decay, 10)
# SGD with momentum.
opt = SGD(lr=0.003, momentum=0.9)
#opt = SGD(momentum=0.5)

# A single binary cross-entropy loss is used for every output.
# Per-output alternative kept for reference (not active):
#    loss={"gender_output":"binary_crossentropy",
#          "image_quality_output":"categorical_crossentropy",
#          "age_output":"categorical_crossentropy",
#          "weight_output":"categorical_crossentropy",
#          "bag_output":"categorical_crossentropy",
#          "emotion_output": "categorical_crossentropy",
#          "pose_output": "categorical_crossentropy",
#          "footwear_output": "categorical_crossentropy"},
#    loss_weights=loss_weights,
compile_options = {
    "optimizer": opt,
    "loss": "binary_crossentropy",
    "metrics": ["accuracy"],
}
model.compile(**compile_options)

In [0]:
# Print the layer-by-layer summary of the compiled model.
# (Array-based training call kept for reference; the notebook trains from generators.)
# model.fit(X_train, y_train, validation_data=(X_valid, y_valid), batch_size=32, epochs=10)
model.summary()

In [0]:
# Train for 100 epochs from the generators; the callback sets the learning
# rate from `scheduler` at the start of every epoch and logs it (verbose=1).
lr_callback = LearningRateScheduler(scheduler, verbose=1)

# Optional fit_generator arguments left disabled:
#  use_multiprocessing=False,
#  workers=1,
model.fit_generator(
    generator=train_gen,
    validation_data=valid_gen,
    epochs=100,
    verbose=1,
    callbacks=[lr_callback],
)

In [0]:
# Display the model object's repr as the cell output.
model