<a href="https://colab.research.google.com/github/kateprashant/TSAI_Repository/blob/master/Assignment_5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Mount Google Drive so the dataset archive stored there is readable from this runtime.
from google.colab import drive
drive.mount('/content/gdrive')
# Unpack the dataset archive into the current working directory (-q: quiet mode).
!unzip -q "/content/gdrive/My Drive/hvc_data.zip"
# Sanity check: the listing should show the `hvc_annotations.csv` file and the `resized` dir.
%ls 

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive
[0m[01;34mgdrive[0m/  hvc_annotations.csv  [01;34mresized[0m/  [01;34msample_data[0m/


In [0]:
import cv2
import json

import numpy as np
import pandas as pd

from functools import partial
from pathlib import Path 
from tqdm import tqdm

from google.colab.patches import cv2_imshow

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder


from keras.applications import VGG16
from keras.layers.core import Dropout
from keras.layers.core import Flatten
from keras.layers.core import Dense
from keras.layers import Input
from keras.models import Model
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.layers.convolutional import Convolution2D, MaxPooling2D

In [26]:
# Read the annotation table and discard the `filename` column, which is not used.
df = pd.read_csv("hvc_annotations.csv").drop(columns=["filename"])
df.head()
df.shape

(13573, 9)

In [27]:
# One-hot encode every label attribute. Each group of indicator columns is
# prefixed with the name of the attribute it was derived from.
_label_attributes = [
    "gender", "imagequality", "age", "weight",
    "carryingbag", "footwear", "emotion", "bodypose",
]

one_hot_df = pd.concat(
    [df[["image_path"]]]
    + [pd.get_dummies(df[attr], prefix=attr) for attr in _label_attributes],
    axis=1,
)

one_hot_df.head().T
one_hot_df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
gender_female,13573.0,0.437413,0.496086,0.0,0.0,0.0,1.0,1.0
gender_male,13573.0,0.562587,0.496086,0.0,0.0,1.0,1.0,1.0
imagequality_Average,13573.0,0.553231,0.497177,0.0,0.0,1.0,1.0,1.0
imagequality_Bad,13573.0,0.165034,0.371224,0.0,0.0,0.0,0.0,1.0
imagequality_Good,13573.0,0.281736,0.449862,0.0,0.0,0.0,1.0,1.0
age_15-25,13573.0,0.183747,0.387292,0.0,0.0,0.0,0.0,1.0
age_25-35,13573.0,0.398659,0.48964,0.0,0.0,0.0,1.0,1.0
age_35-45,13573.0,0.253076,0.43479,0.0,0.0,0.0,1.0,1.0
age_45-55,13573.0,0.109777,0.312623,0.0,0.0,0.0,0.0,1.0
age_55+,13573.0,0.054741,0.227482,0.0,0.0,0.0,0.0,1.0


In [28]:
# Fit a label encoder on the binary `gender_male` indicator, print the classes
# it learned and show the encoded column.
gender_encoder = LabelEncoder()
gender_encoder.fit(one_hot_df["gender_male"])
print(gender_encoder.classes_)
gender_encoder.transform(one_hot_df["gender_male"])

[0 1]


array([1, 0, 1, ..., 0, 0, 1])

In [0]:
import keras
import numpy as np

# Names of the one-hot label columns, grouped by the attribute they encode.
def _columns_with_prefix(prefix):
    """Return the `one_hot_df` columns whose name starts with `prefix`, in frame order."""
    return [name for name in one_hot_df.columns if name.startswith(prefix)]


_gender_cols_ = _columns_with_prefix("gender")
_imagequality_cols_ = _columns_with_prefix("imagequality")
_age_cols_ = _columns_with_prefix("age")
_weight_cols_ = _columns_with_prefix("weight")
_carryingbag_cols_ = _columns_with_prefix("carryingbag")
_footwear_cols_ = _columns_with_prefix("footwear")
_emotion_cols_ = _columns_with_prefix("emotion")
_bodypose_cols_ = _columns_with_prefix("bodypose")




class PersonDataGenerator(keras.utils.Sequence):
    """Batch generator yielding images and per-attribute one-hot targets.

    Each item is a tuple ``(images, target)``: ``images`` is the stack of the
    decoded image files of one batch and ``target`` maps the model's output
    layer names to the matching one-hot label columns of the same rows.

    Args:
        df: Frame with an ``image_path`` column plus the one-hot label columns
            named in the module-level ``_*_cols_`` lists.
        batch_size: Number of rows per batch.
        shuffle: When true, the rows are reshuffled on construction and after
            every epoch.
        augmentation: Optional object exposing ``flow(images, ...)`` (for
            example a Keras ``ImageDataGenerator``) that is applied to every
            batch of images.
    """

    def __init__(self, df, batch_size=32, shuffle=True, augmentation=None):
        self.df = df
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.augmentation = augmentation
        # Initial shuffle (no-op when shuffle is disabled).
        self.on_epoch_end()

    def __len__(self):
        """Number of full batches per epoch; a trailing partial batch is dropped."""
        return int(np.floor(self.df.shape[0] / self.batch_size))

    def __getitem__(self, index):
        """Fetch the ``index``-th batch of images and targets.

        Raises:
            FileNotFoundError: If an image of the batch cannot be read.
        """
        batch_slice = slice(index * self.batch_size, (index + 1) * self.batch_size)
        items = self.df.iloc[batch_slice]

        loaded = []
        for path in items["image_path"]:
            image = cv2.imread(path)
            if image is None:
                # cv2.imread reports a missing/undecodable file by returning
                # None; fail here with the offending path instead of letting
                # np.stack raise an unrelated shape error.
                raise FileNotFoundError(f"Could not read image: {path}")
            loaded.append(image)
        images = np.stack(loaded)

        if self.augmentation is not None:
            # flow() batches with its own default size (32). Request the whole
            # batch explicitly so the images stay aligned with the targets for
            # any batch_size.
            images = next(
                self.augmentation.flow(images, batch_size=len(images), shuffle=False)
            )

        target = {
            "gender_output": items[_gender_cols_].values,
            "image_quality_output": items[_imagequality_cols_].values,
            "age_output": items[_age_cols_].values,
            "weight_output": items[_weight_cols_].values,
            "bag_output": items[_carryingbag_cols_].values,
            "pose_output": items[_bodypose_cols_].values,
            "footwear_output": items[_footwear_cols_].values,
            "emotion_output": items[_emotion_cols_].values,
        }

        return images, target

    def on_epoch_end(self):
        """Reshuffle the rows after each epoch when shuffling is enabled."""
        if self.shuffle:
            self.df = self.df.sample(frac=1).reset_index(drop=True)


# NOTE: the training generator is deliberately not instantiated in this cell.
# `train_df` only exists after the train/validation split performed further
# down, where both `train_gen` and `valid_gen` are created; building it here
# raises NameError on a fresh kernel (Restart & Run All).



In [30]:
from sklearn.model_selection import train_test_split

# Hold out 15% of the rows for validation. The split is seeded so that the
# same partition is produced on every run of the notebook.
train_df, val_df = train_test_split(one_hot_df, test_size=0.15, random_state=42)
train_df.shape, val_df.shape

((11537, 28), (2036, 28))

In [31]:
# Preview the first rows of the training split.
train_df.head(n=5)

Unnamed: 0,image_path,gender_female,gender_male,imagequality_Average,imagequality_Bad,imagequality_Good,age_15-25,age_25-35,age_35-45,age_45-55,age_55+,weight_normal-healthy,weight_over-weight,weight_slightly-overweight,weight_underweight,carryingbag_Daily/Office/Work Bag,carryingbag_Grocery/Home/Plastic Bag,carryingbag_None,footwear_CantSee,footwear_Fancy,footwear_Normal,emotion_Angry/Serious,emotion_Happy,emotion_Neutral,emotion_Sad,bodypose_Back,bodypose_Front-Frontish,bodypose_Side
2314,resized/2315.jpg,0,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,1,0,0,0,0,1,0,0,0,1
7883,resized/7884.jpg,0,1,0,0,1,1,0,0,0,0,1,0,0,0,0,0,1,0,0,1,0,0,1,0,1,0,0
9151,resized/9152.jpg,1,0,1,0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,1,0,0,1,0,0,1,0
4163,resized/4164.jpg,0,1,0,0,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1,0
11519,resized/11521.jpg,0,1,1,0,0,0,0,1,0,0,0,0,1,0,0,1,0,0,0,1,0,0,1,0,0,0,1


In [0]:
# Build the data generators: the training one shuffles (class default) and
# applies random horizontal/vertical flips, the validation one keeps its order.
_flip_augmenter = ImageDataGenerator(horizontal_flip=True, vertical_flip=True)
train_gen = PersonDataGenerator(train_df, batch_size=32, augmentation=_flip_augmenter)
valid_gen = PersonDataGenerator(val_df, batch_size=64, shuffle=False)

In [53]:
# Derive the number of output units of every head from one batch of targets:
# the key is the target name without its "_output" suffix, the value is the
# width of the corresponding one-hot label array.
images, targets = next(iter(train_gen))
num_units = dict(
    (output_name.split("_output")[0], labels.shape[1])
    for output_name, labels in targets.items()
)
num_units

{'age': 5,
 'bag': 3,
 'emotion': 4,
 'footwear': 3,
 'gender': 2,
 'image_quality': 3,
 'pose': 3,
 'weight': 4}

In [0]:
def scheduler(epoch, lr):
    """Return the learning rate to use for the given epoch index.

    The rate follows an inverse-time decay ``0.003 / (1 + 0.319 * epoch)``
    rounded to 10 decimals. For the first six epochs of every block of ten
    (``epoch % 10`` in 0..5) a constant 0.001 is added on top.

    Args:
        epoch: Epoch index.
        lr: Current learning rate; accepted for the callback signature but
            not used in the computation.
    """
    decayed = round(0.003 / (1 + 0.319 * epoch), 10)
    if epoch % 10 <= 5:
        return 0.001 + decayed
    return decayed

In [0]:
# VGG16 convolutional base without its classifier top; weights=None means no
# pretrained weights are loaded.
image_input = Input(shape=(224, 224, 3))
backbone = VGG16(include_top=False, weights=None, input_tensor=image_input)

# Shared neck: flatten the backbone output and project it to 512 units.
flattened = Flatten(name="flatten")(backbone.output)
neck = Dense(512, activation="relu")(flattened)


def build_tower(in_layer):
    """Build one attribute-specific stack of dense layers on top of `in_layer`.

    The tower narrows 256 -> 128 -> 64 -> 32 units, with a dropout layer in
    front of every dense layer.

    Args:
        in_layer: Tensor the tower is attached to (in this notebook the shared
            dense neck output).

    Returns:
        The output tensor of the final 32-unit dense layer.
    """
    # Chain the stages: every Dropout must consume the previous Dense output.
    # Feeding `in_layer` to each stage would leave only the last Dense layer
    # connected. No MaxPooling2D is used because 2-D pooling cannot operate on
    # the flat feature vector this tower works on.
    tower = in_layer
    for units, drop_rate in ((256, 0.2), (128, 0.3), (64, 0.25), (32, 0.25)):
        tower = Dropout(drop_rate)(tower)
        tower = Dense(units, activation="relu")(tower)
    return tower


def build_head(name, in_layer):
    """Attach the softmax output layer named `<name>_output` to `in_layer`.

    The width of the layer is looked up in `num_units[name]`.
    """
    head = Dense(num_units[name], name=f"{name}_output", activation="softmax")
    return head(in_layer)

# One tower + softmax head per attribute, all branching off the shared neck.
_heads = {
    attribute: build_head(attribute, build_tower(neck))
    for attribute in (
        "gender", "image_quality", "age", "weight",
        "bag", "footwear", "emotion", "pose",
    )
}
gender = _heads["gender"]
image_quality = _heads["image_quality"]
age = _heads["age"]
weight = _heads["weight"]
bag = _heads["bag"]
footwear = _heads["footwear"]
emotion = _heads["emotion"]
pose = _heads["pose"]

# Multi-output model: one shared input, eight named outputs.
model = Model(
    inputs=backbone.input,
    outputs=[gender, image_quality, age, weight, bag, footwear, pose, emotion],
)

In [0]:
# Freezing is only useful for a backbone that carries pretrained weights. The
# VGG16 backbone in this notebook is created with `weights=None`, so its
# filters are untrained; it has to stay trainable, otherwise the heads would
# learn on top of fixed random features. Set the flag to True only together
# with pretrained backbone weights.
FREEZE_BACKBONE = False
for layer in backbone.layers:
    layer.trainable = not FREEZE_BACKBONE

In [0]:
# A single loss string is passed, so categorical cross-entropy is applied to
# every output; no per-output loss weights are given.
opt = SGD(momentum=0.9, lr=0.001)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])

In [0]:
from keras.callbacks import LearningRateScheduler

# Keep the returned History object so the per-epoch losses and metrics remain
# available after training (`history.history`) instead of being discarded.
history = model.fit_generator(
    generator=train_gen,
    validation_data=valid_gen,
    use_multiprocessing=True,
    workers=6,
    epochs=100,
    # The learning rate is set at the start of every epoch by `scheduler`.
    callbacks=[LearningRateScheduler(scheduler, verbose=1)],
)

Epoch 1/100

Epoch 00001: LearningRateScheduler setting learning rate to 0.004.
Epoch 2/100

Epoch 00002: LearningRateScheduler setting learning rate to 0.0032744503.
Epoch 3/100

Epoch 00003: LearningRateScheduler setting learning rate to 0.0028315018.

Epoch 4/100

Epoch 00004: LearningRateScheduler setting learning rate to 0.0025329586000000003.
Epoch 5/100

Epoch 00005: LearningRateScheduler setting learning rate to 0.0023181019000000002.
Epoch 6/100

Epoch 00006: LearningRateScheduler setting learning rate to 0.0021560694.
Epoch 00006: LearningRateScheduler setting learning rate to 0.0021560694.Epoch 6/100
Epoch 7/100

Epoch 00007: LearningRateScheduler setting learning rate to 0.0010295127.
Epoch 8/100

Epoch 00008: LearningRateScheduler setting learning rate to 0.0009279307.

Epoch 00008: LearningRateScheduler setting learning rate to 0.0009279307.
Epoch 9/100

Epoch 00009: LearningRateScheduler setting learning rate to 0.0008445946.
Epoch 00009: LearningRateScheduler setting le

In [0]:
# Display the model object's repr (this does not print the layer structure).
model

<keras.engine.training.Model at 0x7f7a60979780>