<a href="https://colab.research.google.com/github/mad0511/EIP4/blob/master/Assignment%205/PersonAttrubutes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
# Mount Google Drive inside the Colab VM at /content/gdrive; the unzip step
# that follows reads the dataset archive from under that mount point.
from google.colab import drive
drive.mount('/content/gdrive')

In [0]:
!unzip -q "/content/gdrive/My Drive/person_dataset.zip"

In [0]:
# Colab-only magic selecting the TensorFlow 1.x runtime. It is placed ahead of
# the keras imports below, presumably so they bind to the TF1 backend
# (TODO confirm on the current Colab image).
%tensorflow_version 1.x

import cv2
import json

import numpy as np
import pandas as pd

from functools import partial
from pathlib import Path 
from tqdm import tqdm

from google.colab.patches import cv2_imshow

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder


from keras.applications import VGG16
from keras.layers.core import Dropout
from keras.layers.core import Flatten
from keras.layers.core import Dense
from keras.layers import Input
from keras.models import Model
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator


In [0]:
# Dataset locations, relative to the notebook's working directory.
# NOTE(review): "imagesa" looks like it could be a typo for "images" - confirm
# against the folder name inside the extracted archive.
annotation_json = Path("via_export_json.json")  # annotation export, parsed as JSON in the next cell
images_root = Path("imagesa")  # directory prefix joined onto every annotated file name

In [0]:

def _annotation_record(key, annotation):
    """Flatten one annotation entry into a single row dict.

    The row holds the image path under "file_name" followed by the attributes
    of the entry's first region. Whatever trails ".jpg" in the JSON key is cut
    off before the extension is re-appended.
    """
    image_path = str(images_root / key).split(".jpg")[0] + ".jpg"
    record = {"file_name": image_path}
    record.update(annotation["regions"][0]["region_attributes"])
    return record


annotations = json.loads(annotation_json.read_text())
ann_list = [
    _annotation_record(key, annotation)
    for key, annotation in tqdm(annotations.items())
]

# The first three records are skipped; the original author tagged this slice
# with "#remove [3:]" - NOTE(review): confirm whether it should still be here.
df = pd.DataFrame(ann_list[3:])

In [0]:
# Preview the first rows of the annotation table.
df.head()

In [0]:
# Summary statistics for the table, transposed so each column becomes a row.
df.describe().T

In [0]:
# Exploratory check: learn integer codes for the Gender column, show the
# classes the encoder discovered, then display the encoded column.
gender_labels = df["Gender"]
gender_encoder = LabelEncoder()
gender_encoder.fit(gender_labels)
print(gender_encoder.classes_)
gender_encoder.transform(gender_labels)

In [0]:
def encode_multi_categories(x, n=3, order_dict=None):
    """One-hot encode categorical label(s) as rows of an ``n x n`` identity matrix.

    Parameters
    ----------
    x : hashable label or 1-D array-like of labels
        With ``order_dict``: a single label produces one row of length ``n``;
        a 1-D collection (list, numpy array, pandas Series) produces a
        ``(len(x), n)`` matrix. Without ``order_dict``: ``x`` must be a 1-D
        array-like, because the integer codes are learned from it.
    n : int, default 3
        Number of classes, i.e. the width of the encoding.
    order_dict : dict, optional
        Explicit ``label -> class index`` mapping. When omitted, indices are
        assigned by fitting a ``LabelEncoder`` on ``x``.

    Returns
    -------
    numpy.ndarray
        One-hot row(s) with ``n`` columns.

    Raises
    ------
    KeyError
        If a label is missing from ``order_dict``.
    IndexError
        If a class index is not smaller than ``n``.
    """
    identity = np.eye(n)
    if order_dict is None:
        return identity[LabelEncoder().fit_transform(x)]
    try:
        # Single hashable label: the path used by ``Series.apply``.
        index = order_dict[x]
    except TypeError:
        # Unhashable input (list / ndarray / Series): encode every element.
        index = [order_dict[label] for label in np.asarray(x).tolist()]
    return identity[index]

In [0]:
def resize_and_pad(image, size=224, fill=0):
    """Scale ``image`` to fit inside a ``size x size`` square and pad the rest.

    The aspect ratio is kept: both sides are multiplied by ``size / longest
    side`` (truncated to int), then constant borders of value ``fill`` are added
    so the result is exactly ``size`` pixels high and wide. When the padding is
    odd, the extra pixel goes to the bottom / right edge.
    """
    height, width = image.shape[:2]
    scale = float(size) / max(height, width)
    scaled_h = int(height * scale)
    scaled_w = int(width * scale)

    # cv2.resize expects the target size as (width, height).
    resized = cv2.resize(image, (scaled_w, scaled_h))

    pad_h = size - scaled_h
    pad_w = size - scaled_w
    pad_top = pad_h // 2
    pad_left = pad_w // 2

    return cv2.copyMakeBorder(
        resized,
        pad_top,
        pad_h - pad_top,
        pad_left,
        pad_w - pad_left,
        cv2.BORDER_CONSTANT,
        value=[fill, fill, fill],
    )

In [0]:
def read_image(filename, resize=None, augment_fn=None):
    """Load an image with OpenCV, optionally square-resize and augment it.

    Parameters
    ----------
    filename : str or path-like
        Image location; converted with ``str()`` before it is handed to
        ``cv2.imread``.
    resize : int, optional
        When given, the image is passed through ``resize_and_pad`` so the
        result is ``resize x resize`` pixels.
    augment_fn : callable, optional
        ``image -> image`` function applied last, after any resizing.

    Returns
    -------
    numpy.ndarray
        The decoded (and possibly resized / augmented) image.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read the file. ``cv2.imread`` signals this by
        returning ``None``, which would otherwise only surface later as an
        obscure attribute error.
    """
    image = cv2.imread(str(filename))
    if image is None:
        raise FileNotFoundError(f"Could not read image: {filename}")
    if resize is not None:
        image = resize_and_pad(image, resize)
    if augment_fn is not None:
        image = augment_fn(image)
    return image

In [0]:
# Sanity check: load the image whose index label is 10, resized and padded to 224x224.
x = read_image(df.file_name[10], resize=224)

In [0]:
# Show the annotation row at position 10. This is the same sample as the image
# loaded above only while df keeps its default 0..n-1 index (label == position).
df.iloc[10]

In [0]:
# Render the sample image inline using the Colab patch for cv2.imshow.
cv2_imshow(x)

In [0]:
# Load every annotated image at 224x224 and stack the results into a single
# array with a leading sample axis (all images are held in memory at once).
X = np.stack([read_image(image_path, resize=224) for image_path in df.file_name])

In [0]:

def _one_hot_labels(column, class_index):
    """One-hot encode a label column with an explicit ``label -> index`` mapping.

    Returns an array with one row per sample and ``len(class_index)`` columns.
    """
    encode = partial(encode_multi_categories, n=len(class_index), order_dict=class_index)
    return np.stack(column.apply(encode).values)


def _sorted_class_index(classes):
    """Map each label to its rank in sorted order.

    Sorting makes the class indices independent of the order in which labels
    happen to appear in the data.
    """
    return dict(zip(sorted(classes), range(len(classes))))


# gender (fixed order)
gender_dict = {"female": 0, "male": 1}
y_gender = _one_hot_labels(df.Gender, gender_dict)

# image quality (fixed order: Bad, Average, Good)
image_encode_dict = dict(zip(("Bad", "Average", "Good"), range(3)))
y_image_quality = _one_hot_labels(df.ImageQuality, image_encode_dict)

# age
unique_ages = df.Age.unique()
age_dict = _sorted_class_index(unique_ages)
y_age = _one_hot_labels(df.Age, age_dict)

# weight (fixed order, lightest to heaviest)
unique_weight = ['underweight','normal-healthy', 'slightly-overweight', 'over-weight']
weight_dict = dict(zip(unique_weight, range(len(unique_weight))))
y_weight = _one_hot_labels(df.Weight, weight_dict)

# bag
unique_bags = df.CarryingBag.unique()
bag_dict = _sorted_class_index(unique_bags)
y_bag = _one_hot_labels(df.CarryingBag, bag_dict)

# pose
unique_poses = df.BodyPose.unique()
pose_dict = _sorted_class_index(unique_poses)
y_pose = _one_hot_labels(df.BodyPose, pose_dict)

# footwear
unique_footwears = df.Footwear.unique()
footwear_dict = _sorted_class_index(unique_footwears)
y_footwear = _one_hot_labels(df.Footwear, footwear_dict)

# emotion
# Sorted like the other data-derived mappings: previously this was the only one
# built from the raw unique() order, so its class indices changed with row order.
unique_emotions = df.Emotion.unique()
emotion_dict = _sorted_class_index(unique_emotions)
y_emotion = _one_hot_labels(df.Emotion, emotion_dict)


In [0]:
# One-hot label matrix for every model output, keyed by the output layer name
# so the dicts built below can be passed straight to fit / evaluate.
label_arrays = {
    "gender_output": y_gender,
    "image_quality_output": y_image_quality,
    "age_output": y_age,
    "weight_output": y_weight,
    "bag_output": y_bag,
    "pose_output": y_pose,
    "footwear_output": y_footwear,
    "emotion_output": y_emotion,
}

# Split sample positions instead of the labels themselves, so one split can
# index every label matrix. A fixed random_state keeps the split identical
# across kernel restarts.
X_train, X_valid, y_train_idx, y_valid_idx = train_test_split(
    X, np.arange(len(X)), random_state=42
)

y_train = {name: labels[y_train_idx] for name, labels in label_arrays.items()}
y_valid = {name: labels[y_valid_idx] for name, labels in label_arrays.items()}

In [0]:
# Convolutional VGG16 backbone (ImageNet weights, classifier removed) on 224x224x3 input.
backbone = VGG16(weights="imagenet", include_top=False, input_tensor=Input(shape=(224, 224, 3)))

# Shared neck: flatten the backbone feature map and project it to 512 units.
neck = backbone.output
neck = Flatten(name="flatten")(neck)
neck = Dense(512, activation="relu")(neck)


def build_tower(in_layer):
    """Return a per-attribute branch on ``in_layer``: Dropout(0.5) then Dense(128, relu)."""
    tower = Dropout(0.5)(in_layer)
    tower = Dense(128, activation="relu")(tower)
    return tower


def build_head(name, num_classes):
    """Attach a softmax classification head called ``name`` to its own tower.

    Every attribute is a single-label problem with one-hot targets trained with
    categorical cross-entropy, so each head is a softmax over ``num_classes``.
    """
    return Dense(num_classes, activation="softmax", name=name)(build_tower(neck))


# heads - each width is taken from the matching label encoding so the output
# shape always agrees with the one-hot targets (footwear and pose were
# previously fixed at 2 units regardless of how many classes the data has).
gender = build_head("gender_output", len(gender_dict))
image_quality = build_head("image_quality_output", len(image_encode_dict))
age = build_head("age_output", len(unique_ages))
weight = build_head("weight_output", len(unique_weight))
bag = build_head("bag_output", len(unique_bags))
footwear = build_head("footwear_output", len(unique_footwears))
emotion = build_head("emotion_output", len(unique_emotions))
pose = build_head("pose_output", len(unique_poses))


model = Model(
    inputs=backbone.input,
    outputs=[gender, image_quality, age, weight, bag, footwear, pose, emotion]
)

In [0]:
# Re-display the summary statistics (same call as earlier, without the transpose).
df.describe()

In [0]:
# freeze backbone: mark every backbone layer as non-trainable. This happens
# before the model is compiled in the next cell; the neck and head layers are
# left at their default trainable state.
for layer in backbone.layers:
	layer.trainable = False

In [0]:
# NOTE: the commented-out dicts below are an unfinished per-output loss /
# loss-weight configuration (they list only some of the eight outputs) and are
# not used by the compile call.
# losses = {
# 	"gender_output": "binary_crossentropy",
# 	"image_quality_output": "categorical_crossentropy",
# 	"age_output": "categorical_crossentropy",
# 	"weight_output": "categorical_crossentropy",

# }
# loss_weights = {"gender_output": 1.0, "image_quality_output": 1.0, "age_output": 1.0}
# SGD with momentum at a fixed learning rate.
opt = SGD(lr=0.001, momentum=0.9)
# A single loss string is passed, so the same categorical cross-entropy is used
# for every output; accuracy is tracked as the metric.
model.compile(
    optimizer=opt,
    loss="categorical_crossentropy", 
    # loss_weights=loss_weights, 
    metrics=["accuracy"]
)

In [0]:
# Train on the in-memory training arrays for 50 epochs with batches of 32,
# passing the held-out split as validation data.
model.fit(X_train, y_train, validation_data=(X_valid, y_valid), batch_size=32, epochs=50)

In [0]:
# Evaluate on the held-out split. For a multi-output model `evaluate` returns a
# flat list - total loss, then one loss per output, then one metric per output -
# in the order given by `model.metrics_names`. scores[1] is therefore the first
# per-output loss, not an accuracy, so every value is looked up by name.
scores = model.evaluate(X_valid, y_valid, verbose=1)
named_scores = dict(zip(model.metrics_names, scores))
print('Test loss:', scores[0])
for metric_name, metric_value in named_scores.items():
    # Keras names accuracy metrics "<output>_acc" or "<output>_accuracy"
    # depending on the version.
    if metric_name.endswith(("acc", "accuracy")):
        print(f'Test accuracy ({metric_name}):', metric_value)