In [None]:
import config
import cv2
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import Sequential
from keras.layers import Dropout, Dense, Flatten, Merge, Input
from keras.layers.merge import concatenate
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.optimizers import SGD
from keras.preprocessing import image, text
from keras.utils import np_utils as u
from keras.utils import to_categorical
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle as pkl
from sklearn.utils import class_weight
from sklearn.model_selection import StratifiedKFold
from tqdm import tqdm_notebook as tqdm
import utility

In [2]:
BATCH_SIZE=8
K_FOLDS=5

In [3]:
df_train = pkl.load(open('./data/df_train.dump.pkl', 'rb'))
df_valid = pkl.load(open('./data/df_valid.dump.pkl', 'rb'))

In [4]:
df_train.head(2)

Unnamed: 0,imageId,labelId,taskId,object,attribute,label
0,1,6,5,dress,decoration,printed
1,2,7,6,dress,color,purple


In [5]:
df_valid.head(2)

Unnamed: 0,imageId,labelId,taskId,object,attribute,label
0,1,1,1,shoe,gender,men
1,1,2,2,shoe,age,adult


In [6]:
X_train = []
labels_train = []
objects_train = []
attributes_train = []
missed = []
for idx, rows in tqdm(df_train.iterrows(), total=len(df_train)):
    img = cv2.imread(utility.image_location(rows['imageId'], config.RESIZED_TRAIN_DIR))
    if type(img) != np.ndarray:
        missed.append(rows['imageId'])
        continue
    h, w, c = img.shape
    app_img = np.empty(shape=(config.MAX_PIXEL, config.MAX_PIXEL, 3), dtype=np.uint8)
    app_img[:h, :w] = img
    X_train.append(app_img)
    labels_train.append(rows['label'].strip())
    objects_train.append(rows['object'].strip())
    attributes_train.append(rows['attribute'].strip())
print("%d missed." % len(missed))


2958 missed.


In [7]:
X_valid = []
labels_valid = []
objects_valid = []
attributes_valid = []
missed = []
for idx, rows in tqdm(df_valid.iterrows(), total=len(df_valid)):
    img = cv2.imread(utility.image_location(rows['imageId'], config.RESIZED_VALID_DIR))
    if type(img) != np.ndarray:
        missed.append(rows['imageId'])
        continue
    h, w, c = img.shape
    app_img = np.empty(shape=(config.MAX_PIXEL, config.MAX_PIXEL, 3), dtype=np.uint8)
    app_img[:h, :w] = img
    X_valid.append(app_img)
    labels_valid.append(rows['label'].strip())
    objects_valid.append(rows['object'].strip())
    attributes_valid.append(rows['attribute'].strip())
print("%d missed." % len(missed))


532 missed.


In [8]:
X_train = np.array(X_train)
X_valid = np.array(X_valid)

In [9]:
label_tokenizer = text.Tokenizer()
label_tokenizer.fit_on_texts(labels_train)
y_train = label_tokenizer.texts_to_sequences(labels_train)
y_valid = label_tokenizer.texts_to_sequences(labels_valid)
object_tokenizer = text.Tokenizer()
object_tokenizer.fit_on_texts(objects_train)
obj_train = object_tokenizer.texts_to_sequences(objects_train)
obj_valid = object_tokenizer.texts_to_sequences(objects_valid)
attr_tokenizer = text.Tokenizer()
attr_tokenizer.fit_on_texts(attributes_train)
attr_train = attr_tokenizer.texts_to_sequences(attributes_train)
attr_valid = attr_tokenizer.texts_to_sequences(attributes_valid)

In [10]:
print("num labels:", len(label_tokenizer.word_index))
print("num objects:", len(object_tokenizer.word_index))
print("num attributes:", len(attr_tokenizer.word_index))

num labels: 356
num objects: 4
num attributes: 25


In [11]:
y_train = np.array([_[0] for _ in y_train])
y_valid = np.array([_[0] for _ in y_valid])
X_obj_train = to_categorical(np.array([_[0] for _ in obj_train]))
X_obj_valid = to_categorical(np.array([_[0] for _ in obj_valid]))
X_attr_train = to_categorical(np.array([_[0] for _ in attr_train]))
X_attr_valid = to_categorical(np.array([_[0] for _ in attr_valid]))
cls_wgt = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
cls_wgt = np.array([0] + cls_wgt.tolist())
cls_wgt = {idx: value for idx, value in enumerate(cls_wgt)}

In [12]:
X_train, X_valid = X_train.astype('float32')/255.0, X_valid.astype('float32')/255.0

In [13]:
print("Shape:")
print("X_train:", X_train.shape)
print("X_obj_train:", X_obj_train.shape)
print("X_attr_train:", X_attr_train.shape)
print("y_train:", y_train.shape)
print("X_valid:", X_valid.shape)
print("X_obj_valid:", X_obj_valid.shape)
print("X_attr_valid:", X_attr_valid.shape)
print("y_valid:", y_valid.shape)

Shape:
X_train: (52430, 32, 32, 3)
X_obj_train: (52430, 5)
X_attr_train: (52430, 26)
y_train: (52430,)
X_valid: (10473, 32, 32, 3)
X_obj_valid: (10473, 5)
X_attr_valid: (10473, 26)
y_valid: (10473,)


In [14]:
X = np.vstack((X_train, X_valid))
X_obj = np.vstack((X_obj_train, X_obj_valid))
X_attr = np.vstack((X_attr_train, X_attr_valid))
y = np.hstack((y_train, y_valid))

In [15]:
print("Shape:")
print("X:", X.shape)
print("X_obj:", X_obj.shape)
print("X_attr:", X_attr.shape)
print("y:", y.shape)

Shape:
X: (62903, 32, 32, 3)
X_obj: (62903, 5)
X_attr: (62903, 26)
y: (62903,)


In [22]:
img_model = Sequential()
img_model.add(Conv2D(32, (3, 3), input_shape=(32, 32, 3), padding='same',
             activation='relu'))
img_model.add(Dropout(0.2))
img_model.add(Conv2D(32, (3, 3), activation='relu', padding='valid'))
img_model.add(MaxPooling2D(pool_size=(2, 2)))
img_model.add(Flatten())

obj_model = Sequential()
obj_model.add(Dense(10, activation='relu', input_shape=(len(object_tokenizer.word_index)+1,)))

attr_model = Sequential()
attr_model.add(Dense(60, activation='relu', input_shape=(len(attr_tokenizer.word_index)+1,)))

model = Sequential()
model.add(Merge([img_model, obj_model, attr_model], mode='concat'))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(label_tokenizer.word_index)+1, activation='softmax'))
model.compile(loss='categorical_crossentropy',
    optimizer=SGD(momentum=0.5, decay=0.0001), metrics=['accuracy'])



In [23]:
early_stopping =EarlyStopping(monitor='val_loss', patience=3)
bst_model_path = './models/object_model.hdf5'
model_checkpoint = ModelCheckpoint(bst_model_path, save_best_only=True, save_weights_only=True)

In [24]:
model.input

[<tf.Tensor 'conv2d_3_input:0' shape=(?, 32, 32, 3) dtype=float32>,
 <tf.Tensor 'dense_5_input:0' shape=(?, 5) dtype=float32>,
 <tf.Tensor 'dense_6_input:0' shape=(?, 26) dtype=float32>]

In [25]:
model.output

<tf.Tensor 'dense_8/Softmax:0' shape=(?, 357) dtype=float32>

In [26]:
skf = StratifiedKFold(n_splits=K_FOLDS, shuffle=True)

In [39]:
for idx, (train_indices, valid_indices) in enumerate(skf.split(X, y)):
    print("Training on fold " + str(idx+1) + "/" + str(K_FOLDS) + "...") 
    X_train, y_train = X[train_indices], y[train_indices]
    X_obj_train, X_attr_train = X_obj[train_indices], X_attr[train_indices]
    X_valid, y_valid = X[valid_indices], y[valid_indices]
    X_obj_valid, X_attr_valid = X_obj[valid_indices], X_attr[valid_indices]
    y_train = to_categorical(y_train)
    y_valid = to_categorical(y_valid)
    model.fit(
        [X_train, X_obj_train, X_attr_train], y_train, 
        epochs=50, 
        validation_data=([X_valid, X_obj_valid, X_attr_valid], y_valid), 
        shuffle=True, 
        callbacks=[early_stopping, model_checkpoint],
        batch_size=BATCH_SIZE
    )

Training on fold 1/5...
Train on 50199 samples, validate on 12704 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Training on fold 2/5...
Train on 50257 samples, validate on 12646 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Training on fold 3/5...
Train on 50312 samples, validate on 12591 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Training on fold 4/5...
Train on 50381 samples, validate on 12522 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Training on fold 5/5...
Train on 50463 samples, validate on 12440 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50


In [40]:
print("Accuracy: %.2f" % (model.evaluate([X_valid, X_obj_valid, X_attr_valid], y_valid)[-1]*100))

Accuracy: 69.76


In [None]:
label_tokenizer.word_index

In [None]:
import pickle as pkl
pkl.dump(label_tokenizer, open('./models/object_model_label_tokenizer.pkl', 'wb'))

In [None]:
def predict_class(img_path):
    img = cv2.imread(img_path)
    h, w, c = img.shape
    ip = np.empty(shape=(1, config.MAX_PIXEL, config.MAX_PIXEL, 3))
    ip[0, :h, :w] = img
    prediction = model.predict(ip)
    return prediction[:, 1:][0].tolist()

In [None]:
predict_class('./resized_test/1.jpg')

In [None]:
all_rows = []
resized_test = os.listdir(config.RESIZED_TEST_DIR)
for _ in tqdm(resized_test, total=len(resized_test)):
    img_path = config.RESIZED_TEST_DIR + "/" + _
    all_rows.append([img_path] + predict_class(img_path))

In [None]:
import pandas as pd
df_test = pd.DataFrame(all_rows, columns = ['img_path', 'dress', 'outerwear', 'pants', 'shoe'])
pkl.dump(df_test, open('df_test.pkl', 'wb'))
df_test = pkl.load(open('df_test.pkl', 'rb'))
df_test.head()