In [None]:
import config
import cv2
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import Sequential
from keras.layers import Dropout, Dense, Flatten
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.optimizers import SGD
from keras.preprocessing import image, text
from keras.utils import np_utils as u
from keras.utils import to_categorical
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle as pkl
from sklearn.utils import class_weight
from sklearn.model_selection import StratifiedKFold
from tqdm import tqdm_notebook as tqdm
import utility

In [None]:
# Notebook-level constants.
# K_FOLDS is the number of splits handed to StratifiedKFold further down.
# NOTE(review): BATCH_SIZE is not referenced by the visible cells — the
# model.fit call passes batch_size=16 literally. Confirm which value is meant.
BATCH_SIZE, K_FOLDS = 8, 5

In [None]:
# Load the pickled train / validation tables (used below through .head()
# and .iterrows()). The context managers close each file handle as soon as
# the load finishes instead of leaving it to the garbage collector.
# NOTE: unpickling can execute arbitrary code — only load dumps you trust.
with open('./data/df_train.dump.pkl', 'rb') as train_dump:
    df_train = pkl.load(train_dump)
with open('./data/df_valid.dump.pkl', 'rb') as valid_dump:
    df_valid = pkl.load(valid_dump)

In [None]:
# Preview the first two rows of the training table.
df_train.head(2)

In [None]:
# Preview the first two rows of the validation table.
df_valid.head(2)

In [None]:
# Read every image referenced by df_train and paste it into the top-left
# corner of a fixed (config.MAX_PIXEL, config.MAX_PIXEL, 3) uint8 canvas,
# collecting the row's label / object / attribute values alongside it.
# Rows whose image cannot be read are recorded in `missed` and skipped.
# NOTE(review): labels are taken from rows['label'] here, while the
# validation cell takes them from rows['object'] — confirm which column
# is the intended target.
X_train = []
labels_train = []
objects_train = []
attributes_train = []
missed = []
for idx, rows in tqdm(df_train.iterrows(), total=len(df_train)):
    img = cv2.imread(utility.image_location(rows['imageId'], config.RESIZED_TRAIN_DIR))
    # cv2.imread signals a missing/unreadable file by returning None.
    if img is None:
        missed.append(rows['imageId'])
        continue
    h, w = img.shape[:2]
    # Zero-filled canvas: np.empty would leave the area outside the image as
    # uninitialised memory, i.e. arbitrary noise in the training data.
    app_img = np.zeros(shape=(config.MAX_PIXEL, config.MAX_PIXEL, 3), dtype=np.uint8)
    app_img[:h, :w] = img
    X_train.append(app_img)
    labels_train.append(rows['label'])
    objects_train.append(rows['object'])
    attributes_train.append(rows['attribute'])
print("%d missed." % len(missed))

In [None]:
# Read every image referenced by df_valid and paste it into the top-left
# corner of a fixed (config.MAX_PIXEL, config.MAX_PIXEL, 3) uint8 canvas,
# collecting the row's label / object / attribute values alongside it.
# Rows whose image cannot be read are recorded in `missed` and skipped.
# NOTE(review): labels_valid is filled from rows['object'], whereas the
# training cell fills labels_train from rows['label'] — confirm which
# column is the intended target and make the two cells agree.
X_valid = []
labels_valid = []
objects_valid = []
attributes_valid = []
missed = []
for idx, rows in tqdm(df_valid.iterrows(), total=len(df_valid)):
    img = cv2.imread(utility.image_location(rows['imageId'], config.RESIZED_VALID_DIR))
    # cv2.imread signals a missing/unreadable file by returning None.
    if img is None:
        missed.append(rows['imageId'])
        continue
    h, w = img.shape[:2]
    # Zero-filled canvas: np.empty would leave the area outside the image as
    # uninitialised memory, i.e. arbitrary noise in the validation data.
    app_img = np.zeros(shape=(config.MAX_PIXEL, config.MAX_PIXEL, 3), dtype=np.uint8)
    app_img[:h, :w] = img
    X_valid.append(app_img)
    labels_valid.append(rows['object'])
    objects_valid.append(rows['object'])
    attributes_valid.append(rows['attribute'])
print("%d missed." % len(missed))

In [None]:
# Convert the lists of padded images into NumPy arrays.
X_train, X_valid = np.array(X_train), np.array(X_valid)

In [None]:
# Encode the string labels as integer class ids and compute balanced
# class weights from the training labels.
label_tokenizer = text.Tokenizer()
label_tokenizer.fit_on_texts(labels_train)
# texts_to_sequences yields one list of token ids per label; only the first
# token id of each label is kept as its class id.
y_train = np.array([seq[0] for seq in label_tokenizer.texts_to_sequences(labels_train)])
y_valid = np.array([seq[0] for seq in label_tokenizer.texts_to_sequences(labels_valid)])
train_classes = np.unique(y_train)
# Keyword arguments: current scikit-learn releases no longer accept the
# positional (class_weight, classes, y) call form.
balanced_weights = class_weight.compute_class_weight(
    class_weight='balanced', classes=train_classes, y=y_train)
# Map every class id to its own weight (robust even if the ids are not
# contiguous). Id 0 gets weight 0: Keras Tokenizer word ids start at 1.
cls_wgt = {0: 0.0}
cls_wgt.update({int(cls_id): float(wgt) for cls_id, wgt in zip(train_classes, balanced_weights)})
# NOTE(review): cls_wgt is not passed to model.fit in the training cell —
# confirm whether class weighting was meant to be applied.
cls_wgt

In [None]:
# Display the word -> integer id mapping learned by the label tokenizer.
label_tokenizer.word_index

In [None]:
# Cast the image arrays to float32 and divide by 255 so pixel values
# fall in [0, 1].
X_train = X_train.astype('float32') / 255.0
X_valid = X_valid.astype('float32') / 255.0

In [None]:
# Report the shapes of the train / validation arrays.
print("Shape:")
for tag, arr in (("X_train:", X_train), ("y_train:", y_train),
                 ("X_valid:", X_valid), ("y_valid:", y_valid)):
    print(tag, arr.shape)

In [None]:
# Pool the train and validation samples along the first axis; the K-fold
# splitter below re-partitions the combined set.
X = np.concatenate((X_train, X_valid), axis=0)
y = np.concatenate((y_train, y_valid), axis=0)

In [None]:
# Report the shapes of the pooled arrays.
print("Shape:")
for tag, arr in (("X:", X), ("y:", y)):
    print(tag, arr.shape)

In [None]:
# Small CNN classifier: two 3x3 convolutions, one max-pool, then a dense head.
# The input shape is derived from config.MAX_PIXEL — the canvas size the
# image-loading cells pad to — instead of a hard-coded 32, so the model
# always matches the arrays it is trained on.
model = Sequential([
    Conv2D(32, (3, 3), input_shape=(config.MAX_PIXEL, config.MAX_PIXEL, 3),
           padding='same', activation='relu'),
    Dropout(0.2),
    Conv2D(32, (3, 3), activation='relu', padding='valid'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    # 5 outputs: prediction code later drops index 0 and names the other four.
    # NOTE(review): presumably 4 label ids + the unused tokenizer id 0 —
    # confirm against label_tokenizer.word_index.
    Dense(5, activation='softmax'),
])
model.compile(loss='categorical_crossentropy',
    optimizer=SGD(momentum=0.5, decay=0.0001), metrics=['accuracy'])

In [None]:
# Training callbacks: stop early on stalled validation loss (patience 3)
# and checkpoint only the best weights to bst_model_path.
bst_model_path = './models/object_model.hdf5'
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
)
model_checkpoint = ModelCheckpoint(
    bst_model_path,
    save_best_only=True,
    save_weights_only=True,
)

In [None]:
# Stratified K-fold splitter over the pooled samples. A fixed random_state
# makes the shuffled fold membership identical across kernel restarts, so
# the reported validation numbers refer to the same split every run.
skf = StratifiedKFold(n_splits=K_FOLDS, shuffle=True, random_state=42)

In [None]:
# Train on the first stratified fold only — the loop exits via `break`
# after a single iteration. X_train / y_train / X_valid / y_valid are
# rebound to this fold's data and are read again by the evaluation cell.
for idx, (train_indices, valid_indices) in enumerate(skf.split(X, y)):
    print("Training on fold " + str(idx+1) + "/" + str(K_FOLDS) + "...")
    X_train, X_valid = X[train_indices], X[valid_indices]
    # One-hot encode the integer class ids for the categorical loss.
    y_train = to_categorical(y[train_indices])
    y_valid = to_categorical(y[valid_indices])
    fit_options = dict(
        epochs=50,
        validation_data=(X_valid, y_valid),
        shuffle=True,
        callbacks=[early_stopping, model_checkpoint],
        batch_size=16,
    )
    model.fit(X_train, y_train, **fit_options)
    break

In [None]:
# Evaluate on the current validation fold; the last entry returned by
# model.evaluate is the 'accuracy' metric requested at compile time.
fold_scores = model.evaluate(X_valid, y_valid)
print("Accuracy: %.2f" % (fold_scores[-1] * 100))

In [None]:
# Display the tokenizer's word -> id mapping again before persisting it.
label_tokenizer.word_index

In [None]:
import pickle as pkl
# Persist the fitted label tokenizer next to the model weights. The context
# manager guarantees the file is flushed and closed once the dump completes.
with open('./models/object_model_label_tokenizer.pkl', 'wb') as tokenizer_file:
    pkl.dump(label_tokenizer, tokenizer_file)

In [None]:
def predict_class(img_path):
    """Return the model's class scores for the image stored at ``img_path``.

    The image is read with OpenCV, pasted into the top-left corner of a
    zero-filled ``config.MAX_PIXEL`` square canvas and divided by 255, so
    the input matches the /255 scaling applied to the training arrays.
    It is then passed to the notebook-level ``model``.

    Args:
        img_path: Path of the image file to classify.

    Returns:
        list of float: the softmax outputs with index 0 dropped, i.e. one
        score per entry of the prediction vector from index 1 onwards.

    Raises:
        ValueError: if OpenCV cannot read the image.
    """
    img = cv2.imread(img_path)
    # cv2.imread returns None instead of raising when the read fails.
    if img is None:
        raise ValueError("Could not read image: %s" % img_path)
    h, w = img.shape[:2]
    # Zero-filled batch of one: np.empty would leave the padding outside the
    # image as uninitialised memory and make predictions non-deterministic.
    ip = np.zeros(shape=(1, config.MAX_PIXEL, config.MAX_PIXEL, 3), dtype=np.float32)
    # Scale to [0, 1]; feeding raw 0-255 values would not match the training input.
    ip[0, :h, :w] = img.astype(np.float32) / 255.0
    prediction = model.predict(ip)
    return prediction[0, 1:].tolist()

In [None]:
# Try the predictor on a single test image.
# NOTE(review): this path is hard-coded, while the batch loop below reads
# from config.RESIZED_TEST_DIR — confirm both refer to the same directory.
predict_class('./resized_test/1.jpg')

In [None]:
# Run the predictor over every file in the resized test directory and
# collect one row per image: [image path, score_1, score_2, ...].
resized_test = os.listdir(config.RESIZED_TEST_DIR)
all_rows = []
for file_name in tqdm(resized_test, total=len(resized_test)):
    img_path = config.RESIZED_TEST_DIR + "/" + file_name
    all_rows.append([img_path, *predict_class(img_path)])

In [None]:
import pandas as pd
# Assemble the per-image predictions into a table, persist it, and reload
# it to check the round trip.
# NOTE(review): the four class column names are hard-coded and assumed to
# follow the id order in label_tokenizer.word_index (ids 1..4) — confirm.
df_test = pd.DataFrame(all_rows, columns=['img_path', 'dress', 'outerwear', 'pants', 'shoe'])
# Close the write handle before re-opening the file for reading so the
# reload is guaranteed to see the complete dump.
with open('df_test.pkl', 'wb') as dump_file:
    pkl.dump(df_test, dump_file)
with open('df_test.pkl', 'rb') as dump_file:
    df_test = pkl.load(dump_file)
df_test.head()