In [1]:
import config
import utility

import cv2
from keras.applications.inception_v3 import InceptionV3, preprocess_input, decode_predictions
from keras.utils import to_categorical
import matplotlib.pyplot as plt
import numpy as np
import os
import pickle as pkl
from sklearn.metrics import accuracy_score
from sklearn.utils import class_weight
from tqdm import tqdm_notebook as tqdm
import xgboost as xgb

In [2]:
BATCH_SIZE=32

In [3]:
model = InceptionV3(weights='imagenet', include_top=True)

In [4]:
df_train = pkl.load(open('./data/df_train.dump.pkl', 'rb'))
df_valid = pkl.load(open('./data/df_valid.dump.pkl', 'rb'))
task_id_to_object_map = pkl.load(open('./data/task_id_to_object_map.dump.pkl', 'rb'))

In [5]:
train_labels = {}
for path, taskId in zip(df_train.imageId.tolist(), df_train.taskId.tolist()):
    train_labels[path.split('./train_images/')[1]] = task_id_to_object_map[taskId]
valid_labels = {}
for path, taskId in zip(df_valid.imageId.tolist(), df_valid.taskId.tolist()):
    valid_labels[path.split('./valid_images/')[1]] = task_id_to_object_map[taskId]

In [6]:
resized_train = os.listdir(config.RESIZED_TRAIN_DIR)
resized_valid = os.listdir(config.RESIZED_VALID_DIR)
img_train = np.empty(shape=(len(resized_train), config.MAX_PIXEL, config.MAX_PIXEL, 3), dtype=np.uint8)
labels_train = []
img_valid = np.empty(shape=(len(resized_valid), config.MAX_PIXEL, config.MAX_PIXEL, 3), dtype=np.uint8)
labels_valid = []

In [7]:
for idx, _ in tqdm(enumerate(resized_train), total=len(resized_train)):
    img = cv2.imread(config.RESIZED_TRAIN_DIR + "/" + _)
    h, w, c = img.shape
    img_train[idx, :h, :w] = img
    labels_train.append(train_labels[_])




In [8]:
for idx, _ in tqdm(enumerate(resized_valid), total=len(resized_valid)):
    img = cv2.imread(config.RESIZED_VALID_DIR + "/" + _)
    h, w, c = img.shape
    img_valid[idx, :h, :w] = img
    labels_valid.append(valid_labels[_])




In [9]:
X_train = model.predict(img_train, batch_size=1024)
X_valid = model.predict(img_valid, batch_size=1024)

In [10]:
pkl.dump(X_train, open('./data/X_train_1000.dump.pkl', 'wb'))
pkl.dump(X_train, open('./data/labels_train.dump.pkl', 'wb'))
pkl.dump(X_valid, open('./data/X_valid_1000.dump.pkl', 'wb'))
pkl.dump(labels_valid, open('./data/labels_valid.dump.pkl', 'wb'))

In [11]:
label_tokenizer = text.Tokenizer()
label_tokenizer.fit_on_texts(labels_valid)
y_train = label_tokenizer.texts_to_sequences(labels_train)
y_valid = label_tokenizer.texts_to_sequences(labels_valid)
y_train = [_[0] for _ in y_train]
y_valid = [_[0] for _ in y_valid]
cls_wgt = class_weight.compute_class_weight('balanced', np.unique(y_train), y_train)
cls_wgt = cls_wgt.tolist()

In [12]:
params = {}
params['objective'] = 'multi:softprob'
params['eval_metric'] = 'mlogloss'
params['eta'] = 0.01
params['num_class'] = 5
params['n_estimators'] = 10000
params['max_depth'] = 4 # or 5
params['min_child_weight'] = 4 # or 1
params['gamma'] = 0.4 # or 0
params['subsample'] = 0.8
params['colsample_bytree'] = 0.75 # or 0.8
params['nthread'] = 4
params['scale_pos_weight'] = 1
params['seed'] = 1

In [13]:
d_train = xgb.DMatrix(X_train, label=y_train)
d_valid = xgb.DMatrix(X_valid, label=y_valid)

watchlist = [(d_train, 'train'), (d_valid, 'valid')]

bst = xgb.train(params, d_train, 1110, watchlist, early_stopping_rounds=50, verbose_eval=1)

In [15]:
y_valid_hat = bst.predict(d_valid)

In [16]:
accuracy_score(y_valid_hat.argmax(axis=1), y_valid)

0.37517670342097825