In [None]:
## Import libraries
import pandas as pd
import numpy as np
import cv2
import os, sys
import glob
import time
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder

from keras import __version__
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from keras.utils import to_categorical

In [None]:
# --- Notebook-wide configuration -------------------------------------------
# Target width/height (pixels) that read_img resizes every image to.
# NOTE(review): this was originally described as the "fixed size for
# InceptionV3"; Keras documents 299x299 as that model's default input, so
# confirm the model is built with an input shape matching these values.
IM_WIDTH = 256
IM_HEIGHT = 256

# Training hyper-parameters (not referenced in the cells shown here;
# presumably consumed by the model-building / training cells -- confirm).
NB_EPOCHS = 30
BAT_SIZE = 32
FC_SIZE = 1024
NB_IV3_LAYERS_TO_FREEZE = 172

In [None]:
# Load the train/test tables. Later cells read the `label` and `image_id`
# columns of `train`.
train, test = (
    pd.read_csv('input/train.csv'),
    pd.read_csv('input/test.csv'),
)

In [None]:
def read_img(img_path):
    """Read an image from disk and resize it to IM_WIDTH x IM_HEIGHT.

    Parameters
    ----------
    img_path : str
        Path of the image file to load.

    Returns
    -------
    numpy.ndarray
        The image decoded by ``cv2.imread`` in ``cv2.IMREAD_COLOR`` mode,
        resized with ``cv2.resize`` to ``(IM_WIDTH, IM_HEIGHT)``.

    Raises
    ------
    IOError
        If OpenCV could not read or decode ``img_path``.
    """
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread reports a missing/corrupt file by returning None instead
        # of raising; fail here with the offending path rather than letting
        # cv2.resize die later with an opaque assertion error.
        raise IOError('Could not read image: {}'.format(img_path))
    return cv2.resize(img, (IM_WIDTH, IM_HEIGHT))

In [None]:
# Random augmentation applied when generating the extra training images:
# geometric jitter (rotation, shifts, shear, zoom) plus horizontal mirroring.
datagen = ImageDataGenerator(
    # geometric transforms
    rotation_range=40,
    shear_range=0.2,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    # mirroring
    horizontal_flip=True,
    # how pixels exposed by a transform are filled in
    fill_mode='nearest',
)

In [None]:
# Number of training rows per label; its index lists each distinct label and
# is used by later cells to iterate over the classes.
label_counts = train['label'].value_counts()
# value_counts() drops NaN by default, so its length equals the number of
# unique labels.
print('The train data has {} unique labels'.format(len(label_counts)))

In [None]:
# For every label, create input/<label>/ and fill it with a resized copy of
# each source image plus randomly augmented variants written by `datagen`.
for lbl in label_counts.index:
    save_to_dir = 'input/' + lbl
    if not os.path.exists(save_to_dir):
        os.mkdir(save_to_dir)
    else:
        # An existing folder is taken to mean this label was already
        # processed, so it is skipped (even if a previous run was interrupted).
        print(lbl + ' already save.')
        continue
    img_id = train[train['label'] == lbl]['image_id'].values
    # Augmentation budget per source image: labels with fewer images get more
    # augmented copies each (1000 divided by the label's image count).
    n = 1000//len(img_id)
    for img in tqdm(img_id):
        # Fixed: the helper defined in this notebook is `read_img`; the
        # previous call to `read_image` raised NameError.
        # NOTE(review): TRAIN_PATH is not defined in the cells shown above --
        # it must be set (e.g. in the config cell) before this cell runs.
        x = read_img(TRAIN_PATH + '{}.png'.format(img))
        # NOTE(review): cv2.imread yields BGR data, so this conversion swaps
        # the channels before writing; kept as-is so the saved copy stays
        # consistent with the augmented files -- confirm this is intended.
        cv2.imwrite(save_to_dir + '/' + img + '.png', cv2.cvtColor(x, cv2.COLOR_RGB2BGR))
        # flow() expects a batch axis: (1, height, width, channels).
        x = x.reshape((1,) + x.shape)
        i = 0
        # The generator loops forever and writes one augmented PNG per batch
        # into save_to_dir; the counter is checked after each batch, so
        # n + 1 augmented images are produced per source image.
        for batch in datagen.flow(x, batch_size=1,
                                  save_to_dir=save_to_dir, save_prefix=lbl, save_format='png'):
            i += 1
            if i > n:
                break
    

In [None]:
# Read every PNG from the per-label folders into memory.
#   X_train    -- resized images returned by read_img
#   X_train_id -- file base names, parallel to X_train
#   y_train    -- integer class index = position of the label in `folders`
X_train, X_train_id, y_train = [], [], []
start_time = time.time()

print('Read train images')
folders = label_counts.index.tolist()
for index, fld in enumerate(folders):
    print('Load folder {} (Index: {})'.format(fld, index))
    path = os.path.join('input', fld, '*.png')
    files = glob.glob(path)
    for fl in files:
        flbase = os.path.basename(fl)
        img = read_img(fl)
        X_train.append(img)
        X_train_id.append(flbase)
        y_train.append(index)

print('Read train data time: {} seconds'.format(round(time.time() - start_time, 2)))

In [None]:
# Stack the loaded images into one float32 array and scale by 1/255.
# NOTE: this rebinds X_train, so re-running the cell divides by 255 again.
X_train = np.array(X_train, dtype=np.float32)
X_train /= 255.