In [36]:
## import libaries
import pandas as pd
import numpy as np
import cv2
import os, sys
import glob
import time
from tqdm import tqdm
from sklearn.preprocessing import LabelEncoder
from sklearn.cross_validation import train_test_split

from keras import __version__
#from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Convolution2D, MaxPooling2D
from keras.callbacks import EarlyStopping
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras.preprocessing.image import ImageDataGenerator
from keras.optimizers import SGD
from keras.utils import to_categorical

In [4]:
IM_WIDTH, IM_HEIGHT = 64, 64 #fixed size for InceptionV3
NB_EPOCHS = 10
BAT_SIZE = 32
FC_SIZE = 1024
NB_IV3_LAYERS_TO_FREEZE = 172

In [5]:
train = pd.read_csv('input/train.csv')
test = pd.read_csv('input/test.csv')

In [6]:
# function to read image
def read_img(img_path):
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    img = cv2.resize(img, (IM_WIDTH, IM_HEIGHT))
    return img

In [7]:
datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest')

In [8]:
label_counts = train.label.value_counts()
print ('The train data has {} unique labels'.format(train['label'].nunique()))

The train data has 25 unique labels


In [9]:
for lbl in label_counts.index:
    #print(lbl)
    save_to_dir = 'input/' + lbl
    if not os.path.exists(save_to_dir):
        os.mkdir(save_to_dir)
    else:
        print(lbl + ' already save.')
        continue
    img_id = train[train['label'] == lbl]['image_id'].values
    n = 1000//len(img_id)
    for img in tqdm(img_id):    
        x = read_image(TRAIN_PATH + '{}.png'.format(img))
        cv2.imwrite('input/' + lbl+ '/' + img +'.png', cv2.cvtColor(x, cv2.COLOR_RGB2BGR))
        x = x.reshape((1,) + x.shape)
        i = 0
        for batch in datagen.flow(x, batch_size=1,
                          save_to_dir= save_to_dir, save_prefix=lbl, save_format='png'):
            i += 1
            if i > n:
                break
    

candy already save.
chocolate already save.
juice already save.
coffee already save.
tea already save.
cereal already save.
water already save.
jam already save.
spices already save.
honey already save.
chips already save.
soda already save.
pasta already save.
tomatosauce already save.
nuts already save.
milk already save.
cake already save.
vinegar already save.
rice already save.
oil already save.
beans already save.
sugar already save.
flour already save.
fish already save.
corn already save.


In [None]:
X_train = []
X_train_id = []
y_train = []
start_time = time.time()

print('Read train images')
folders = list(label_counts.index)
for fld in folders:
    index = folders.index(fld)
    print('Load folder {} (Index: {})'.format(fld, index))
    path = os.path.join('input', fld, '*.png')
    #print(path)
    files = glob.glob(path)
    print(len(files))
    for fl in files:
        flbase = os.path.basename(fl)
        img = read_img(fl)
        X_train.append(img)
        X_train_id.append(flbase)
        y_train.append(index)

print('Read train data time: {} seconds'.format(round(time.time() - start_time, 2)))

In [12]:
X_train = np.array(X_train, np.float32) / 255.

In [50]:
y_train = np.array(y_train)
y_train = to_categorical(y_train)

In [26]:
print(X_train.shape, np.array(y_train).shape)

(28770, 64, 64, 3) (28770,)


train_X, valid_X, train_y, valid_y = train_test_split(X_train, y_train, test_size = 0.25, random_state = 32)

In [28]:
train_X, valid_X, train_y, valid_y = train_test_split(X_train, y_train, test_size = 0.25, random_state = 32)

In [45]:
num_class = label_counts.shape[0]
num_class

25

In [48]:
model = Sequential()
model.add(Convolution2D(32, (3,3), activation='relu', padding='same',input_shape = (64,64,3))) # if you resize the image above, shape would be (128,128,3)
model.add(Convolution2D(32, (3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Convolution2D(64, (3,3), activation='relu', padding='same'))
model.add(Convolution2D(64, (3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))

model.add(Convolution2D(128, (3,3), activation='relu', padding='same'))
model.add(Convolution2D(128, (3,3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.25))
model.add(Dense(num_class, activation='softmax'))
model.compile(loss = 'categorical_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

In [51]:
early_stops = EarlyStopping(patience=3, monitor='val_acc')

model.fit(X_train, y_train, batch_size=BAT_SIZE, epochs=NB_EPOCHS, shuffle=True,
                  verbose=2, validation_split=0.3, callbacks=[early_stops])
                  

Train on 20139 samples, validate on 8631 samples
Epoch 1/30
45s - loss: 2.7624 - acc: 0.1084 - val_loss: 9.9047 - val_acc: 0.0000e+00
Epoch 2/30
22s - loss: 2.4648 - acc: 0.2070 - val_loss: 12.1298 - val_acc: 5.7931e-04
Epoch 3/30
22s - loss: 2.1764 - acc: 0.2988 - val_loss: 14.6148 - val_acc: 0.0022
Epoch 4/30
22s - loss: 1.9369 - acc: 0.3768 - val_loss: 14.1348 - val_acc: 0.0022
Epoch 5/30
22s - loss: 1.7396 - acc: 0.4428 - val_loss: 14.6491 - val_acc: 0.0016
Epoch 6/30
22s - loss: 1.5472 - acc: 0.5013 - val_loss: 15.3544 - val_acc: 0.0031
Epoch 7/30
22s - loss: 1.3774 - acc: 0.5518 - val_loss: 15.3677 - val_acc: 0.0038
Epoch 8/30
22s - loss: 1.2074 - acc: 0.6062 - val_loss: 15.7469 - val_acc: 0.0050
Epoch 9/30
22s - loss: 1.0898 - acc: 0.6432 - val_loss: 15.7767 - val_acc: 0.0037
Epoch 10/30
22s - loss: 0.9735 - acc: 0.6775 - val_loss: 15.8470 - val_acc: 0.0048
Epoch 11/30
22s - loss: 0.8710 - acc: 0.7104 - val_loss: 15.8718 - val_acc: 0.0054
Epoch 12/30
22s - loss: 0.7766 - acc: 0.

<keras.callbacks.History at 0x7fe78473d668>