In [1]:
from tqdm import tqdm

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import cv2
import os
import natsort
import keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Dense, Flatten, Dropout
from keras.layers.normalization import BatchNormalization
from keras.preprocessing.image import img_to_array
from keras.preprocessing.image import ImageDataGenerator
from sklearn import model_selection
from keras.models import load_model
from sklearn.preprocessing import LabelBinarizer
from keras.callbacks import ModelCheckpoint
from keras.layers.advanced_activations import LeakyReLU
from keras.optimizers import Adam
from keras.callbacks import LearningRateScheduler


from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

Using TensorFlow backend.


In [2]:
from zipfile import ZipFile
file_name = "data/train.zip"
with ZipFile(file_name, 'r') as zip:
  zip.extractall()
  print('done')

done


In [3]:
df = pd.read_csv('data/train.csv')
test_data = pd.read_csv('data/test.csv')
submission = pd.read_csv('data/sample_submission.csv')
df.head()

Unnamed: 0,id,label
0,1,9
1,2,0
2,3,0
3,4,3
4,5,0


In [4]:
TRAIN_DIR = 'train'

IMG_SIZE=100
data=[]
labels_240=[]
i = 0
for img in tqdm(os.listdir(TRAIN_DIR)):
    path = os.path.join(TRAIN_DIR,img)
    img_data = cv2.imread(path)
    img_data = cv2.resize(img_data, (IMG_SIZE,IMG_SIZE))
    image=img_to_array(img_data)
    data.append(image)


100%|██████████| 60000/60000 [00:20<00:00, 2882.18it/s]


In [5]:
image.shape

(100, 100, 3)

In [6]:
labels_240=[]
for img in tqdm(os.listdir(TRAIN_DIR)):
    number = int(img[:-4])  
    categ_num = int(df[df.id==number].label)
    labels_240.append(categ_num)

100%|██████████| 60000/60000 [00:59<00:00, 1007.00it/s]


In [7]:
num_classes=len(np.unique(labels_240))
data=np.array(data,dtype="float32")/255.0
labels_240=np.array(labels_240)
lb=LabelBinarizer()
labels_240=lb.fit_transform(labels_240)

In [8]:
datagen = ImageDataGenerator(zoom_range = 0.1,
                            height_shift_range = 0.1,
                            width_shift_range = 0.1,
                            rotation_range = 10)

In [9]:
model=Sequential()
model.add(Conv2D(32,(3,3),padding="same",activation="linear",input_shape=(100,100,3)))
model.add(LeakyReLU(alpha=0.1))
model.add(BatchNormalization(axis=-1))
model.add(MaxPooling2D(pool_size=(3,3)))
model.add(Dropout(0.25))

model.add(Conv2D(32,(3,3),padding="same",activation="linear"))
model.add(LeakyReLU(alpha=0.1))
model.add(BatchNormalization(axis=-1))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Dropout(0.2))

model.add(Conv2D(64,(3,3),activation='relu'))
model.add(Conv2D(64,(3,3),padding="same",activation="linear"))
model.add(LeakyReLU(alpha=0.1))
model.add(Dropout(0.25))

model.add(Conv2D(128,(3,3),padding="same",activation="linear"))
model.add(LeakyReLU(alpha=0.1))
model.add(BatchNormalization(axis=-1))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(128,(3,3),padding="same",activation="relu"))
model.add(BatchNormalization(axis=-1))
model.add(Dropout(0.25))

model.add(Flatten())
model.add(Dense(1024,activation="linear"))
model.add(LeakyReLU(alpha=0.1))
model.add(BatchNormalization())
model.add(Dropout(0.5))
model.add(Dense(10
                ,activation="softmax"))

model.compile(loss='categorical_crossentropy', optimizer = Adam(lr=1e-4), metrics=["accuracy"])

In [10]:
x_train,x_test,y_train,y_test=model_selection.train_test_split(data,labels_240,test_size=0.25,random_state=47)

In [11]:
  len(x_train)
  len(y_train)
  len(y_test)
  len(x_test)
  print('')
  len(x_train[0])

45000

45000

15000

15000




100

In [12]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 100, 100, 32)      896       
_________________________________________________________________
leaky_re_lu_1 (LeakyReLU)    (None, 100, 100, 32)      0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 100, 100, 32)      128       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 33, 33, 32)        0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 33, 33, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 33, 33, 32)        9248      
_________________________________________________________________
leaky_re_lu_2 (LeakyReLU)    (None, 33, 33, 32)        0         
__________

In [13]:
aug=ImageDataGenerator(rotation_range=25,width_shift_range=0.1,height_shift_range=0.1,shear_range=0.2,horizontal_flip=True,fill_mode="nearest")


In [14]:
checkpoint=ModelCheckpoint(filepath='data/model_best2.hdf5',
                           save_best_only=True,verbose=1)


In [15]:
train=model.fit_generator(aug.flow(x_train,y_train,batch_size=64),validation_data=(x_test,y_test),
                          steps_per_epoch=len(x_train)/150,epochs=50,verbose=1,callbacks=[checkpoint])

Epoch 1/50

Epoch 00001: val_loss improved from inf to 1.62171, saving model to data/model_best2.hdf5
Epoch 2/50

Epoch 00002: val_loss improved from 1.62171 to 1.01193, saving model to data/model_best2.hdf5
Epoch 3/50

Epoch 00003: val_loss did not improve from 1.01193
Epoch 4/50

Epoch 00004: val_loss improved from 1.01193 to 0.90320, saving model to data/model_best2.hdf5
Epoch 5/50

Epoch 00005: val_loss improved from 0.90320 to 0.86879, saving model to data/model_best2.hdf5
Epoch 6/50

Epoch 00006: val_loss improved from 0.86879 to 0.77189, saving model to data/model_best2.hdf5
Epoch 7/50

Epoch 00007: val_loss improved from 0.77189 to 0.61246, saving model to data/model_best2.hdf5
Epoch 8/50

Epoch 00008: val_loss improved from 0.61246 to 0.59954, saving model to data/model_best2.hdf5
Epoch 9/50

Epoch 00009: val_loss improved from 0.59954 to 0.51996, saving model to data/model_best2.hdf5
Epoch 10/50

Epoch 00010: val_loss did not improve from 0.51996
Epoch 11/50

Epoch 00011: val


Epoch 00041: val_loss did not improve from 0.29111
Epoch 42/50

Epoch 00042: val_loss did not improve from 0.29111
Epoch 43/50

Epoch 00043: val_loss did not improve from 0.29111
Epoch 44/50

Epoch 00044: val_loss did not improve from 0.29111
Epoch 45/50

Epoch 00045: val_loss did not improve from 0.29111
Epoch 46/50

Epoch 00046: val_loss did not improve from 0.29111
Epoch 47/50

Epoch 00047: val_loss did not improve from 0.29111
Epoch 48/50

Epoch 00048: val_loss improved from 0.29111 to 0.28564, saving model to data/model_best2.hdf5
Epoch 49/50

Epoch 00049: val_loss improved from 0.28564 to 0.26957, saving model to data/model_best2.hdf5
Epoch 50/50

Epoch 00050: val_loss did not improve from 0.26957
