In [1]:
%matplotlib inline

import matplotlib.pyplot as plt
import shutil, csv, os, numpy as np
import pandas as pd
from glob import glob
from __future__ import print_function, division

import keras
from keras.models import Sequential
from keras.layers.normalization import BatchNormalization
from keras.layers.core import Dense, Dropout, Flatten, Activation, Lambda
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.utils import np_utils
from keras.utils.data_utils import get_file
from keras.optimizers import Adam, RMSprop, SGD, Adagrad
from keras.preprocessing import image
from keras.models import model_from_json
import h5py

import cv2

Using TensorFlow backend.


In [2]:
# Root folder of the dataset. Most folder paths further down are built by
# prefixing this string, so it must end with a slash.
path = 'data/'
# Alternative root (disabled): the sample sub folder created further down.
# path = 'data/sample/'

In [3]:
# Mini-batch size handed to model.fit for both networks below.
batch_size = 64

# Todo

    1. Create train, valid and sample folders
    2. Move images into train, valid and sample sub folders
    3. Build neural network
    4. Submit

# Create train, valid and sample folders

In [4]:
# Read every [filename, label] row of the training CSV into `data`,
# leaving out the header line.
data = []
with open('data/train.csv') as csv_file:
    rows = csv.reader(csv_file)
    next(rows, None)  # drop the header; harmless when the file is empty
    data.extend(rows)

In [5]:
len(data)

49000

In [9]:
# Preview the first five [filename, label] rows. A single slice is enough;
# `data[:]` would copy all rows before taking the first five.
data[:5]

[['0.png', '4'],
 ['1.png', '9'],
 ['2.png', '1'],
 ['3.png', '7'],
 ['4.png', '3']]

In [10]:
# Create the validation and sample folder skeleton.
# Folders that already exist are left alone so the cell can be re-run without
# raising "File exists". Parents are listed before their children because
# os.mkdir does not create intermediate folders.
for folder in ('valid', 'sample', 'sample/train', 'sample/valid'):
    if not os.path.isdir(path + folder):
        os.mkdir(path + folder)

In [11]:
# Create sub folder for categories in train folder.
# The category is the second CSV column; the labels are de-duplicated first so
# the file system is probed once per class instead of once per CSV row.
for label in sorted(set(row[1] for row in data)):
    if not os.path.isdir(path + 'train/' + label):
        os.makedirs(path + 'train/' + label)

In [12]:
# Create sub folder for categories in valid folder.
# The category is the second CSV column; the labels are de-duplicated first so
# the file system is probed once per class instead of once per CSV row.
for label in sorted(set(row[1] for row in data)):
    if not os.path.isdir(path + 'valid/' + label):
        os.makedirs(path + 'valid/' + label)

In [13]:
# Create sub folder for categories in sample/train and sample/valid.
# The category is the second CSV column; the labels are collected once so the
# file system is probed once per class and split instead of once per CSV row.
labels = sorted(set(row[1] for row in data))

for split in ('sample/train/', 'sample/valid/'):
    for label in labels:
        if not os.path.isdir(path + split + label):
            os.makedirs(path + split + label)

# Move images into train, valid and sample sub folders

In [14]:
# Move images from train folder into subfolder.
# Each CSV row is [filename, label]; the label doubles as the sub folder name.
for row in data:
    src = path + 'train/' + row[0]

    # An image that is no longer in the flat train folder was already moved by
    # an earlier run (into its label folder or on into valid/), so it is
    # skipped instead of aborting a re-run of this cell.
    if not os.path.isfile(src):
        continue

    # The trailing slash makes the move fail loudly if the label folder is
    # missing, instead of renaming the image to the folder name.
    shutil.move(src, path + 'train/' + row[1] + '/')

In [15]:
# Move images from train/* folder into valid/* (a random 20% of every class).
dir_names = sorted(i for i in os.listdir(path + 'train/') if os.path.isdir(os.path.join(path + 'train', i)))

# A private, seeded generator plus sorted file lists give the same split on
# every fresh run without touching NumPy's global random state.
SPLIT_SEED = 42
rng = np.random.RandomState(SPLIT_SEED)

for d in dir_names:
    # A class whose valid folder already holds images was split by an earlier
    # run; moving a further 20% would silently shrink the training set.
    if glob(path + 'valid/' + d + '/' + '*.png'):
        continue

    g = sorted(glob(path + 'train/' + d + '/' + '*.png'))
    shuf = rng.permutation(g)

    for i in range(len(g) // 5):
        shutil.move(shuf[i], path + 'valid/' + d + '/')

In [16]:
# Copy a random 10% of every class from train/* into sample/train/* and from
# valid/* into sample/valid/*. The originals stay where they are.
# A private, seeded generator plus sorted listings make the selection identical
# on every run, so re-running the cell copies the same files again instead of
# growing the sample.
SAMPLE_SEED = 42
rng = np.random.RandomState(SAMPLE_SEED)

for split in ('train/', 'valid/'):
    dir_names = sorted(i for i in os.listdir(path + split) if os.path.isdir(os.path.join(path + split, i)))

    for d in dir_names:
        g = sorted(glob(path + split + d + '/' + '*.png'))
        shuf = rng.permutation(g)

        for i in range(len(g) // 10):
            shutil.copy2(shuf[i], path + 'sample/' + split + d + '/')

# Build Neural Network

## Fully Connected Neural Network

In [17]:
# Inspect the shape of one training image of class 0.
# The file is looked up rather than hard-coded: the random validation split
# can move any particular image out of train/0, and cv2.imread would then
# return None and make `.shape` fail.
sample_files = sorted(glob(path + 'train/0/*.png'))
input_img = cv2.imread(sample_files[0])
input_img.shape

(28, 28, 3)

In [18]:
# Convert the BGR image to grayscale; the shape shown below loses its channel axis.
input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2GRAY)
input_img.shape

(28, 28)

In [19]:
# Accumulators for the fully connected network: one flattened image and one
# one-hot label are appended per training file by the loading loop below.
X_train = []
y_train = []

In [20]:
os.listdir(os.path.join(path, 'train/'))

['0', '2', '1', '5', '7', '8', '4', '3', '9', '6']

In [21]:
# Load every training image as a flattened 28x28 grayscale vector together with
# its one-hot label; the class is taken from the name of the sub folder.
# The accumulators are reset here so that re-running this cell neither appends
# duplicates nor fails because X_train has already been turned into an ndarray.
X_train = []
y_train = []

train_dir = os.path.join(path, 'train')
for folder in os.listdir(train_dir):
    for file_name in os.listdir(os.path.join(train_dir, folder)):
        file_path = os.path.join(train_dir, folder, file_name)
        if not os.path.isfile(file_path):
            continue

        input_img = cv2.imread(file_path)
        if input_img is None:
            # cv2.imread reports an unreadable file by returning None; stop with
            # the offending path instead of a cryptic error inside cvtColor.
            raise IOError('Could not read image: ' + file_path)

        input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2GRAY)
        input_img_flatten = cv2.resize(input_img, (28, 28)).flatten()
        X_train.append(input_img_flatten)

        y_cat = np_utils.to_categorical(int(folder), 10)  # 10 = number of classes
        y_train.append(y_cat)

In [22]:
# Stack the per-image vectors into a single array (one row per image).
X_train = np.asarray(X_train)

In [23]:
# Stack the per-image one-hot labels into a single array (one row per image).
y_train = np.asarray(y_train)

In [24]:
X_train.shape

(39205, 784)

In [25]:
y_train.shape

(39205, 10)

In [26]:
y_train

array([[ 1.,  0.,  0., ...,  0.,  0.,  0.],
       [ 1.,  0.,  0., ...,  0.,  0.,  0.],
       [ 1.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

In [27]:
# Accumulators for the validation split: one flattened image and one one-hot
# label are appended per validation file by the loading loop below.
X_valid = []
y_valid = []

In [28]:
# Load every validation image as a flattened 28x28 grayscale vector together
# with its one-hot label; the class is taken from the name of the sub folder.
# The accumulators are reset here so that re-running this cell neither appends
# duplicates nor fails because X_valid has already been turned into an ndarray.
X_valid = []
y_valid = []

valid_dir = os.path.join(path, 'valid')
for folder in os.listdir(valid_dir):
    for file_name in os.listdir(os.path.join(valid_dir, folder)):
        file_path = os.path.join(valid_dir, folder, file_name)
        if not os.path.isfile(file_path):
            continue

        input_img = cv2.imread(file_path)
        if input_img is None:
            # cv2.imread reports an unreadable file by returning None; stop with
            # the offending path instead of a cryptic error inside cvtColor.
            raise IOError('Could not read image: ' + file_path)

        input_img = cv2.cvtColor(input_img, cv2.COLOR_BGR2GRAY)
        input_img_flatten = cv2.resize(input_img, (28, 28)).flatten()
        X_valid.append(input_img_flatten)

        y_cat = np_utils.to_categorical(int(folder), 10)  # 10 = number of classes
        y_valid.append(y_cat)

In [29]:
# Stack the per-image vectors into a single array (one row per image).
X_valid = np.asarray(X_valid)

In [30]:
# Stack the per-image one-hot labels into a single array (one row per image).
y_valid = np.asarray(y_valid)

In [31]:
X_valid.shape

(9795, 784)

In [32]:
y_valid.shape

(9795, 10)

In [33]:
# Fully connected classifier: 784 flattened pixel values in, four ReLU hidden
# layers (64 -> 128 -> 256 -> 128 units), softmax over the 10 classes out.
model = Sequential([
    Dense(64, input_dim=784, activation='relu'),
    Dense(128, activation='relu'),
    Dense(256, activation='relu'),
    Dense(128, activation='relu'),
    Dense(10, activation='softmax'),
])

# One-hot targets, so categorical cross-entropy; accuracy is reported per epoch.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [34]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 64)                50240     
_________________________________________________________________
dense_2 (Dense)              (None, 128)               8320      
_________________________________________________________________
dense_3 (Dense)              (None, 256)               33024     
_________________________________________________________________
dense_4 (Dense)              (None, 128)               32896     
_________________________________________________________________
dense_5 (Dense)              (None, 10)                1290      
Total params: 125,770
Trainable params: 125,770
Non-trainable params: 0
_________________________________________________________________


In [35]:
# Train for 10 epochs, scoring the validation split after every epoch.
# validation_data is passed as the (x_val, y_val) tuple that Model.fit
# documents; a list is not part of the documented contract.
model.fit(X_train, y_train, epochs = 10, batch_size = batch_size, validation_data = (X_valid, y_valid))

Train on 39205 samples, validate on 9795 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fa2abbd30b8>

In [21]:
# Persist the weights of the fully connected network.
# Nothing in the visible notebook creates the target folder, so make sure it
# exists before writing the HDF5 file.
weights_file = 'weights/fully_connected.h5'
if not os.path.isdir(os.path.dirname(weights_file)):
    os.makedirs(os.path.dirname(weights_file))
model.save_weights(weights_file)

## Convolutional Neural Network

In [41]:
# Start over with empty accumulators: the convolutional network is fed
# unflattened images, loaded by the loop below.
X_train = []
y_train = []

In [42]:
# Load every training image as a 28x28 array with the three channels that
# cv2.imread returns (no grayscale conversion here), together with its one-hot
# label; the class is taken from the name of the sub folder.
# The accumulators are reset here so that re-running this cell neither appends
# duplicates nor fails because X_train has already been turned into an ndarray.
X_train = []
y_train = []

train_dir = os.path.join(path, 'train')
for folder in os.listdir(train_dir):
    for file_name in os.listdir(os.path.join(train_dir, folder)):
        file_path = os.path.join(train_dir, folder, file_name)
        if not os.path.isfile(file_path):
            continue

        input_img = cv2.imread(file_path)
        if input_img is None:
            # cv2.imread reports an unreadable file by returning None; stop with
            # the offending path instead of a cryptic error inside resize.
            raise IOError('Could not read image: ' + file_path)

        input_img = cv2.resize(input_img, (28, 28))
        X_train.append(input_img)

        y_cat = np_utils.to_categorical(int(folder), 10)  # 10 = number of classes
        y_train.append(y_cat)

In [43]:
# Stack the per-image arrays and one-hot labels into single arrays whose first
# axis indexes the images.
X_train = np.asarray(X_train)
y_train = np.asarray(y_train)

In [46]:
print(X_train.shape)
print(y_train.shape)

(39205, 28, 28, 3)
(39205, 10)


In [52]:
# Start over with empty accumulators: the convolutional network is validated
# on unflattened images, loaded by the loop below.
X_valid = []
y_valid = []

In [53]:
# Load every validation image as a 28x28 array with the three channels that
# cv2.imread returns, together with its one-hot label; the class is taken from
# the name of the sub folder.
# The accumulators are reset here so that re-running this cell neither appends
# duplicates nor fails because X_valid has already been turned into an ndarray.
X_valid = []
y_valid = []

valid_dir = os.path.join(path, 'valid')
for folder in os.listdir(valid_dir):
    for file_name in os.listdir(os.path.join(valid_dir, folder)):
        file_path = os.path.join(valid_dir, folder, file_name)
        if not os.path.isfile(file_path):
            continue

        input_img = cv2.imread(file_path)
        if input_img is None:
            # cv2.imread reports an unreadable file by returning None; stop with
            # the offending path instead of a cryptic error inside resize.
            raise IOError('Could not read image: ' + file_path)

        # The image is only resized here, not flattened.
        input_img = cv2.resize(input_img, (28, 28))
        X_valid.append(input_img)

        y_cat = np_utils.to_categorical(int(folder), 10)  # 10 = number of classes
        y_valid.append(y_cat)

In [54]:
# Stack the per-image arrays and one-hot labels into single arrays whose first
# axis indexes the images.
X_valid = np.asarray(X_valid)
y_valid = np.asarray(y_valid)

In [55]:
print(X_valid.shape)
print(y_valid.shape)

(9795, 28, 28, 3)
(9795, 10)


In [58]:
# Convolutional classifier for 28x28x3 inputs: three convolution stages, each
# closed by 2x2 max pooling, followed by a dense head with a 10-way softmax.
model = Sequential([
    # Stage 1: two 3x3 convolutions with 32 filters.
    Convolution2D(32, (3, 3), input_shape=(28, 28, 3), padding='same', activation='relu'),
    Convolution2D(32, (3, 3), padding='same', activation='relu'),
    MaxPooling2D((2, 2)),

    # Stage 2: two 3x3 convolutions with 64 filters.
    Convolution2D(64, (3, 3), padding='same', activation='relu'),
    Convolution2D(64, (3, 3), padding='same', activation='relu'),
    MaxPooling2D((2, 2)),

    # Stage 3: one 3x3 convolution with 128 filters.
    Convolution2D(128, (3, 3), padding='same', activation='relu'),
    MaxPooling2D((2, 2)),

    # Head: turn the 3D feature maps into a 1D vector, then classify.
    Flatten(),
    Dense(512, activation='relu'),
    Dense(512, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [59]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_6 (Conv2D)            (None, 28, 28, 32)        896       
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 28, 28, 32)        9248      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 14, 14, 64)        18496     
_________________________________________________________________
conv2d_9 (Conv2D)            (None, 14, 14, 64)        36928     
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 7, 7, 64)          0         
_________________________________________________________________
conv2d_10 (Conv2D)           (None, 7, 7, 128)         73856     
__________

In [60]:
# Train the convolutional network for 10 epochs, scoring the validation split
# after every epoch. validation_data is passed as the (x_val, y_val) tuple that
# Model.fit documents; a list is not part of the documented contract.
model.fit(X_train, y_train, epochs = 10, batch_size = batch_size, validation_data = (X_valid, y_valid))

Train on 39205 samples, validate on 9795 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fa29be4e9e8>

In [63]:
# Nothing in the visible notebook creates the results folder, so make sure it
# exists before writing into it.
results_dir = path + "results/"
if not os.path.isdir(results_dir):
    os.makedirs(results_dir)

# serialize model to JSON (architecture only)
model_json = model.to_json()
with open(results_dir + "conv_e10.json", "w") as json_file:
    json_file.write(model_json)

# serialize weights to HDF5
model.save_weights(results_dir + "conv_e10.h5")
print("Saved model to disk")

Saved model to disk


In [65]:
# load json and create model
# The context manager closes the file even when reading fails.
with open(path + 'results/conv_e10.json', 'r') as json_file:
    loaded_model_json = json_file.read()
loaded_model = model_from_json(loaded_model_json)

# load weights into new model
# NOTE(review): only architecture and weights are restored here; loaded_model
# is not compiled in this cell - confirm whether later cells need compile().
loaded_model.load_weights(path + "results/conv_e10.h5")
print("Loaded model from disk")

Loaded model from disk
