In [8]:
import cv2
import numpy as np
import pandas as pd

from matplotlib import pyplot as plt
from PIL import Image
from tqdm import tqdm_notebook
from random import shuffle

import os
import shutil

In [9]:
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras_tqdm import TQDMNotebookCallback
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.constraints import maxnorm
from keras.optimizers import SGD
from keras.layers.convolutional import Conv2D, MaxPooling2D
from keras.utils import np_utils
from keras.callbacks import Callback

In [10]:
# Data augmentation
#
# Training images get mild, label-preserving distortions only. Handwritten
# digits are not invariant under mirroring or quarter-turn rotation (a rotated
# 6 reads as a 9), so horizontal_flip is off and rotation is kept small.
# The lower brightness bound stays above 0: a factor of 0.0 yields an
# all-black image that still carries a label.
#
# featurewise_center / featurewise_std_normalization are intentionally not
# set: they only take effect after calling datagen.fit(sample_data), which
# this notebook never does, so Keras would just warn and skip them.
train_datagen = ImageDataGenerator(rescale=1/255.,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   rotation_range=15,
                                   brightness_range=(0.5, 1.0))

# Validation images are only rescaled (same 1/255 factor as training).
# Random augmentation here would make val_loss noisy from epoch to epoch,
# and EarlyStopping (monitor='val_loss') would react to that noise.
val_datagen = ImageDataGenerator(rescale=1/255.)

In [11]:
batch_size = 64

# Options shared by the training and validation directory iterators:
# single-channel 140x140 inputs, one-hot labels, shuffled batches.
flow_options = dict(color_mode='grayscale',
                    target_size=(140, 140),
                    batch_size=batch_size,
                    class_mode='categorical',
                    shuffle=True)

# Each iterator reads one sub-directory per class from its root folder.
train_generator = train_datagen.flow_from_directory(
        r'/home/devendra/mnistasjpg/trainingSet', **flow_options)

validation_generator = val_datagen.flow_from_directory(
        r'/home/devendra/mnistasjpg/valSet', **flow_options)

Found 36000 images belonging to 10 classes.
Found 5000 images belonging to 10 classes.


In [12]:
# CNN classifier: three 3x3 conv + 2x2 max-pool stages (16, 32, 64 filters)
# on 140x140 single-channel input, then a 128-unit dense layer and a
# 10-way softmax. Dropout(0.5) follows the 2nd and 3rd pooling stages and
# the dense layer.
model = Sequential([
    Conv2D(16, (3, 3), input_shape=(140, 140, 1), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),

    Conv2D(32, (3, 3), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.5),

    Conv2D(64, (3, 3), padding='same', activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.5),

    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(10, activation='softmax'),
])

In [None]:
## Callback for loss logging per epoch
class LossHistory(Callback):
    """Record the training and validation loss at the end of every epoch.

    After training, ``losses`` and ``val_losses`` each hold one entry per
    completed epoch, taken from the ``'loss'`` and ``'val_loss'`` keys of
    the ``logs`` dict passed to the callback. An entry is ``None`` when
    the corresponding key is absent.
    """

    def on_train_begin(self, logs=None):
        # Start from empty lists on every training run so that reusing the
        # same instance does not append to a previous run's history.
        self.losses = []
        self.val_losses = []

    def on_epoch_end(self, epoch, logs=None):
        # `logs` may be omitted or None; treat that as "no metrics".
        logs = logs or {}
        self.losses.append(logs.get('loss'))
        self.val_losses.append(logs.get('val_loss'))

history = LossHistory()

## Callback that halts training when validation loss stops improving
# patience=2: allow two epochs without improvement before stopping;
# min_delta=0: any decrease in val_loss counts as an improvement.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='auto',
    patience=2,
    min_delta=0,
    verbose=0,
)

In [None]:
epochs = 50

# NOTE(review): lr=0.1 with momentum=0.9 is an aggressive setting for plain
# SGD; left unchanged here, but worth revisiting if the loss diverges.
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.SGD(lr=0.1, momentum=0.9),
              metrics=['accuracy'])

# Step counts come from the generators themselves (len() is the number of
# batches needed to cover every image once). Deriving them from a
# hard-coded dataset size and split ratio disagreed with the actual folder
# contents: part of the training set was skipped each epoch and the
# validation set was cycled more than once.
model.fit_generator(train_generator,
          steps_per_epoch=len(train_generator),
          epochs=epochs,
          validation_data=validation_generator,
          validation_steps=len(validation_generator),
          callbacks=[TQDMNotebookCallback(leave_inner=True, leave_outer=True), early_stopping, history],
          verbose=0)

In [239]:
# Sanity check: classify a single test image.
image_path = '/home/devendra/mnistasjpg/testData/testSet/img_4.jpg'
img = cv2.imread(image_path)
if img is None:
    # cv2.imread signals a missing/unreadable file by returning None.
    raise IOError('Could not read image: ' + image_path)

# Convert image to grayscale
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Feed the network the same geometry and value range it was trained on:
# the spatial size is taken from the model itself, and pixels are scaled by
# 1/255 to match the `rescale` used by the training generator.
input_height, input_width = model.input_shape[1:3]
img = cv2.resize(img, (input_width, input_height))  # cv2 expects (width, height)
arr = img.astype('float32') / 255.
arr = arr.reshape((1, input_height, input_width, 1))  # (batch, rows, cols, channels)

prediction = model.predict(arr)[0]
# Index of the largest softmax output; ties resolve to the lowest index.
best_index = int(np.argmax(prediction))
bestclass = str(best_index)
bestconf = prediction[best_index]
print(bestclass)

0


In [240]:
TEST_DIR = '/home/devendra/mnistasjpg/testData/testSet'
NUM_TEST_IMAGES = 28000      # files are named img_1.jpg ... img_28000.jpg
PREDICT_BATCH_SIZE = 256     # images stacked per model.predict call


def load_test_image(image_id, size):
    """Load test image `image_id` as a float32 grayscale array in [0, 1].

    Args:
        image_id: integer N selecting the file ``img_N.jpg`` in TEST_DIR.
        size: ``(width, height)`` to resize to, in cv2 order.

    Returns:
        2-D float32 array of shape ``(height, width)``, scaled by 1/255 to
        match the `rescale` applied by the training generator.

    Raises:
        IOError: if the file is missing or cannot be decoded.
    """
    path = os.path.join(TEST_DIR, 'img_' + str(image_id) + '.jpg')
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None.
        raise IOError('Could not read image: ' + path)
    # Convert image to grayscale
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = cv2.resize(img, size)
    return img.astype('float32') / 255.


# The spatial size comes from the model so inputs always match its input layer.
input_height, input_width = model.input_shape[1:3]

# Predicted digit (as a string) for img_1 ... img_N, in file-number order.
digitpreds = []
for start in range(1, NUM_TEST_IMAGES + 1, PREDICT_BATCH_SIZE):
    stop = min(start + PREDICT_BATCH_SIZE, NUM_TEST_IMAGES + 1)
    batch = np.stack([load_test_image(i, (input_width, input_height))
                      for i in range(start, stop)])
    batch = batch[..., np.newaxis]  # add the channel axis -> (batch, rows, cols, 1)
    probabilities = model.predict(batch)
    # argmax over the class axis; ties resolve to the lowest class index.
    digitpreds.extend(str(label) for label in np.argmax(probabilities, axis=1))

In [241]:
#test_datagen = ImageDataGenerator(rescale=1/255.)
#test_generator = test_datagen.flow_from_directory(
#        r'/home/devendra/mnistasjpg/testData',
#        target_size=(28, 28),
#        color_mode="rgb",
#        batch_size=1,
#        class_mode=None,
#        shuffle=False)
#test_generator.reset()
#pred = model.predict_generator(test_generator, steps=28000/1, verbose=1)
#predicted_class_indices = np.argmax(pred, axis=1)
#labels = (train_generator.class_indices)
#labels = dict((v,k) for k,v in labels.items())
#predictions = [labels[k] for k in predicted_class_indices]

In [242]:
#df_preds = pd.DataFrame(pred, columns=['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'])
#df_preds

In [243]:
#df_newpreds = df_preds.idxmax(axis=1)
#df_newpreds.index = np.arange(1, len(df_newpreds)+1)

In [244]:
#df_newpreds = df_newpreds.to_frame(name='Label')

In [245]:
#df_newpreds.index.name = 'ImageId'
#df_newpreds.to_csv('/home/devendra/mnist_kaggle_submission.csv')

In [246]:
# Submission table: one row per entry of digitpreds, indexed by a 1-based
# 'ImageId' and holding the predicted digit in the 'Label' column.
submission_ids = pd.Index(np.arange(1, len(digitpreds) + 1), name='ImageId')
finalpreds = pd.DataFrame({'Label': digitpreds}, index=submission_ids)
finalpreds.to_csv('/home/devendra/mnist_kaggle_submission_9.csv')

In [252]:
# Spot-check ten rows by position (rows 1160-1169, i.e. ImageId 1161-1170).
finalpreds.iloc[1160:1170]

Unnamed: 0_level_0,Label
ImageId,Unnamed: 1_level_1
1161,9
1162,1
1163,5
1164,0
1165,6
1166,4
1167,1
1168,2
1169,4
1170,5
