In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import warnings
import matplotlib.pyplot as plt 

from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from keras.utils.np_utils import to_categorical
from keras.models import Sequential,Model,load_model
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPool2D, BatchNormalization,GlobalAveragePooling2D,Input,Activation
from keras.layers.merge import add
from keras.optimizers import RMSprop
from keras.activations import relu,softmax
from keras import optimizers,applications,regularizers
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import TensorBoard,ReduceLROnPlateau,ModelCheckpoint
from tensorflow.keras import backend
from keras.models import model_from_json

seed = 42
warnings.filterwarnings("ignore")

Using TensorFlow backend.


In [4]:
# Read the training and test tables from the CSV files in the working directory.
df_train, df_test = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))

In [5]:
# Separate the target column from the remaining (pixel) columns.
y_train = df_train['label']
X_train = df_train.drop('label', axis='columns')

# The test table is used as-is for the features.
X_test = df_test

# The raw frames are not referenced again; release their names.
# NOTE: because of this `del`, the cell cannot be re-run without reloading the CSVs.
del df_train, df_test

print('Shape of X_train:', X_train.shape)
print('Shape of y_train:', y_train.shape)
print('Shape of X_test :', X_test.shape)

Shape of X_train: (42000, 784)
Shape of y_train: (42000,)
Shape of X_test : (28000, 784)


In [6]:
# Divide every feature value by 255 in both tables.
# NOTE: not idempotent - running this cell twice divides twice.
X_train, X_test = X_train / 255, X_test / 255

In [7]:
# Turn each 784-value row into a 28x28 single-channel image; the leading
# (sample) axis is inferred from the data.
X_train, X_test = (table.values.reshape(-1, 28, 28, 1) for table in (X_train, X_test))

print('Shape of X_train:', X_train.shape)
print('Shape of X_test :', X_test.shape)

Shape of X_train: (42000, 28, 28, 1)
Shape of X_test : (28000, 28, 28, 1)


In [8]:
# One-hot encode the labels into ten columns.
# NOTE: not idempotent - `y_train` is overwritten with its encoded form.
y_train = to_categorical(y_train, 10)
y_train.shape

(42000, 10)

In [9]:
# Ensemble size, plus one placeholder slot per network for the trained model
# and for its training history (both filled in by the training cell).
nets = 1
digits, history = [0] * nets, [0] * nets

# Training schedule.
epochs, batch_size = 100, 90

In [10]:
# Learning-rate schedule: watch validation accuracy and, after 3 epochs without
# improvement, multiply the learning rate by 0.5 (never going below 0.00001).
reduction = ReduceLROnPlateau(
    monitor='val_acc',
    factor=0.5,
    patience=3,
    min_lr=0.00001,
    verbose=0,
)

In [11]:
# Augmentation generator: only mild geometric jitter is enabled.
imggen = ImageDataGenerator(
    # random rotation / zoom / shift ranges
    rotation_range=10,
    zoom_range=0.10,
    width_shift_range=0.10,
    height_shift_range=0.10,
    # flips are switched off
    horizontal_flip=False,
    vertical_flip=False,
    # no centring, std-normalisation or whitening
    featurewise_center=False,
    samplewise_center=False,
    featurewise_std_normalization=False,
    samplewise_std_normalization=False,
    zca_whitening=False,
)

# Per the Keras docs, fit() is only required when featurewise centring,
# featurewise std-normalisation or ZCA whitening is enabled; kept as in the
# original notebook.
imggen.fit(X_train)

In [14]:
def build_pipeline():
    """Build and compile the CNN for 10-class classification of 28x28x1 images.

    Layer order:
      * three 5x5 conv + batch-norm pairs with 32 filters (the first one
        declares the input shape and has no weight penalty), a 4x4 max-pool
        and dropout 0.15;
      * three 3x3 conv + batch-norm pairs with 64 filters, a 2x2 max-pool with
        stride 2 and dropout 0.25;
      * one 3x3 sigmoid conv + batch-norm pair with 128 filters;
      * flatten, dense 128 (relu), dropout 0.4, dense 10 (softmax).

    Every convolution except the first carries an L2 kernel penalty of 0.01.

    Returns:
        A ``Sequential`` model compiled with the Adamax optimizer,
        categorical cross-entropy loss and the accuracy metric.
    """

    def conv_bn(filters, kernel, activation='relu', penalised=True, **extra):
        # One convolution followed by its batch-normalisation layer. A fresh
        # regulariser object is created for each penalised convolution.
        if penalised:
            extra['kernel_regularizer'] = regularizers.l2(0.01)
        conv = Conv2D(filters=filters, kernel_size=kernel, padding='Same',
                      activation=activation, **extra)
        return [conv, BatchNormalization()]

    pipeline = Sequential()
    stack = []

    # Stage 1: 32-filter 5x5 convolutions, then aggressive pooling.
    stack += conv_bn(32, (5, 5), penalised=False, input_shape=(28, 28, 1))
    for _ in range(2):
        stack += conv_bn(32, (5, 5))
    stack += [MaxPool2D(pool_size=(4, 4)), Dropout(0.15)]

    # Stage 2: 64-filter 3x3 convolutions, then 2x2 pooling.
    for _ in range(3):
        stack += conv_bn(64, (3, 3))
    stack += [MaxPool2D(pool_size=(2, 2), strides=(2, 2)), Dropout(0.25)]

    # Stage 3: a single 128-filter convolution with a sigmoid activation.
    stack += conv_bn(128, (3, 3), activation='sigmoid')

    # Classifier head: flatten, hidden dense layer, softmax over 10 classes.
    stack += [
        Flatten(),
        Dense(128, activation="relu"),
        Dropout(0.4),
        Dense(10, activation="softmax"),
    ]

    for layer in stack:
        pipeline.add(layer)

    pipeline.compile(optimizer='Adamax',
                     loss="categorical_crossentropy",
                     metrics=["accuracy"])

    return pipeline

In [15]:
print('Creating {0} CNNs...'.format(nets))
for model in range(nets):

    digits[model] = build_pipeline()

    # Hold out 10% of the training images for validation. The split is seeded
    # explicitly (offset by the network index, so each ensemble member gets a
    # different but repeatable partition) instead of depending on whatever
    # state the global RNG happens to be in when this cell runs.
    X_train_aux, X_test_aux, y_train_aux, y_test_aux = train_test_split(
        X_train, y_train, test_size = 0.1, random_state = seed + model)

    # Train on augmented batches drawn from the training part; validate on the
    # un-augmented held-out part. `reduction` adjusts the learning rate.
    history[model] = digits[model].fit_generator(
        imggen.flow(X_train_aux, y_train_aux, batch_size = batch_size),
        epochs = epochs,
        steps_per_epoch = X_train_aux.shape[0] // batch_size,
        validation_data = (X_test_aux, y_test_aux),
        callbacks=[reduction],
        verbose=0)

    # Report the best accuracy reached at any epoch (not the final epoch's).
    print("CNN {0:>2d}: Epochs = {1:d}, Max. Train accuracy = {2:.5f}, Max. Validation accuracy = {3:.5f}".format(
        model + 1,
        epochs,
        max(history[model].history['acc']),
        max(history[model].history['val_acc'])))

Creating 1 CNNs...
CNN  1: Epochs = 100, Max. Train accuracy = 0.99815, Max. Validation accuracy = 0.99643


In [16]:
# Accumulator for the summed class scores: one row per test image, ten columns.
predicted_label = np.zeros(shape=(len(X_test), 10))
predicted_label

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [17]:
# Add the class scores predicted by every trained network to the accumulator.
# NOTE: re-running this cell adds the scores again on top of the previous sum.
for model in range(nets):
    predicted_label = np.add(predicted_label, digits[model].predict(X_test))

In [18]:
# For each test image keep the index of the largest summed score, wrapped in a
# Series named "Label".
predicted_label = pd.Series(predicted_label.argmax(axis=1), name="Label")

In [19]:
# Build the two-column table (ImageId, Label) and write it to disk.
# ImageId runs 1..N, with N taken from the number of predictions rather than a
# hard-coded 28000, so the two columns always stay aligned.
result = pd.concat(
    [pd.Series(range(1, len(predicted_label) + 1), name = "ImageId"), predicted_label],
    axis = 1)
result.to_csv("result_optimized.csv", index=False)

In [20]:
# Show the first five rows of the result table.
result.iloc[:5]

Unnamed: 0,ImageId,Label
0,1,2
1,2,0
2,3,9
3,4,0
4,5,3


In [22]:
# Number of predicted labels.
predicted_label.shape[0]

28000

In [25]:
# Number of samples in the training part of the last train/validation split.
y_train_aux.shape[0]

37800

In [31]:
from sklearn.metrics import confusion_matrix

# Evaluate on the held-out validation split, where true labels and predictions
# describe the same images. The test-set predictions in `predicted_label` have
# no ground truth in this notebook, so they cannot be scored against training
# labels.
# `X_test_aux` / `y_test_aux` come from the last iteration of the training
# loop, so they are held-out data only for the last network in `digits`.
y_val_true = np.argmax(y_test_aux, axis=1)
y_val_pred = np.argmax(digits[-1].predict(X_test_aux), axis=1)

# labels=[0, 1] restricts the matrix to digits 0 and 1, giving a 2x2 result.
matrix = confusion_matrix(y_val_true, y_val_pred, labels=[0,1])

In [32]:
# Unpack the 2x2 matrix row by row: first row -> (tn, fp), second row -> (fn, tp).
(tn, fp), (fn, tp) = matrix