In [8]:
from __future__ import print_function

import glob
import math
import os

from matplotlib import cm
from matplotlib import gridspec
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import metrics
import tensorflow as tf
from tensorflow.python.data import Dataset

# Silence everything below ERROR level in TensorFlow's logger.
tf.logging.set_verbosity(tf.logging.ERROR)

# Keep DataFrame previews compact: at most 10 rows, floats shown with one decimal.
pd.set_option('display.max_rows', 10)
pd.set_option('display.float_format', '{:.1f}'.format)

In [13]:
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, Lambda, MaxPooling2D
from keras import backend as K
from keras import layers
import keras
from keras import Model

In [14]:
# Input: 28x28 single-channel (grayscale) images.
img_input = layers.Input(shape=(28, 28, 1))

# Conv stage 1: two 3x3 convolutions (16 then 32 filters), 2x2 max-pool, dropout.
x = layers.Conv2D(16, 3, activation='relu')(img_input)
x = layers.Conv2D(32, 3, activation='relu')(x)
x = layers.MaxPooling2D(2)(x)
x = layers.Dropout(0.2)(x)

# Conv stage 2: two 3x3 convolutions (64 then 128 filters), 2x2 max-pool, dropout.
x = layers.Conv2D(64, 3, activation='relu')(x)
x = layers.Conv2D(128, 3, activation='relu')(x)
x = layers.MaxPooling2D(2)(x)
x = layers.Dropout(0.2)(x)

# Conv stage 3: one 3x3 convolution with 256 filters followed by a 2x2 max-pool.
x = layers.Conv2D(256, 3, activation='relu')(x)
x = layers.MaxPooling2D(2)(x)

# Flatten feature map to a 1-dim tensor so we can add fully connected layers
x = layers.Flatten()(x)

# Create a fully connected layer with ReLU activation and 512 hidden units
x = layers.Dense(512, activation='relu')(x)
x = layers.Dropout(0.3)(x)

# Create the output layer: 10 nodes (one per class) with softmax activation.
# The labels are one-hot encoded and the model is trained with
# categorical cross-entropy, which expects the outputs to form a probability
# distribution over mutually exclusive classes -- softmax provides that,
# whereas independent sigmoids do not.
output = layers.Dense(10, activation='softmax')(x)

# Create model:
# input = input feature map
# output = input feature map + stacked convolution/maxpooling layers + fully
# connected layer + softmax output layer
model = Model(img_input, output)

In [15]:
from tensorflow.keras.optimizers import RMSprop

# Configure training: Adam optimizer, categorical cross-entropy loss on the
# one-hot labels, and accuracy reported under the 'acc' key.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['acc'],
)

In [16]:
# Print the layer-by-layer architecture (output shapes and parameter counts).
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_4 (InputLayer)         (None, 28, 28, 1)         0         
_________________________________________________________________
conv2d_16 (Conv2D)           (None, 26, 26, 16)        160       
_________________________________________________________________
conv2d_17 (Conv2D)           (None, 24, 24, 32)        4640      
_________________________________________________________________
max_pooling2d_10 (MaxPooling (None, 12, 12, 32)        0         
_________________________________________________________________
dropout_10 (Dropout)         (None, 12, 12, 32)        0         
_________________________________________________________________
conv2d_18 (Conv2D)           (None, 10, 10, 64)        18496     
_________________________________________________________________
conv2d_19 (Conv2D)           (None, 8, 8, 128)         73856     
__________

In [17]:
from sklearn.model_selection import train_test_split
from keras.utils.np_utils import to_categorical

# Load the labelled training CSV into a DataFrame.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
train_csv_path = "/home/tblain/Documents/projet_perso/kaggle_MNIST/train.csv"
train = pd.read_csv(train_csv_path)

In [18]:
# Features: every column except 'label', reshaped to (n_samples, 28, 28, 1).
pixel_frame = train.drop('label', axis=1)
X_train_org = np.array(pixel_frame).reshape((-1, 28, 28, 1))

# Targets: the 'label' column, one-hot encoded over 10 classes.
y_train_org = to_categorical(train['label'], num_classes=10)

# Hold out 1% of the samples for validation (split is random on every run).
X_train, X_val, y_train, y_val = train_test_split(
    X_train_org,
    y_train_org,
    test_size=0.01,
)

In [19]:
from keras.preprocessing.image import ImageDataGenerator

# Augmentation settings applied to every batch the generator yields.
augmentation_options = dict(
    rescale=1/255,            # multiply pixel values by 1/255
    rotation_range=10,        # random rotation, up to 10 degrees
    zoom_range=0.1,           # random zoom, up to 10%
    width_shift_range=0.1,    # random horizontal shift (fraction of total width)
    height_shift_range=0.1,   # random vertical shift (fraction of total height)
)
datagen = ImageDataGenerator(**augmentation_options)

In [36]:
batch_size = 64

# Stream augmented, rescaled (1/255) batches drawn from the training split.
data_flow = datagen.flow(x=X_train, y=y_train, batch_size=batch_size)

# One epoch should be one pass over the *training split* that feeds the
# generator (not the full pre-split dataset). Integer ceiling so the final
# partial batch is counted as a step.
steps_per_epoch = (len(X_train) + batch_size - 1) // batch_size

# The validation arrays do not go through `datagen`, so the 1/255 rescale that
# the generator applies to training batches must be applied here by hand;
# otherwise the model is validated on 0-255 inputs it was never trained on.
history = model.fit_generator(
    data_flow,
    steps_per_epoch=steps_per_epoch,
    epochs=10,
    validation_data=(X_val / 255, y_val),
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [24]:
# Load the unlabelled test CSV, reshape each row to a 28x28x1 image and
# divide by 255 (the same scale factor the training generator uses).
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
test_csv_path = "/home/tblain/Documents/projet_perso/kaggle_MNIST/test.csv"
test_frame = pd.read_csv(test_csv_path)
X_test = np.array(test_frame).reshape((-1, 28, 28, 1)) / 255


In [25]:
# Run the model on the test images (with a progress bar). At this point y_test
# holds the raw network outputs; a later cell reduces them with argmax.
y_test = model.predict(X_test, verbose=1)




In [26]:
# Collapse the last axis to the index of its largest value and display the result.
y_test = y_test.argmax(axis=-1)
y_test

array([2, 0, 9, ..., 3, 9, 2])

In [33]:
# Wrap the predictions in a one-column frame whose index runs from 1 to len(y_test).
image_ids = range(1, len(y_test) + 1)
y_test = pd.DataFrame(y_test, index=image_ids, columns=['Label'])

In [34]:
# Write the predictions to submission.csv in the working directory; the frame's
# index is emitted as a column named 'ImageId'.
y_test.to_csv("submission.csv", index_label='ImageId')