In [166]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Input, Flatten

In [140]:
def load_data_csv(filename):
    """Read a CSV into a pandas DataFrame.

    ``filename`` is passed to ``pd.read_csv`` unchanged, using the parser's
    default options, and the resulting DataFrame is returned as-is.
    """
    frame = pd.read_csv(filename)
    return frame

In [141]:
def create_image_from_row(X):
    """Turn one flattened 28x28 pixel row into a scaled single-channel image.

    Args:
        X: 784 pixel intensities in row-major order. A pandas Series, a list,
            or a NumPy array are all accepted.

    Returns:
        A float64 array of shape (28, 28, 1) where every value is the
        corresponding input pixel divided by 255.

    Raises:
        ValueError: If ``X`` does not hold exactly 784 values.
    """
    pixels = np.asarray(X, dtype=np.float64)

    # Fail loudly on a wrong-sized row (e.g. a label column left in) instead
    # of silently reading only the first 784 values.
    if pixels.size != 28 * 28:
        raise ValueError(
            "expected 784 pixel values, got {}".format(pixels.size)
        )

    # A row-major reshape reproduces the nested i/j fill order in one step.
    return pixels.reshape(28, 28, 1) / 255

In [142]:
def create_x(df):
    """Convert a DataFrame of flattened 28x28 images into a model-ready batch.

    Each row of ``df`` is read as 784 pixel intensities in row-major order.
    The whole frame is converted in one vectorised step instead of a Python
    loop over rows.

    Args:
        df: DataFrame with one image per row and exactly 784 numeric columns.

    Returns:
        A float64 array of shape (n_rows, 28, 28, 1) containing each pixel
        divided by 255. A frame with no rows yields shape (0, 28, 28, 1).

    Raises:
        ValueError: If ``df`` has rows but not exactly 784 columns.
    """
    pixels = df.to_numpy(dtype=np.float64)

    # Keep the 4-D layout even with no rows so downstream code sees a
    # consistent rank.
    if pixels.shape[0] == 0:
        return np.zeros((0, 28, 28, 1))

    # Guard the reshape: a wrong column count could otherwise be silently
    # re-chunked into garbage images.
    if pixels.shape[1] != 28 * 28:
        raise ValueError(
            "expected 784 pixel columns, got {}".format(pixels.shape[1])
        )

    return pixels.reshape(-1, 28, 28, 1) / 255

In [143]:
def create_y(df):
    """One-hot encode a Series of digit labels.

    Every label becomes a list of ten integers with a 1 at the position equal
    to the label and 0 elsewhere. A label that equals none of 0..9 produces a
    row of all zeros. The rows are stacked with ``np.array``.
    """
    digits = range(10)
    encoded = [
        [1 if label == digit else 0 for digit in digits]
        for label in df.to_list()
    ]
    return np.array(encoded)

In [144]:
def split_x_y(df):
    """Separate the ``label`` column from the remaining columns.

    Returns a ``(features, labels)`` tuple: ``features`` is ``df`` without the
    ``label`` column and ``labels`` is that column as a Series. ``df`` itself
    is not modified.
    """
    target_column = 'label'
    labels = df[target_column]
    features = df.drop(columns=target_column)
    return features, labels

In [159]:
def create_model():
    """Build and compile the digit-classification CNN.

    Architecture: a (28, 28, 1) input, one 32-filter 2x2 ReLU convolution with
    'same' padding, a flatten step, a 32-unit ReLU dense layer, and a 10-unit
    softmax output.

    Returns:
        A compiled Keras ``Model`` using Adam (learning rate 1e-4) and
        mean-squared-error loss.
    """
    inputs = Input(shape=(28, 28, 1))
    x = Conv2D(filters=32, kernel_size=(2, 2), strides=(1, 1), padding='same', activation='relu')(inputs)
    x = Flatten()(x)
    x = Dense(32, activation='relu')(x)
    outputs = Dense(10, activation='softmax')(x)

    model = Model(inputs=[inputs], outputs=[outputs])

    # `lr` is a deprecated alias that current Keras releases no longer accept;
    # `learning_rate` is the supported keyword and works on older TF 2.x too.
    # NOTE(review): loss is kept as 'mse' to preserve existing training
    # behaviour; categorical cross-entropy is the usual pairing with a softmax
    # output and one-hot targets -- consider switching.
    model.compile(optimizer=Adam(learning_rate=0.0001), loss='mse')

    return model

In [165]:
def reformat_y(data):
    """Collapse per-class score rows into class indices.

    Args:
        data: 2-D array-like of shape (n_samples, n_classes), e.g. one-hot
            targets or softmax outputs. Lists of lists are accepted.

    Returns:
        A 1-D integer array of length n_samples holding, for each row, the
        index of its largest value (the first such index on ties). An input
        with zero rows yields an empty integer array.
    """
    # One vectorised argmax over the class axis replaces the per-row loop.
    return np.argmax(np.asarray(data), axis=1)

# TRAIN

In [160]:
# Load the labelled training CSV, separate pixels from labels, and convert
# both into the array formats the network consumes.
df = load_data_csv('train.csv')
x, y = split_x_y(df)
X = create_x(x)
Y = create_y(y)
# Hold out 20% for evaluation. The fixed seed makes the split identical on
# every Restart & Run All, so the hold-out set does not change between runs.
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [161]:
# Display the dimensions of the image array built by create_x.
X.shape

(42000, 28, 28, 1)

In [162]:
# Build a freshly compiled network; re-running this cell discards any weights
# held by the previous `model` object.
model = create_model()

In [163]:
# Train on the training split only; X_test / y_test are not passed here and
# are used by the evaluation cells further down.
model.fit(X_train, y_train, verbose=1, epochs=30, batch_size=32)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<tensorflow.python.keras.callbacks.History at 0x7ff353219df0>

In [164]:
# Predict on the held-out split; each output row comes from the model's
# 10-unit softmax layer.
y_pred = model.predict(X_test)

In [168]:
# Reduce both the one-hot targets and the predicted score rows to class
# indices so they can be compared directly.
orig_y = reformat_y(y_test)
my_y = reformat_y(y_pred)

# Fraction of held-out samples whose predicted class matches the true class.
accuracy_score(orig_y, my_y)

0.9719047619047619

# TEST

In [169]:
# NOTE: this cell rebinds `df`, `X` and `y_pred`, replacing the training-phase
# objects of the same names.
# The frame is passed to create_x without split_x_y -- presumably test.csv has
# no 'label' column; verify against the file.
df = load_data_csv('test.csv')
X = create_x(df)
y_pred = model.predict(X)
# Reduce each row of scores to a single class index.
y_pred = reformat_y(y_pred)

In [171]:
# Build the submission columns: 1-based image ids paired with the predicted
# class indices. The id range is sized from the predictions themselves so the
# two columns always have the same length.
dataframe_dictionary = {
    'ImageId': range(1, len(y_pred) + 1),
    'Label': y_pred
}

# Write the submission without the DataFrame index. `index=False` is the
# documented boolean form; the original `index=None` only worked because
# None is falsy.
submission_df = pd.DataFrame(dataframe_dictionary)
submission_df.to_csv('submission.csv', index=False)