In [1]:
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
import csv
import numpy as np
import pandas as pd


# Open data and arrange them in the correct way

For model.fit(), I want my x data to be an array of images, one per training digit, each a numpy array of shape (28, 28, 1) (every digit is 28 * 28 pixels with one channel), and my y data to be a numpy array that records which digit each image shows.

In [2]:
# Load the training set once with pandas. The 'label' column holds the digit
# for each row; every other column is treated as a pixel value.
df = pd.read_csv('train.csv')

labels = df['label'].values

# Take all pixel columns in one vectorized step rather than parsing the file
# a second time with csv.reader and converting row by row in Python.
train_pixels = df.drop(columns='label').values

# num_samples = number of images, num_inputs = pixel values per image.
num_samples, num_inputs = train_pixels.shape

# One 28x28 single-channel image per row, divided by 255 (presumably the
# maximum pixel intensity, so values land in [0, 1]).
numbers = train_pixels.reshape(-1, 28, 28, 1) / 255

# Arrange test data

In [3]:
# newline='' leaves newline handling to the csv module, as the csv
# documentation recommends for any file object handed to csv.reader.
with open('test.csv', newline='') as f:
    reader = csv.reader(f)
    # Materialize every row of the file as a list of strings.
    test_contents = list(reader)

In [4]:
# Row 0 of test_contents is skipped (treated as the header row). Blank lines,
# which csv.reader yields as empty lists, are dropped so the data stays
# rectangular.
test_rows = [row for row in test_contents[1:] if row]

# Parse every row in one call instead of materializing a full string array
# just to read its shape and then converting one row at a time.
test_pixels = np.array(test_rows, dtype=int)

# num_samples = number of test images, num_inputs = pixel values per image.
# num_inputs is read later when the hidden layer is sized.
num_samples, num_inputs = test_pixels.shape

# One 28x28 single-channel image per row, divided by 255 exactly as the
# training images are.
test_numbers = test_pixels.reshape(-1, 28, 28, 1) / 255

I chose the following formula for the number of neurons in the hidden layer: two-thirds of the number of inputs, plus 10. This was based on some rudimentary Googling and on checking that the resulting number did a reasonable job. There's no clear consensus on how many neurons a hidden layer should have, but a good resource for more info is here: https://stats.stackexchange.com/questions/181/how-to-choose-the-number-of-hidden-layers-and-nodes-in-a-feedforward-neural-netw

In [5]:
# Rule-of-thumb sizing for the hidden layer: two thirds of the input count
# plus ten, rounded to the nearest integer.
input_fraction = 2/3
extra_units = 10
num_hidden_layer = round(input_fraction * num_inputs + extra_units)

# Model assembly
I chose for my model to have a single convolutional layer followed by a max pooling layer -- nothing particularly extraordinary. I figured one convolution would be sufficient (I didn't want the fitting to take forever and I wanted to avoid overfitting), and I chose 32 filters arbitrarily.

In [6]:
# Small CNN: one convolution + max-pooling stage, flattened into a dense
# hidden layer and a 10-way softmax output.
model = keras.Sequential([
    keras.layers.Conv2D(
        filters=32,
        kernel_size=(3, 3),
        activation='relu',
        input_shape=(28, 28, 1),
    ),
    keras.layers.MaxPooling2D(pool_size=(2, 2)),
    keras.layers.Flatten(),
    # Hidden layer width comes from num_hidden_layer computed earlier.
    keras.layers.Dense(units=num_hidden_layer, activation='relu'),
    keras.layers.Dense(units=10, activation='softmax'),
])

In [7]:
# Print the layer-by-layer summary (output shapes and parameter counts).
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 26, 26, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 13, 13, 32)        0         
_________________________________________________________________
flatten (Flatten)            (None, 5408)              0         
_________________________________________________________________
dense (Dense)                (None, 533)               2882997   
_________________________________________________________________
dense_1 (Dense)              (None, 10)                5340      
Total params: 2,888,657
Trainable params: 2,888,657
Non-trainable params: 0
_________________________________________________________________


# Compiling the model
I chose Adam because I think it's generally a great optimizer for a wide range of datasets, and it was suitable here. I chose sparse categorical crossentropy for my loss function because my targets are integers. I'm not really sure why one would use categorical crossentropy in general, beyond its being a popular loss function for classification problems like this one.

In [8]:
# Training configuration gathered in one place, then handed to compile().
compile_settings = {
    'optimizer': 'adam',
    'loss': 'sparse_categorical_crossentropy',
    'metrics': ['accuracy'],
}
model.compile(**compile_settings)



In [9]:
# Train on the prepared images and their labels for five epochs, with
# per-epoch progress output. Left as a bare expression so the returned
# History object is still displayed as the cell's output.
model.fit(
    numbers,
    labels,
    epochs=5,
    verbose=1,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x167c0d160>

In [10]:
# predict() walks the test set in mini-batches, whereas predict_on_batch()
# would push every test image through the network as one single batch.
# verbose=0 keeps the cell free of progress-bar output.
predictions = model.predict(test_numbers, verbose=0)

# Save the prediction results to a csv

In [11]:
# The index of the largest output in each prediction row is the predicted
# digit; np.argmax returns the first such index when values tie.
predicted_digits = np.argmax(predictions, axis=1)

# Pair each prediction with its 1-based image id, in test-set order.
guesses = [[image_id, digit]
           for image_id, digit in enumerate(predicted_digits, start=1)]

In [12]:
# newline='' stops the csv module's own line terminators from being
# translated a second time by the text layer (which would otherwise produce
# blank rows between records on Windows).
with open("submission.csv", "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(["ImageId","Label"])
    # Each entry of guesses is already an [image id, label] pair.
    writer.writerows(guesses)