# MNIST ML - Load / process raw data

In [None]:
import sys
import os
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras 

# Show which interpreter and which working directory this notebook runs in.
print(sys.executable, os.getcwd(), sep='\n')

In [None]:
# Display the version string of the running Python interpreter.
sys.version

In [None]:
def read_mnist_image_data(image_data_file):
    """
    Read an MNIST image data file (big-endian header followed by one byte per pixel).

    image_data_file: path to the mnist image data file on disk.

    returns a dictionary with the following keys:
    - magic_number: to be compared against the magic number for each file on MNIST website
    - number_of_images: the number of images included in the dataset
    - pixel_rows: the number of pixels per row per image
    - pixel_cols: the number of pixels per column per image
    - data: the actual image data as an integer ndarray with shape of
      (number_of_images, pixel_rows, pixel_cols)

    raises ValueError if the file is shorter than its 16-byte header, or if the
    number of pixel bytes does not match the dimensions given in the header.
    """

    # Four big-endian 32-bit integers precede the pixel bytes.
    header_size = 16

    with open(image_data_file, 'rb') as f:
        _data = f.read()

    if len(_data) < header_size:
        raise ValueError(
            "{!r} is too short to be an MNIST image file: expected at least "
            "{} header bytes, found {}".format(image_data_file, header_size, len(_data))
        )

    results = {}
    results['magic_number'] = int.from_bytes(_data[0:4], 'big')
    results['number_of_images'] = int.from_bytes(_data[4:8], 'big')
    results['pixel_rows'] = int.from_bytes(_data[8:12], 'big')
    results['pixel_cols'] = int.from_bytes(_data[12:16], 'big')

    # Fail early with a readable message instead of an opaque reshape error.
    expected_pixels = (results['number_of_images']
                       * results['pixel_rows']
                       * results['pixel_cols'])
    actual_pixels = len(_data) - header_size
    if actual_pixels != expected_pixels:
        raise ValueError(
            "{!r} header describes {} pixel bytes but the file contains {}".format(
                image_data_file, expected_pixels, actual_pixels)
        )

    # Interpret the payload as unsigned bytes in one vectorised step rather than
    # building a Python list one byte at a time. astype(int) makes a writable
    # copy with the same default integer dtype the list-based version produced.
    pixel_data = np.frombuffer(_data, dtype=np.uint8, offset=header_size).astype(int)
    results['data'] = pixel_data.reshape(results['number_of_images'],
                                         results['pixel_rows'],
                                         results['pixel_cols'])

    return results
    
    
def read_mnist_label_data(label_data_file):
    """
    Read an MNIST label data file (big-endian header followed by one byte per label).

    label_data_file: path to the mnist label data file on disk.

    returns a dictionary with the following keys:
    - magic_number: to be compared against the magic number for each file on MNIST website
    - number_of_labels: the number of labels included in the dataset
    - labels: the actual label data as ndarray (1 dimensional, integer dtype)

    raises ValueError if the file is shorter than its 8-byte header, or if the
    number of label bytes does not match the count given in the header.
    """

    # Two big-endian 32-bit integers precede the label bytes.
    header_size = 8

    with open(label_data_file, 'rb') as f:
        _data = f.read()

    if len(_data) < header_size:
        raise ValueError(
            "{!r} is too short to be an MNIST label file: expected at least "
            "{} header bytes, found {}".format(label_data_file, header_size, len(_data))
        )

    results = {}
    results['magic_number'] = int.from_bytes(_data[0:4], 'big')
    results['number_of_labels'] = int.from_bytes(_data[4:8], 'big')

    # Read every label byte in one vectorised step; no per-byte Python loop
    # and no round trip through float.
    labels = np.frombuffer(_data, dtype=np.uint8, offset=header_size)
    if labels.size != results['number_of_labels']:
        raise ValueError(
            "{!r} header declares {} labels but the file contains {}".format(
                label_data_file, results['number_of_labels'], labels.size)
        )

    # astype makes a writable copy with the same integer dtype as before.
    results['labels'] = labels.astype('int')

    return results
    
    

In [None]:
# Load the training and validation sets. The image arrays are divided by
# 255.0 so that the raw byte intensities are scaled into the 0.0-1.0 range;
# the labels are used as read.
x_train = read_mnist_image_data('train-images-idx3-ubyte')['data']
x_train = x_train / 255.0
y_train = read_mnist_label_data('train-labels-idx1-ubyte')['labels']

x_validation = read_mnist_image_data('t10k-images-idx3-ubyte')['data']
x_validation = x_validation / 255.0
y_validation = read_mnist_label_data('t10k-labels-idx1-ubyte')['labels']


In [None]:
# Display names for the ten digit classes: "0" through "9".
class_names = list(map(str, range(10)))
class_names

In [None]:
# Fully connected classifier: flatten each 28x28 image, pass it through one
# hidden ReLU layer of 128 units, and finish with a 10-unit output layer.
model = keras.Sequential()
model.add(keras.layers.Flatten(input_shape=(28, 28)))
model.add(keras.layers.Dense(128, activation='relu'))
model.add(keras.layers.Dense(10))

# The output layer has no activation, so the loss is configured with
# from_logits=True.
model.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)



In [None]:
# Train for 10 epochs on the training data.
model.fit(x=x_train, y=y_train, epochs=10)

# Measure loss and accuracy on the validation data.
test_loss, test_acc = model.evaluate(x=x_validation, y=y_validation, verbose=2)

print('\nTest accuracy:', test_acc)



# Model Plotting / Basic Testing

In [None]:
# Append a Softmax layer to the trained model so that predict() yields class
# probabilities rather than raw logits, then persist the combined model.
# probability_model has to be built here: nothing earlier in the notebook
# defines it, so saving it directly raised NameError on a top-to-bottom run.
probability_model = tf.keras.Sequential([model, tf.keras.layers.Softmax()])
tf.keras.models.save_model(probability_model, 'pynumdraw_model.hdf5')

In [None]:
# Reload the saved model from disk instead of rebuilding it. The file is
# presumably `model` followed by a Softmax layer, i.e.
# tf.keras.Sequential([model, tf.keras.layers.Softmax()]) -- confirm against
# whatever was actually saved. compile=False skips restoring the training
# configuration, which is not needed for prediction.
probability_model = tf.keras.models.load_model(
    'pynumdraw_model.hdf5',
    compile=False,
)
# Generate the predictions for the whole validation set in one call.
predictions = probability_model.predict(x_validation)

In [None]:

# Generate a prediction for one image: the 28x28 image is reshaped into a
# batch containing a single sample before being passed to predict().
image_index = 0
predicted_probabilities = probability_model.predict(
    x_validation[image_index].reshape(1, 28, 28)
)
print(predicted_probabilities)


# The confidence of the prediction is the largest of the output values,
# printed first as-is and then scaled to a percentage.
print(predicted_probabilities.max())
print(100 * predicted_probabilities.max())



In [None]:
def plot_image(i, predictions_array, true_label, img):
    """
    Draw image number i on the current axes with a prediction caption.

    i: index of the sample inside true_label and img.
    predictions_array: the scores for this one sample (it is not indexed by i).
    true_label: ndarray of labels; it is cast to int and indexed with i.
    img: indexable collection of images; img[i] is the one drawn.

    The caption has the form "<predicted> <confidence>% (<true>)" and is
    blue when the predicted label equals the true label, red otherwise.
    """
    actual = true_label.astype('int')[i]
    picture = img[i]

    # Plain image: no grid and no tick marks on either axis.
    plt.grid(False)
    plt.xticks([])
    plt.yticks([])
    plt.imshow(picture, cmap=plt.cm.binary)

    guess = np.argmax(predictions_array)
    caption_color = 'blue' if guess == actual else 'red'
    caption = "{} {:2.0f}% ({})".format(
        class_names[guess],
        100 * np.max(predictions_array),
        class_names[actual],
    )
    plt.xlabel(caption, color=caption_color)

def plot_value_array(i, predictions_array, true_label):
    """
    Draw a bar chart of the ten prediction scores for one sample.

    i: index of the sample inside true_label.
    predictions_array: the ten scores for this one sample (it is not indexed by i).
    true_label: ndarray of labels; it is cast to int and indexed with i.

    Bars are grey, except the predicted class (red) and the true class (blue).
    """
    actual = true_label.astype('int')[i]

    plt.grid(False)
    plt.xticks(range(10))
    plt.yticks([])
    bars = plt.bar(range(10), predictions_array, color="#777777")
    plt.ylim([0, 1])

    guess = np.argmax(predictions_array)
    # Color the predicted bar first: when the prediction is correct both
    # indices are the same bar, and the later blue wins.
    bars[guess].set_color('red')
    bars[actual].set_color('blue')


In [None]:
# Small scratch array holding the integers 1 through 4.
x = np.arange(1, 5)

In [None]:
# Scratch check: display x cast to the 'int' dtype.
x.astype('int')