### Load compressed test data files

In [10]:
import numpy as np
import argparse
import os
import joblib
import gzip
import struct


# load compressed MNIST gz files and return numpy arrays
def load_data(filename, label=False):
    """Load a gzip-compressed MNIST IDX file into a numpy array.

    The IDX header is big-endian: a 4-byte magic number, a 4-byte item count
    and, for image files only, 4-byte row and column counts. The payload that
    follows is one unsigned byte per pixel (or per label).

    Args:
        filename: Path to the ``.gz`` file to read.
        label: ``True`` to parse a label file, ``False`` (default) to parse an
            image file.

    Returns:
        A ``np.uint8`` array of shape ``(n_items, n_rows * n_cols)`` for image
        files, or ``(n_items, 1)`` for label files. The array is backed by an
        immutable ``bytes`` object and is therefore read-only.

    Raises:
        ValueError: If the magic number does not match the kind of file
            requested via ``label``, or if the payload is shorter than the
            header declares.
        struct.error: If the file ends before the header is complete.
    """
    # IDX magic number: two zero bytes, the dtype code (0x08 = unsigned byte)
    # and the number of dimensions (1 for labels, 3 for images).
    expected_magic = 0x00000801 if label else 0x00000803

    with gzip.open(filename) as gz:
        magic, n_items = struct.unpack('>II', gz.read(8))
        if magic != expected_magic:
            raise ValueError(
                "{}: unexpected magic number 0x{:08x} (expected 0x{:08x} for "
                "an MNIST {} file)".format(
                    filename, magic, expected_magic,
                    'label' if label else 'image'))

        if label:
            item_size = 1
        else:
            n_rows, n_cols = struct.unpack('>II', gz.read(8))
            item_size = n_rows * n_cols

        n_bytes = n_items * item_size
        payload = gz.read(n_bytes)

    # Fail with a clear message instead of an opaque reshape error.
    if len(payload) != n_bytes:
        raise ValueError(
            "{}: truncated file, expected {} data bytes but got {}".format(
                filename, n_bytes, len(payload)))

    return np.frombuffer(payload, dtype=np.uint8).reshape(n_items, item_size)


# Directory holding the gzip-compressed MNIST files, resolved against the
# current working directory.
data_folder = os.path.join(os.getcwd(), 'data/mnist')

# Only the 10k test split is loaded here (images and their labels).
test_images_path = "{}/t10k-images-idx3-ubyte.gz".format(data_folder)
test_labels_path = "{}/t10k-labels-idx1-ubyte.gz".format(data_folder)

# Pixel intensities are rescaled from 0-255 to 0-1 by dividing by 255.0.
# NOTE(review): presumably this mirrors the scaling used when the model was
# trained - confirm against the training notebook.
X_test = load_data(test_images_path, label=False) / 255.0

# Labels come back as a column vector; flatten them to a 1-D array.
y_test = load_data(test_labels_path, label=True).reshape(-1)

print("X_test Shape: {}\ny_test Shape: {}".format(X_test.shape, y_test.shape))


X_test Shape: (10000, 784)
y_test Shape: (10000,)


In [11]:
# Restore the previously saved classifier from the working directory.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code -
# only load model files that come from a trusted source.
clf = joblib.load('sklearn_mnist_model.pkl')

print('Predict the test set')
y_pred = clf.predict(X_test)

# Accuracy is the fraction of test samples whose predicted label matches the
# true label.
is_correct = y_pred == y_test
acc = np.average(is_correct)
print('Accuracy is', acc)

# Persist the predictions, one value per line.
# NOTE: np.savetxt is called with its default format ('%.18e'), so the values
# are written in scientific notation.
np.savetxt("y_test_predicted.csv", y_pred, delimiter=",")


Predict the test set
Accuracy is 0.9193


