In [2]:
import numpy as np
import scipy
import pandas as pd
import time
import datetime as dt
import matplotlib
import matplotlib.pyplot as plt
import os
%matplotlib inline

In [3]:
from sklearn import datasets, svm, metrics

# `fetch_mldata` cannot be imported from newer scikit-learn releases (it was
# deprecated and later removed). Keep using it when it is available so existing
# cached downloads still work, and otherwise fall back to the OpenML copy of
# MNIST.
try:
    from sklearn.datasets import fetch_mldata
except ImportError:
    fetch_mldata = None

if fetch_mldata is not None:
    mnist = fetch_mldata('MNIST original', data_home=os.getcwd())
else:
    from sklearn.datasets import fetch_openml

    # as_frame=False keeps `data` / `target` as NumPy arrays, which is what the
    # rest of the notebook indexes and reshapes.
    mnist = fetch_openml('mnist_784', version=1, data_home=os.getcwd(), as_frame=False)
    # OpenML delivers the digit labels as strings; convert them to floats so
    # `mnist.target` has the same form as the fetch_mldata result.
    mnist['target'] = mnist['target'].astype(float)

In [9]:
# Display the loaded dataset object to check its fields
# (e.g. the `data` and `target` arrays used below).
mnist

{'DESCR': 'mldata.org dataset: mnist-original',
 'COL_NAMES': ['label', 'data'],
 'target': array([0., 0., 0., ..., 9., 9., 9.]),
 'data': array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)}

In [32]:
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Print a confusion matrix and draw it as an annotated heat map.

    Parameters
    ----------
    cm : numpy.ndarray
        2-D confusion matrix. Rows are drawn on the 'True label' axis and
        columns on the 'Predicted label' axis.
    classes : sequence
        Tick labels, one per row/column of `cm`, in matrix order.
    normalize : bool, default False
        When True, every row is divided by its sum before printing and
        plotting. Rows that sum to zero are shown as all zeros.
    title : str
        Title placed above the plot.
    cmap : matplotlib colormap
        Colormap used for the heat map.

    The plot is drawn on the current matplotlib figure; nothing is returned.
    """
    import itertools

    if normalize:
        row_totals = cm.sum(axis=1)[:, np.newaxis]
        # A class with no true samples has an all-zero row. Dividing it by its
        # sum would give NaN, which would also break `cm.max()` and the text
        # colour threshold below, so such rows are left at zero instead.
        cm = np.divide(cm.astype('float'), row_totals,
                       out=np.zeros(cm.shape, dtype='float'),
                       where=row_totals != 0)
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # The integer format 'd' raises on float cells, so it is only used for
    # integer-typed, non-normalized matrices.
    use_int_format = (not normalize) and np.issubdtype(cm.dtype, np.integer)
    fmt = 'd' if use_int_format else '.2f'
    # Cells darker than half the maximum get white text for contrast.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    # Run the layout pass last so it accounts for the axis labels as well.
    plt.tight_layout()

In [None]:
# data is 70,000 x 784 array where each row represents pixels from a (28 x 28) = 784 image
images = mnist.data
targets = mnist.target

# ---------------- classification begins -----------------
# Number of images used for the SVM experiment. Fitting on the whole dataset
# is slow, so a random subset is used by default; set SAMPLE_SIZE to None to
# train on the full dataset instead.
SAMPLE_SIZE = 10000
SEED = 42

if SAMPLE_SIZE is None:
    # scale data for [0,255] -> [0,1]
    X_data = images / 255.0
    Y = targets
else:
    # Sample WITHOUT replacement: with the default replace=True the same image
    # can be drawn more than once and then land in both the train and the test
    # split, which inflates the reported accuracy. A seeded generator makes the
    # subset reproducible, matching the fixed random_state of the split below.
    rng = np.random.RandomState(SEED)
    n_samples = min(SAMPLE_SIZE, images.shape[0])
    rand_idx = rng.choice(images.shape[0], n_samples, replace=False)
    # scale data for [0,255] -> [0,1]
    X_data = images[rand_idx] / 255.0
    Y = targets[rand_idx]

# split data to train and test
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X_data, Y, test_size=0.15, random_state=SEED)

param_C = 5
param_gamma = 0.05
classifier = svm.SVC(C=param_C, gamma=param_gamma)

# We learn the digits on train part
start_time = dt.datetime.now()
print('Start learning at {}'.format(str(start_time)))
classifier.fit(X_train, y_train)
end_time = dt.datetime.now()
print('Stop learning {}'.format(str(end_time)))
elapsed_time = end_time - start_time
print('Elapsed learning {}'.format(str(elapsed_time)))

# predict the value of the test
expected = y_test
predicted = classifier.predict(X_test)

print("Classification report for classifier %s:\n%s\n" % (classifier, metrics.classification_report(expected, predicted)))

# Use one explicit, sorted list of the distinct labels both to build the matrix
# and to label its axes, so tick labels line up with the rows/columns.
# (Passing the full label vector `Y` as `classes` would create one tick per
# sample instead of one per digit.)
class_labels = np.unique(np.concatenate([expected, predicted]))
cm = metrics.confusion_matrix(expected, predicted, labels=class_labels)

plt.figure()
plot_confusion_matrix(cm, classes=class_labels, title='Confusion matrix, without normalization')

print("Accuracy={}".format(metrics.accuracy_score(expected, predicted)))

Start learning at 2018-06-11 16:55:24.055005
Stop learning 2018-06-11 17:37:53.928290
Elapsed learning 0:42:29.873285
Classification report for classifier SVC(C=5, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.05, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False):
             precision    recall  f1-score   support

        0.0       0.99      0.99      0.99      1024
        1.0       0.99      0.99      0.99      1185
        2.0       0.98      0.99      0.98      1051
        3.0       0.98      0.98      0.98      1057
        4.0       0.99      0.99      0.99       964
        5.0       0.98      0.98      0.98       964
        6.0       0.99      0.99      0.99      1085
        7.0       0.99      0.98      0.99      1128
        8.0       0.97      0.98      0.97      1037
        9.0       0.98      0.97      0.98      1005

avg / total       0.99      0.99      

In [31]:
# CNN Method
from sklearn import model_selection
import keras
from keras.utils.np_utils import to_categorical
import cv2
from cnn import CNN  # project-local module; cnn.py must be importable from the notebook's directory
import argparse
from keras import optimizers

img_rows, img_columns = 28, 28
total_classes = 10

# Path to a saved weights file for the CNN, or None.
# The original cell read this from `args["weights"]`, but no argument parser is
# ever created in the notebook, so `args` was undefined.
# NOTE(review): assumes CNN treats weightsPath=None as "no weights to load";
# confirm against cnn.py.
weights_path = None

# Reshape each flat row into a (1, rows, columns) image: a single-channel
# axis is inserted in front of the two spatial axes.
# NOTE(review): this is a channels-first layout; confirm it matches what the
# CNN class / Keras backend expects.
mnist_data = images.reshape((images.shape[0], img_rows, img_columns))
mnist_data = mnist_data[:, np.newaxis, :, :]

# Scale pixels to [0, 1] and hold out 10% of the images for testing. The
# fixed random_state makes the split reproducible.
train_img, test_img, train_labels, test_labels = model_selection.train_test_split(
    mnist_data / 255.0, targets.astype("int"), test_size=0.1, random_state=42)

# One-hot encode the digit labels.
train_labels = to_categorical(train_labels, total_classes)
test_labels = to_categorical(test_labels, total_classes)

# `optimizers` is the name imported above (the original cell misspelled it as
# `optimisers`, which raises NameError).
sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
clf = CNN(width=img_columns, height=img_rows, depth=1, total_classes=total_classes, weightsPath=weights_path)
clf.compile(loss="categorical_crossentropy", optimizer=sgd, metrics=["accuracy"])

b_size = 128 # Batch size
num_epoch = 20 # Number of epochs
verb = 1 # Verbose

ModuleNotFoundError: No module named 'cnn'