In [7]:
# Libraries: data handling, plotting, scikit-learn models and TensorFlow.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import svm
from collections import Counter
from sklearn.neighbors import KernelDensity
# TensorFlow is used through its v1-compatible API because the KNN model
# below builds a graph with placeholders and evaluates it in a Session.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# Show the TensorFlow version this notebook was run with.
print(tf.__version__)

2.1.0


In [8]:
# loading data
def loadMNIST():
    """Read the MNIST data sets through TensorFlow's tutorial helper.

    The helper is pointed at the ``MNIST_data/`` directory.

    Returns:
        The object returned by ``input_data.read_data_sets``; the rest of
        this notebook reads its ``train`` and ``test`` attributes.
    """
    # Imported lazily so the tutorial module is only needed when data is loaded.
    from tensorflow.examples.tutorials.mnist import input_data

    return input_data.read_data_sets("MNIST_data/")

In [9]:
# KNN model
def KNN(train_image, train_label, test_image, test_label, k):
    """Classify test samples by k-nearest-neighbour vote and return the accuracy.

    For every test sample the L1 (Manhattan) distance to each training sample
    is evaluated in TensorFlow, the ``k`` closest training samples are picked,
    and the most common label among them is taken as the prediction.

    Args:
        train_image: 2-D array-like, one flattened training sample per row.
        train_label: 1-D array-like of labels aligned with ``train_image``.
        test_image: 2-D array-like with the same number of columns as
            ``train_image``.
        test_label: 1-D array-like of labels aligned with ``test_image``.
        k: Number of neighbours that vote; ``1 <= k <= len(train_image)``.

    Returns:
        float: Fraction of test samples whose predicted label equals the
        true label.

    Raises:
        ValueError: If the test set is empty, the training data is not 2-D,
            a label array is shorter than its image array, or ``k`` is out
            of range.
    """
    train_image = np.asarray(train_image, dtype=np.float32)
    test_image = np.asarray(test_image, dtype=np.float32)
    train_label = np.asarray(train_label)
    test_label = np.asarray(test_label)

    n_train, n_test = len(train_image), len(test_image)
    if n_test == 0:
        raise ValueError("test_image is empty; accuracy is undefined")
    if train_image.ndim != 2:
        raise ValueError("train_image must be 2-D (samples x features)")
    if len(train_label) < n_train or len(test_label) < n_test:
        raise ValueError("label arrays must cover every image")
    if not 1 <= k <= n_train:
        raise ValueError("k must satisfy 1 <= k <= len(train_image)")
    n_features = train_image.shape[1]

    # Build the ops in a private graph so repeated calls (e.g. one per k)
    # do not keep adding nodes to the process-wide default graph.
    graph = tf.Graph()
    with graph.as_default():
        X = tf.placeholder(dtype=tf.float32, shape=[None, n_features])
        Y = tf.placeholder(dtype=tf.float32, shape=[n_features])

        # Sum of absolute differences per training row (L1 distance);
        # element-wise this equals sqrt(square(X - Y)).
        distance = tf.reduce_sum(tf.abs(X - Y), axis=1)

        # top_k returns the largest entries, so negate to get the smallest
        # distances. It yields k *distinct* row indices, nearest first, which
        # stays correct even when several rows share the same distance.
        _, nearest = tf.nn.top_k(tf.negative(distance), k=k)

    predictions = []
    with tf.Session(graph=graph) as sess:
        for sample in test_image:
            neighbour_idx = sess.run(nearest, feed_dict={
                X: train_image,
                Y: sample
            })
            votes = Counter(train_label[neighbour_idx].tolist())
            # On a tied vote, most_common keeps the label seen first,
            # i.e. the one belonging to the nearer neighbour.
            predictions.append(votes.most_common(1)[0][0])

    hits = np.asarray(predictions) == test_label[:n_test]
    return float(np.mean(hits))

In [10]:
def KDE(train_image, train_label, test_image, test_label, bandwidth=1.0):
    """Classify test samples with per-class kernel density estimates.

    One Gaussian ``KernelDensity`` model is fitted on the training samples of
    each class. A test sample is assigned to the class whose model gives it
    the highest log-density (no class prior is applied).

    Args:
        train_image: 2-D array-like, one flattened training sample per row.
        train_label: 1-D array-like of labels aligned with ``train_image``.
        test_image: 2-D array-like of test samples.
        test_label: 1-D array-like of labels aligned with ``test_image``.
        bandwidth: Kernel bandwidth passed to every ``KernelDensity`` model.

    Returns:
        Fraction of test samples whose predicted label equals the true label.

    Raises:
        ValueError: If ``test_label`` is empty, since accuracy is undefined.
    """
    train_label = np.asarray(train_label)
    test_label = np.asarray(test_label)
    if len(test_label) == 0:
        raise ValueError("test_label is empty; accuracy is undefined")

    # Only samples that have a label take part, as in a per-index loop.
    train_image = np.asarray(train_image)[:len(train_label)]
    test_image = np.asarray(test_image)[:len(test_label)]

    # Derive the classes from the data so a class missing from the training
    # batch does not lead to fitting a model on an empty sample list.
    classes = np.unique(train_label)

    # Row c holds the log-density of every test sample under class c's model;
    # score_samples is called once per class instead of once per sample.
    log_density = np.stack([
        KernelDensity(bandwidth=bandwidth, kernel='gaussian')
        .fit(train_image[train_label == label])
        .score_samples(test_image)
        for label in classes
    ])

    # argmax keeps the first maximum, so ties go to the smallest label.
    result = classes[np.argmax(log_density, axis=0)]
    return np.mean(np.equal(result, test_label))

In [11]:
def SVM(train_image, train_label, test_image, test_label):
    """Fit an RBF-kernel support vector classifier and return test accuracy.

    Args:
        train_image: Training samples, one per row.
        train_label: Labels for the training samples.
        test_image: Test samples, one per row.
        test_label: Labels for the test samples.

    Returns:
        Fraction of test samples whose predicted label equals the true label.
    """
    n_train = len(train_image)
    n_test = len(test_label)

    # build the classifier
    classifier = svm.SVC(
        kernel='rbf',
        C=1.0,
        gamma='scale',
        decision_function_shape='ovr',
    )

    # fit; labels are cut to the number of training images
    classifier.fit(train_image, train_label[:n_train])

    # predict the test set and score it
    predicted = classifier.predict(test_image)
    hits = np.equal(predicted, test_label)
    return np.sum(hits) / n_test

In [13]:
if __name__ == "__main__":

    # experiment configuration
    SEED = 42                   # fixes which samples are drawn below
    N_TRAIN = 10000             # number of training samples to draw
    N_TEST = 5000               # number of test samples to draw
    K_VALUES = range(1, 10, 2)  # odd neighbour counts 1, 3, 5, 7, 9

    # load data
    mnist = loadMNIST()

    # Seed after loading and before sampling so the drawn batches (and hence
    # the accuracies printed below) are the same on every run.
    # NOTE(review): assumes next_batch shuffles through NumPy's global RNG -
    # confirm against the installed TensorFlow version.
    np.random.seed(SEED)

    # draw the train and test subsets
    train_image, train_label = mnist.train.next_batch(N_TRAIN)
    test_image, test_label = mnist.test.next_batch(N_TEST)

    # knn with different k
    for k in K_VALUES:
        accuracy = KNN(train_image, train_label, test_image, test_label, k=k)
        print("K = {0} , accuracy = {1} \n".format(k, accuracy))

    # svm
    accuracy = SVM(train_image, train_label, test_image, test_label)
    print("svm accuracy = {0}\n".format(accuracy))

    # kde
    accuracy = KDE(train_image, train_label, test_image, test_label)
    print("kde accuracy = {0}\n".format(accuracy))

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
K = 1 , accuracy = 0.9406 

K = 3 , accuracy = 0.9426 

K = 5 , accuracy = 0.9446 

K = 7 , accuracy = 0.9434 

K = 9 , accuracy = 0.9404 

svm accuracy = 0.9686

kde accuracy = 0.9492

