In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import BatchNormalization, Conv2D, MaxPool2D, AveragePooling2D, Dense, Dropout, Flatten, Reshape
from keras.utils import to_categorical

In [None]:
def get_mnist_reduced(mnist, number_of_examples):
    """Return a class-balanced subset of an MNIST table as images and labels.

    For each digit 0-9 (in that order) the first ``number_of_examples`` rows
    whose first column equals the digit are kept, so the result is grouped by
    digit. A digit with fewer rows than requested contributes all of its rows.

    Parameters
    ----------
    mnist : pandas.DataFrame
        Table whose first column is the digit label and whose remaining
        columns are the pixel values (they are reshaped to 28x28x1 below, so
        784 pixel columns are required).
    number_of_examples : int
        Maximum number of rows kept per digit (the notebook uses 1000 for the
        training table and 100 for the test table).

    Returns
    -------
    tuple of numpy.ndarray
        ``(images, labels)`` with shapes ``(n, 28, 28, 1)`` and ``(n,)``.
    """
    labels = mnist.iloc[:, 0]
    # Collect the per-digit slices and concatenate once: calling pd.concat
    # inside the loop re-copies every previously accumulated row each time.
    per_digit = [
        mnist[(labels == digit).values][:number_of_examples]
        for digit in range(10)
    ]
    mnist_reduced = pd.concat(per_digit, axis=0)
    return mnist_reduced.iloc[:, 1:].values.reshape(-1, 28, 28, 1), mnist_reduced.iloc[:, 0].values
#    return mnist_reduced.iloc[:, 1:].values, mnist_reduced.iloc[:, 0].values

In [None]:
def load_data(train_path='../input/mnist-original/mnist_train.csv',
              test_path='../input/mnist-original/mnist_test.csv',
              train_per_class=1000, test_per_class=100):
    """Read the MNIST CSV files and return class-balanced train/test subsets.

    Calling ``load_data()`` with no arguments behaves exactly as before; the
    keyword arguments only expose the values that used to be hard-coded.

    Parameters
    ----------
    train_path, test_path : str
        Locations of the training and test CSV files passed to
        ``pd.read_csv``.
    train_per_class, test_per_class : int
        Maximum number of rows kept per digit for each split (forwarded to
        ``get_mnist_reduced``).

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test)`` as returned by
        ``get_mnist_reduced`` for each split.
    """
    train_mnist = pd.read_csv(train_path)
    X_train, y_train = get_mnist_reduced(train_mnist, train_per_class)
    test_mnist = pd.read_csv(test_path)
    X_test, y_test = get_mnist_reduced(test_mnist, test_per_class)
    return X_train, y_train, X_test, y_test

# Load the reduced train/test splits once; the cells below read these module-level arrays.
X_train, y_train, X_test, y_test = load_data()
# Optional sanity check of the per-class counts. The labels are NumPy arrays here, so
# .value_counts() only works if get_mnist_reduced returns pandas objects (drop its `.values`).
# print("Y_train classes values are:\n", y_train.value_counts())
# print("Y_test classes values are:\n", y_test.value_counts())

In [None]:
def plot_image(img, label = None):
    """Draw a single digit in grayscale with the axis decorations hidden.

    ``img`` is reshaped to 28x28 before being shown. When ``label`` is given
    (anything other than ``None``), the plot title reads "number is <label>".
    """
    plt.axis('off')
    pixels = img.reshape(28, 28)
    plt.imshow(pixels, cmap = 'gray')
    if label is None:
        # No caption requested.
        return
    caption = "number is " + str(label)
    plt.title(caption)

# Samples are grouped by digit with up to 1000 training rows per digit, so index 1100
# should fall in the digit-1 group (assumes the CSV has at least 1000 rows of digit 0 - TODO confirm).
plot_image(X_train[1100], y_train[1100])

In [None]:
def get_distance(x1, x2):
    """Return the Euclidean distance between two equally-shaped samples.

    The inputs may have any number of dimensions (flat vectors or image-shaped
    arrays such as 28x28x1); all elements take part in the sum, so the result
    is always a single scalar. The previous element-by-element loop only ran
    over the first axis and therefore returned an array for multi-dimensional
    samples.

    Parameters
    ----------
    x1, x2 : array-like
        Samples of identical shape.

    Returns
    -------
    numpy.float64
        ``sqrt(sum((x1 - x2) ** 2))`` over every element.
    """
    # Cast to float first so unsigned/small integer pixel types cannot wrap
    # around when subtracted or squared.
    diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    return np.sqrt(np.sum(diff ** 2))


def kmeans(X, k, max_iters, seed=None, verbose=True):
    """Cluster the samples in ``X`` with Lloyd's k-means algorithm.

    Initial centroids are ``k`` distinct samples drawn at random. Each
    iteration assigns every sample to its nearest centroid (Euclidean
    distance over all elements of the sample), drops clusters that received
    no samples, and replaces each remaining centroid by the mean of its
    members. Iteration stops when the centroids no longer change or after
    ``max_iters`` iterations.

    Parameters
    ----------
    X : array-like, shape (n_samples, ...)
        Samples; trailing dimensions are flattened internally, so flat
        vectors and image-shaped samples are both accepted.
    k : int
        Number of initial centroids (must not exceed ``n_samples``).
    max_iters : int
        Maximum number of iterations, at least 1.
    seed : int or None, optional
        When given, the initial centroids are drawn from
        ``np.random.RandomState(seed)``; when ``None`` the global
        ``np.random`` state is used, as before.
    verbose : bool, optional
        When true (default) print one ``K-MEANS:`` progress line per
        iteration showing the absolute change in the sum of all centroid
        coordinates.

    Returns
    -------
    centroids : numpy.ndarray, shape (n_clusters, ...)
        Final centroids with the same trailing shape as the samples.
        ``n_clusters`` can be smaller than ``k`` because empty clusters are
        dropped.
    stds : list of float
        For each returned cluster, the standard deviation of all element
        values of its member samples.

    Raises
    ------
    ValueError
        If ``max_iters`` is smaller than 1.
    """
    if max_iters < 1:
        raise ValueError("max_iters must be at least 1")

    X = np.asarray(X)
    sample_shape = X.shape[1:]
    # Flattened float copy: image-shaped samples are compared as whole
    # vectors and integer inputs cannot overflow in the products below.
    points = X.reshape(len(X), -1).astype(float)

    rng = np.random if seed is None else np.random.RandomState(seed)
    centroids = points[rng.choice(len(points), k, replace=False)]

    for _ in range(max_iters):
        # argmin_c ||p - c||^2 == argmin_c (||c||^2 - 2 p.c); the ||p||^2 term
        # is constant per sample and can be left out of the comparison.
        centroid_sq_norms = np.einsum('ij,ij->i', centroids, centroids)
        scores = centroid_sq_norms[None, :] - 2.0 * (points @ centroids.T)
        labels = np.argmin(scores, axis=1)

        # Only clusters that actually received samples survive.
        occupied = np.unique(labels)
        members = [points[labels == j] for j in occupied]
        new_centroids = np.array([m.mean(axis=0) for m in members])

        if verbose:
            pattern = np.abs(np.sum(centroids) - np.sum(new_centroids))
            print('K-MEANS: ', int(pattern))

        # Converged only when every centroid coordinate is unchanged; the sum
        # printed above can be zero even though individual centroids moved.
        converged = (new_centroids.shape == centroids.shape
                     and np.array_equal(new_centroids, centroids))
        centroids = new_centroids
        if converged:
            break

    stds = [np.std(m) for m in members]
    return centroids.reshape((-1,) + sample_shape), stds

In [None]:
class RBF:
    """Radial-basis-function network classifier with k-means centres.

    ``fit`` clusters the training samples with ``kmeans``, turns every sample
    into a vector of kernel activations (one per centroid), solves a linear
    least-squares problem from those activations to one-hot class targets, and
    reports the accuracy on the test split passed to the constructor.

    The kernel is ``exp(-d / s**2)`` where ``d`` is the Euclidean distance to
    the centroid and ``s`` the centroid's width, so the activation is 1 at the
    centroid and decays with distance. (The earlier expression
    ``1 / np.exp(-d / s**2)`` equals ``exp(+d / s**2)`` and grew with
    distance.)

    Samples may be flat vectors or image-shaped arrays; they are flattened
    internally before any distance is computed.
    """

    # Iteration cap handed to kmeans() by fit().
    KMEANS_MAX_ITERS = 1000

    def __init__(self, X, y, tX, ty, num_of_classes,
                 k, std_from_clusters=True):
        """Store the data splits and hyper-parameters; no work is done here.

        X, y   : training samples and their integer class labels.
        tX, ty : test samples and labels, used by ``fit`` to report accuracy.
        num_of_classes : number of output classes (width of the one-hot target).
        k : number of centroids requested from k-means.
        std_from_clusters : if True use the per-cluster widths returned by
            k-means; otherwise use one shared width
            ``dMax / sqrt(2 * n_centroids)`` where ``dMax`` is the largest
            distance between two centroids.
        """
        self.X = X
        self.y = y

        self.tX = tX
        self.ty = ty

        self.number_of_classes = num_of_classes
        self.k = k
        self.std_from_clusters = std_from_clusters

    @staticmethod
    def _as_rows(A):
        """Return ``A`` as a float matrix with one flattened sample per row."""
        A = np.asarray(A, dtype=float)
        return A.reshape(len(A), -1)

    @staticmethod
    def _pairwise_distances(A, B):
        """Euclidean distances between every row of ``A`` and every row of ``B``."""
        sq = ((A ** 2).sum(axis=1)[:, None]
              - 2.0 * (A @ B.T)
              + (B ** 2).sum(axis=1)[None, :])
        # Rounding can push tiny squared distances slightly below zero.
        return np.sqrt(np.maximum(sq, 0.0))

    @staticmethod
    def _kernel(distance, s):
        """Kernel activation ``exp(-distance / s**2)`` (element-wise)."""
        # Guard against a zero width (e.g. a cluster of identical constant
        # samples) so the division stays finite.
        width = np.maximum(np.asarray(s, dtype=float) ** 2, np.finfo(float).eps)
        return np.exp(-distance / width)

    def convert_to_one_hot(self, x, num_of_classes):
        """Return a ``(len(x), num_of_classes)`` one-hot matrix for labels ``x``."""
        labels = np.asarray(x).astype(int)
        arr = np.zeros((len(labels), num_of_classes))
        arr[np.arange(len(labels)), labels] = 1
        return arr

    def get_rbf(self, x, c, s):
        """Activation of the kernel centred at ``c`` with width ``s`` for sample ``x``."""
        diff = np.ravel(np.asarray(x, dtype=float)) - np.ravel(np.asarray(c, dtype=float))
        return self._kernel(np.linalg.norm(diff), s)

    def get_rbf_as_list(self, X, centroids, std_list):
        """Return the ``(n_samples, n_centroids)`` matrix of kernel activations.

        Centroids and widths are paired in order; if their counts differ the
        extra entries of the longer one are ignored.
        """
        rows = self._as_rows(X)
        centres = self._as_rows(centroids)
        widths = np.asarray(std_list, dtype=float)
        n_pairs = min(len(centres), len(widths))
        distances = self._pairwise_distances(rows, centres[:n_pairs])
        return self._kernel(distances, widths[None, :n_pairs])

    def predict(self, X):
        """Return the predicted class index for every sample in ``X`` (after ``fit``)."""
        features = self.get_rbf_as_list(X, self.centroids, self.std_list)
        return np.argmax(features @ self.w, axis=1)

    def fit(self):
        """Train on ``(X, y)`` and print the accuracy on ``(tX, ty)``.

        Sets ``centroids``, ``std_list``, ``w`` (output weights), ``pred_ty``
        (predicted test labels) and ``accuracy``.
        """
        train_rows = self._as_rows(self.X)

        self.centroids, self.std_list = kmeans(train_rows, self.k, self.KMEANS_MAX_ITERS)

        if not self.std_from_clusters:
            # k-means may return fewer centroids than requested, so size the
            # shared width by the centroids that actually exist.
            centres = self._as_rows(self.centroids)
            n_centres = len(centres)
            dMax = self._pairwise_distances(centres, centres).max()
            self.std_list = np.repeat(dMax / np.sqrt(2 * n_centres), n_centres)

        RBF_X = self.get_rbf_as_list(train_rows, self.centroids, self.std_list)
        targets = self.convert_to_one_hot(self.y, self.number_of_classes)

        # Least-squares output weights. pinv(A) is mathematically the same as
        # pinv(A.T @ A) @ A.T but avoids squaring the condition number.
        self.w = np.linalg.pinv(RBF_X) @ targets

        self.pred_ty = self.predict(self.tX)

        self.accuracy = float(np.mean(self.pred_ty == np.asarray(self.ty)))

        print('Accuracy: ', self.accuracy)

In [None]:
# Build the RBF classifier on the reduced MNIST splits: 1000 k-means centroids are requested,
# and std_from_clusters=False selects the single shared width derived from the largest
# centroid-to-centroid distance instead of the per-cluster values returned by kmeans.
RBF_CLASSIFIER = RBF(X_train, y_train,X_test, y_test, num_of_classes=10,
                     k=1000, std_from_clusters=False)

# Runs k-means, solves for the output weights and prints the accuracy on the test split.
RBF_CLASSIFIER.fit()