In [2]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
import math
import numpy as np
from scipy.spatial import distance

import kernels
import utils
import trainer

In [3]:
# Load the Iris dataset and split it into features (X) and class labels (Y).
iris = datasets.load_iris()
X, Y = iris.data, iris.target

# Hold out 30% of the samples for testing; the fixed random_state keeps the
# split identical across runs.
train_data, test_data, train_labels, test_labels = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=0,
)

In [4]:
# Placeholders for the module-level hyper-parameters that predict() reads:
#   h - kernel bandwidth (the distance is divided by it),
#   n - order of the Minkowski distance.
# Both are overwritten with real values in the configuration cell further
# down; calling predict() before that would divide by h == 0.
h = n = 0

In [5]:
def dist(sample1, sample2, n=2):
    """Return the Minkowski distance of order ``n`` between two samples.

    Args:
        sample1: First 1-D feature vector.
        sample2: Second 1-D feature vector, same length as ``sample1``.
        n: Order of the Minkowski norm (1 is Manhattan, 2 is Euclidean).

    Returns:
        The distance as a float.
    """
    order = n
    return distance.minkowski(sample1, sample2, p=order)

def max_distance(n=2):
    """Return the largest pairwise distance between samples of ``train_data``.

    Distances are measured with ``dist`` (Minkowski distance of order ``n``).

    Args:
        n: Order of the Minkowski norm forwarded to ``dist``.

    Returns:
        The maximum distance over all pairs of training samples, or ``0`` when
        ``train_data`` holds fewer than two samples.
    """
    count = len(train_data)
    # The distance is symmetric and the distance of a sample to itself is 0,
    # so visiting every unordered pair once yields the same maximum with about
    # half of the distance evaluations of a full double loop.
    return max(
        (
            dist(train_data[i], train_data[j], n)
            for i in range(count)
            for j in range(i + 1, count)
        ),
        default=0,
    )

In [6]:
def predict(sample, weights, kernel=kernels.Gauss):
    """Predict the class of ``sample`` by weighted kernel voting.

    Every training sample adds ``weight * kernel(distance / h)`` to the score
    of its own class; the class with the highest score wins. Reads the
    module-level ``train_data``, ``train_labels``, bandwidth ``h`` and
    Minkowski order ``n``.

    Args:
        sample: Feature vector to classify.
        weights: One weight per training sample, indexed like ``train_data``.
        kernel: Callable applied to the bandwidth-scaled distance.

    Returns:
        The index of the highest-scoring class (result of ``np.argmax``).

    Raises:
        ValueError: If ``train_labels`` is empty (no class count can be
            derived).
    """
    # One score slot per class label present in the training split. Sizing
    # the vector by the number of dataset rows (as done previously with
    # Y.shape[0]) allocated far more slots than there are classes and tied
    # this function to the un-split global Y.
    class_count = int(np.max(train_labels)) + 1
    resVec = np.zeros(class_count)
    for index, train_sample in enumerate(train_data):
        label = train_labels[index]
        weight = weights[index]
        resVec[label] += weight * kernel(dist(train_sample, sample, n) / h)
    return np.argmax(resVec)

def train_weights(times=20, kernel=kernels.Gauss):
    """Learn one integer weight per training sample.

    Starts from all-zero weights (same shape and dtype as ``train_labels``)
    with only the first sample set to 1. In each pass every training sample is
    classified with the current weights, and the weight of each misclassified
    sample is incremented immediately, so later samples in the same pass
    already see the update. Training stops early, printing "Ready!", after a
    pass in which nothing was misclassified.

    Args:
        times: Maximum number of passes over the training data.
        kernel: Kernel callable forwarded to ``predict``.

    Returns:
        The array of learned weights, indexed like ``train_data``.
    """
    weights = np.zeros_like(train_labels)
    weights[0] = 1

    for epoch_no in range(1, times + 1):
        # Weights only ever grow, so "some weight was bumped in this pass" is
        # the same as "the weights differ from their state before the pass".
        updated = False
        for position, sample in enumerate(train_data):
            expected = train_labels[position]
            if predict(sample, weights, kernel) == expected:
                continue
            weights[position] += 1
            updated = True

        if not updated:
            print("Ready!")
            break
        print("Epoch " + str(epoch_no) + " completed")

    return weights

In [7]:
def accuracy(weights, kernel=kernels.Gauss):
    """Evaluate the classifier on the held-out test split.

    Classifies every sample of the module-level ``test_data`` with ``predict``
    and compares the result with ``test_labels``. Each misclassified sample is
    printed together with its index, the prediction and the true label,
    followed by a final "correct/total" summary line.

    Args:
        weights: Per-training-sample weights forwarded to ``predict``.
        kernel: Kernel callable forwarded to ``predict``.

    Returns:
        The fraction of test samples that were classified correctly.
    """
    trues = 0
    for index in range(len(test_labels)):
        sample = test_data[index]
        label = test_labels[index]
        # Classify once and reuse the result: predict() scans the whole
        # training set, so calling it again just to print the wrong answer
        # doubled the cost of every misclassified sample.
        predicted = predict(sample, weights, kernel)
        if predicted == label:
            trues += 1
        else:
            print("Wrong index: " + str(index) + " Predict: " + str(predicted) + " Truth: " + str(label))
    print(str(trues)+"/"+str(len(test_labels)))
    return trues/len(test_labels)


In [8]:
# Hyper-parameters for this run.
epoch = 20               # upper bound on the number of training passes
metric = kernels.Gauss   # kernel function handed to train_weights()/accuracy()
n = 2                    # Minkowski order used for all distances (2 = Euclidean)
h = max_distance(n)      # bandwidth: largest pairwise distance in the training set

In [9]:
# Fit the per-sample weights, show them, then evaluate on the held-out test split.
weights = train_weights(epoch, metric)
print(weights)
# Last expression of the cell: the returned accuracy is shown as the cell's output.
accuracy(weights, metric)

Epoch 1 completed
Epoch 2 completed
Epoch 3 completed
Epoch 4 completed
Epoch 5 completed
Epoch 6 completed
Epoch 7 completed
Epoch 8 completed
Epoch 9 completed
Epoch 10 completed
Epoch 11 completed
Epoch 12 completed
Ready!
[1 1 0 2 0 2 0 1 0 1 0 9 0 9 0 1 7 1 0 0 0 1 1 3 1 0 0 6 1 1 1 1 2 1 0 2 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0
 8 0 0 0 0 0 0 0 8 0 0 0 0 1 4 8 0 5 0 0 0 0 6 0 3 2 0 6 0 0 0]
Wrong index: 24 Predict: 1 Truth: 2
Wrong index: 37 Predict: 2 Truth: 1
43/45


0.9555555555555556