# Mustererkennung/Machine Learning - Assignment 8

In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split

In [14]:
class Classifier():
    def error_rate(self, label, pred):
        return 1 - self.accuracy(label, pred)
    
    def accuracy(self, label, pred):
        return np.mean(label == pred)

In [15]:
data = pd.read_csv("./Data/iris.data", header=None)
y = data[4].to_numpy()
X = data.drop(4,axis=1).to_numpy()

In [16]:
X_train, X_test, y_train, y_test = train_test_split(data[list(range(4))], data[4], test_size=0.2, random_state=None, stratify=data[4])

X_train_setosa = X_train[y_train=='Iris-setosa'].to_numpy()
X_train_versicolor = X_train[y_train=='Iris-versicolor'].to_numpy()
X_train_virginica = X_train[y_train=='Iris-virginica'].to_numpy()

y_train_setosa = y_train[y_train=='Iris-setosa'].to_numpy()
y_train_versicolor = y_train[y_train=='Iris-versicolor'].to_numpy()
y_train_virginica = y_train[y_train=='Iris-virginica'].to_numpy()

X_test_setosa_v_v = X_test.to_numpy()
y_test_setosa_v_v = (y_test == 'Iris-setosa').astype(int).to_numpy()

X_test_versicolor_virginica = X_test[y_test!='Iris-setosa'].to_numpy()
y_test_versicolor_virginica = (y_test[y_test!='Iris-setosa'] == 'Iris-versicolor').astype(int).to_numpy()

In [17]:
def rand_vec(vec):
    return vec[np.random.randint(0,vec.shape[0])]

In [18]:
class Perceptron(Classifier):
    def fit(self, X, y, pocket=False, iterations=1000):
        self.classes = np.unique(y)

        datapoints, dims = X.shape
            
        self._appendOnes(X)
        X[y==self.classes[1]] *= -1
        self.omega = rand_vec(X)
        
        if pocket: 
            best_omega = self.omega
            old_len = X.shape[0]
        
        changes = 0
        while True:
            wrong = X[X.dot(self.omega.T)<0]
            if len(wrong) == 0:
                break
            if pocket:
                if changes >= iterations:
                    break
                if len(wrong) < old_len:
                    old_len = len(wrong)
                    best_omega = self.omega

            self.omega += rand_vec(wrong)
            changes += 1
        
        if pocket: self.omega = best_omega
        wrong = X[X.dot(self.omega.T)<0]
        print("Finished training; Wrong:", len(wrong), "Total Changes:", changes)
        print(self.omega)
        
    def _appendOnes(self, X):
        datapoints, dims = X.shape
        np.concatenate((np.ones((datapoints, 1)), X),axis=1)
    
    def predict(self,X):
        self._appendOnes(X)
        y = X.dot(self.omega)
        y = np.where(y>0, self.classes[0],self.classes[1])

        return y

In [19]:
for i in ['Iris-versicolor','Iris-virginica']:
    X_ = X[('Iris-setosa' == y) | (i == y)]
    y_ = y[('Iris-setosa' == y) | (i == y)]

    X_train, X_test, y_train, y_test = train_test_split(X_,y_)
    
    perc = Perceptron()
    perc.fit(X_train,y_train)
    
    pred = perc.predict(X_test)
    acc = perc.accuracy(y_test,pred)
    
    print("Accuracy:", acc)

Finished training; Wrong: 0 Total Changes: 6
[ 1.7  4.9 -7.8 -3.2]
Accuracy: 1.0
Finished training; Wrong: 0 Total Changes: 4
[ 2.   4.6 -6.  -3.3]
Accuracy: 1.0


In [20]:
j = 'Iris-versicolor'
i = 'Iris-virginica'

X_ = X[(j == y) | (i == y)]
y_ = y[(j == y) | (i == y)]

X_tr,X_te,y_tr,y_te = train_test_split(X_,y_,test_size=0.4)

perc = Perceptron()
perc.fit(X_tr,y_tr,pocket=True,iterations=1000000)

pred = perc.predict(X_te)
acc = perc.accuracy(y_te,pred)

print("Accuracy:", acc)

Finished training; Wrong: 0 Total Changes: 198
[ 29.6  30.6 -42.5 -37.7]
Accuracy: 0.925
