# Übung 6 Perceptron - Rainier Robles & Valentin Wolf

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split

In [2]:
data = pd.read_csv("iris.data", header=None)
y = data[4].as_matrix()
X = data.drop(4,axis=1).as_matrix()

In [3]:
class Classifier():
    def error_rate(self,truth, pred):
        """gets two vectors, returns (wrongly classified / total)"""
        return 1 - self.accuracy(truth, pred)
    
    def accuracy(self,truth,pred):
        return np.mean(truth == pred)

In [4]:
class Perceptron(Classifier):
    def fit(self,X,y,pocket=False,iterations=1000):
        self.classes = np.unique(y)
        assert(len(self.classes) == 2)
        datapoints,dims = X.shape
        
        def randi(vec):
            return vec[np.random.randint(0,vec.shape[0])]
            
        self._appendOnes(X)
        X[y==self.classes[1]] *= -1
        self.omega = randi(X)
        
        if pocket: 
            best_omega = self.omega
            old_len = X.shape[0]
        
        changes = 0
        while True:
            wrong = X[X.dot(self.omega.T)<0]
            if len(wrong) == 0:
                break
            if pocket:
                if changes >= iterations:
                    break
                if len(wrong) < old_len:
                    old_len = len(wrong)
                    best_omega = self.omega

            self.omega += randi(wrong)
            changes += 1
        
        if pocket: self.omega = best_omega
        wrong = X[X.dot(self.omega.T)<0]
        print("Finished training; Wrong:", len(wrong), "Total Changes:", changes)
        print(self.omega)
        
    def _appendOnes(self,X):
        datapoints,dims = X.shape
        np.concatenate((np.ones((datapoints,1)),X),axis=1)
    
    def predict(self,X):
        self._appendOnes(X)
        y = X.dot(self.omega)
        y = np.where(y>0, self.classes[0],self.classes[1])
        #y[y>0] = self.classes[0]
        #y[y<0] = self.classes[1]
        return y

In [5]:
for i in ['Iris-versicolor','Iris-virginica']:
    X_ = X[('Iris-setosa' == y) | (i == y)]
    y_ = y[('Iris-setosa' == y) | (i == y)]

    X_tr,X_te,y_tr,y_te = train_test_split(X_,y_)
    
    perc = Perceptron()
    perc.fit(X_tr,y_tr)
    
    pred = perc.predict(X_te)
    acc = perc.accuracy(y_te,pred)
    
    print("Accuracy:", acc)

Finished training; Wrong: 0 Total Changes: 11
[  4.3   7.7 -11.5  -4.4]
Accuracy: 0.96
Finished training; Wrong: 0 Total Changes: 4
[ 1.5  4.7 -6.5 -2.5]
Accuracy: 1.0


In [10]:
j = 'Iris-versicolor'
i = 'Iris-virginica'

X_ = X[(j == y) | (i == y)]
y_ = y[(j == y) | (i == y)]

X_tr,X_te,y_tr,y_te = train_test_split(X_,y_,test_size=0.4)

perc = Perceptron()
perc.fit(X_tr,y_tr,pocket=True,iterations=1000000)

pred = perc.predict(X_te)
acc = perc.accuracy(y_te,pred)

print("Accuracy:", acc)

Finished training; Wrong: 5 Total Changes: 1000000
[ 111.8   94.8 -125.6 -196. ]
Accuracy: 0.975
