In [1]:
import pandas as pd
import numpy as np
names=["sepal_len","sepal_wid","petal_len","petal_wid","class"]
iris = pd.read_csv("../data/iris.data",names=names)

In [2]:
print(iris[:10])
print(iris.describe())

   sepal_len  sepal_wid  petal_len  petal_wid        class
0        5.1        3.5        1.4        0.2  Iris-setosa
1        4.9        3.0        1.4        0.2  Iris-setosa
2        4.7        3.2        1.3        0.2  Iris-setosa
3        4.6        3.1        1.5        0.2  Iris-setosa
4        5.0        3.6        1.4        0.2  Iris-setosa
5        5.4        3.9        1.7        0.4  Iris-setosa
6        4.6        3.4        1.4        0.3  Iris-setosa
7        5.0        3.4        1.5        0.2  Iris-setosa
8        4.4        2.9        1.4        0.2  Iris-setosa
9        4.9        3.1        1.5        0.1  Iris-setosa
        sepal_len   sepal_wid   petal_len   petal_wid
count  150.000000  150.000000  150.000000  150.000000
mean     5.843333    3.054000    3.758667    1.198667
std      0.828066    0.433594    1.764420    0.763161
min      4.300000    2.000000    1.000000    0.100000
25%      5.100000    2.800000    1.600000    0.300000
50%      5.800000    3.0000

In [3]:
def convertIrisTo2Class(iris):
    '''
        Convert iris's three classes into two classes.
    '''
    iris.loc[iris['class']!='Iris-setosa','class']='other'
    return iris
iris = convertIrisTo2Class(iris)
print(iris)

     sepal_len  sepal_wid  petal_len  petal_wid        class
0          5.1        3.5        1.4        0.2  Iris-setosa
1          4.9        3.0        1.4        0.2  Iris-setosa
2          4.7        3.2        1.3        0.2  Iris-setosa
3          4.6        3.1        1.5        0.2  Iris-setosa
4          5.0        3.6        1.4        0.2  Iris-setosa
5          5.4        3.9        1.7        0.4  Iris-setosa
6          4.6        3.4        1.4        0.3  Iris-setosa
7          5.0        3.4        1.5        0.2  Iris-setosa
8          4.4        2.9        1.4        0.2  Iris-setosa
9          4.9        3.1        1.5        0.1  Iris-setosa
10         5.4        3.7        1.5        0.2  Iris-setosa
11         4.8        3.4        1.6        0.2  Iris-setosa
12         4.8        3.0        1.4        0.1  Iris-setosa
13         4.3        3.0        1.1        0.1  Iris-setosa
14         5.8        4.0        1.2        0.2  Iris-setosa
15         5.7        4.

In [4]:
def preprocessIris(iris):
    '''
        Preprocess iris data including normalization and label transformation, etc.
    '''
    features=iris.loc[:,["sepal_len","sepal_wid","petal_len","petal_wid"]]
    labels=iris.loc[:,["class"]]
    normalized_features=(features-features.min())/(features.max()-features.min())
    labels.loc[iris['class']!='other','class']=+1
    labels.loc[iris['class']=='other','class']=-1
#     print(normalized_features,labels)
    return pd.concat([normalized_features,labels],axis=1)

In [5]:
iris=preprocessIris(iris)
# print(iris)

In [6]:
def train_test_split(data, label, ratio=0.8):
    size=len(data)
    index=data.index
    random_index=np.random.permutation(index)
    split_point = int(size*ratio)
    
    train_x=data.loc[random_index[:split_point]].values
    train_y=label.loc[random_index[:split_point]].values
    test_x=data.loc[random_index[split_point:]].values
    test_y=label.loc[random_index[split_point:]].values
    
    return train_x,train_y,test_x,test_y

In [7]:
features = iris.loc[:,["sepal_len","sepal_wid","petal_len","petal_wid"]]
labels = iris.loc[:,"class"]
train_x,train_y, val_x, val_y = train_test_split(features,labels,0.7)

In [18]:
class Perceptron:
    '''
        Perceptron
    '''
    def __init__(self):
        self.w = None
        self.b = None
        self.errors_ = None
        
    def train(self,x, y, eta=0.1, epochs = 10):
        self.w = np.zeros(x.shape[1])
        self.b = 0
        self.errors_ = np.zeros(epochs)
        
        sample_size = len(x)
        for epoch in range(epochs):
            error = 0
            for i in range(sample_size):
                res = -y[i]*(np.dot(self.w, x[i])+ self.b)
                error += res
                
                if res >= 0:# misclassified 
                    self.w = self.w + eta * y[i]*x[i]
                    self.b = self.b + eta * y[i]
            self.errors_[epoch]=error
        print("Training errors:",self.errors_)
        return self
    
    def predict(self,x, y):
        if self.w is None or self.b is None:
            raise RuntimeError("The model is untrained yet.")
        sample_size = len(x)
        correct_num = 0
        for i in range(sample_size):
            pred = y[i]*(np.dot(self.w, x[i])+ self.b)
            if pred > 0:
                correct_num+=1
        print("The final accuracy is %2f%%" % float(correct_num*100/sample_size))

In [27]:
model = Perceptron()
model.train(train_x, train_y)
model.predict(val_x,val_y)

Training errors: [-10.7098819   -9.94031881 -11.55475046 -11.55475046 -11.55475046
 -11.55475046 -11.55475046 -11.55475046 -11.55475046 -11.55475046]
The final accuracy is 100.000000%
