In [1]:
import numpy as np
import pandas as pd
    
def read_data(filename, phase):
    f = open(filename)
    data = f.readlines()
    f.close()
    
    if phase == "train":
        num_feat, _, _ = data[0].split()
        print('num_feat:', num_feat)
        data = data[1:]
    
    data = [[(col) for col in each_data.split()] for each_data in data]
    data = np.array(data).astype(float)
    features = np.ones([data.shape[0], data.shape[1]])
    classes = data[:, -1]
    classes = classes - 1.
    
    features[:, 0:-1] = data[:, :-1]
    
    return features, classes
    

In [2]:
def get_pred_level(weight, feature):
    cos_sim = np.dot(weight, feature)
    
    # positive level    
    if cos_sim >= 0.:
        return 1    
    # negative level
    else:
        return 0
        
def get_test_acc(weight, all_feature, classes):
    df = pd.DataFrame(
    columns=['sample no', 'feature values', 'actual class', 'predicted class'])

    correct = 0    
    for i, feature in enumerate(features):
        pred_val = get_pred_level(weight, feature)
        if classes[i] == pred_val:
            correct += 1
        else:
            new_row = {'sample no':i,
                       'feature values':feature,
                       'actual class':classes[i], 
                       'predicted class':pred_val
                      }
            df = df.append(new_row, ignore_index=True)            
        
    total = all_feature.shape[0]   
    print("accuracy",(correct*100)/total,"%")
    return (correct*100)/total, correct, total, df

In [3]:
def basic_perceptron(all_feature, classes, rho=1, max_epoch=None):    
#     weight = np.zeros([all_feature.shape[1]])
    weight = np.random.rand(all_feature.shape[1])
    
    epoch = 1
    while True:
        converge = True
        temp_weight = np.zeros([all_feature.shape[1]])
        for i, feature in enumerate(all_feature):
            pred_val = get_pred_level(weight, feature) 
            
            if classes[i] == 1. and pred_val == 0:
                converge = False
                temp_weight = temp_weight - feature
                
            elif classes[i] == 0. and pred_val == 1:
                converge = False
                temp_weight = temp_weight + feature
                
        if converge:
            print('finishing at epoch:', epoch, '\nweights:', weight)
            break
            
        weight = weight - rho * temp_weight
        
        epoch += 1
        if max_epoch is not None and epoch > max_epoch:
            print('finishing at epoch:', epoch, '\nweights:', weight)
            break
    return weight

In [4]:
def reward_punish_perceptron(all_feature, classes, rho=1, max_epoch=None):    
#     weight = np.zeros([all_feature.shape[1]])
    weight = np.random.rand(all_feature.shape[1])
    
    epoch = 1
    while True:
        converge = True
        
        for i, feature in enumerate(all_feature):
            pred_val = get_pred_level(weight, feature) 
            
            if classes[i] == 1. and pred_val == 0:
                converge = False
                weight = weight + rho * feature
                
            elif classes[i] == 0. and pred_val == 1:
                converge = False
                weight = weight - rho * feature
                
        if converge:
            print('finishing at epoch:', epoch, '\nweights:', weight)
            break
                    
        epoch += 1
        if max_epoch is not None and epoch > max_epoch:
            print('finishing at epoch:', epoch, '\nweights:', weight)
            break
    return weight

In [5]:
def pocket_perceptron(all_feature, classes, rho=1, max_epoch=None):    
#     weight = np.zeros([all_feature.shape[1]])
    weight = np.random.rand(all_feature.shape[1])
    best_weight = np.copy(weight)
    epoch = 1
    H_s = 0
    while True:
        converge = True
        temp_weight = np.zeros([all_feature.shape[1]])
        H = 0
        for i, feature in enumerate(all_feature):
            pred_val = get_pred_level(weight, feature) 
            
            if classes[i] == 1. and pred_val == 0:
                converge = False
                temp_weight = temp_weight - feature
                
            elif classes[i] == 0. and pred_val == 1:
                converge = False
                temp_weight = temp_weight + feature
                
            else:
                H += 1
                
        if converge:
            print('finishing at epoch:', epoch, '\nweights:', weight)
            break
            
        if H > H_s:
            best_weight = np.copy(weight)
            H_s = H
            print("new best at epoch",  epoch)
            
        weight = weight - rho * temp_weight
        
        epoch += 1
        if max_epoch is not None and epoch > max_epoch:
            print('finishing at epoch:', epoch, '\nweights:', weight)
            break
            
    return best_weight


In [9]:
rho = 1

train_type = basic_perceptron
features, labels = read_data(filename='trainLinearlySeparable.txt', phase="train")
weight = train_type(all_feature=features, classes=labels, rho=rho, max_epoch=None)
acc, correct, total, _ = get_test_acc(weight, features, labels)
features, labels = read_data(filename='testLinearlySeparable.txt', phase="test")
acc, correct, total, df= get_test_acc(weight, features, labels)
df.to_csv('basic_perceptron_'+'testLinearlySeparable.csv', encoding='utf-8')

features, labels = read_data(filename='trainLinearlyNonSeparable.txt', phase="train")
weight = train_type(all_feature=features, classes=labels, rho=rho, max_epoch=1000)
acc, correct, total, _ = get_test_acc(weight, features, labels)
features, labels = read_data(filename='testLinearlyNonSeparable.txt', phase="test")
acc, correct, total, df= get_test_acc(weight, features, labels)
df.to_csv('basic_perceptron_'+'testLinearlyNonSeparable.csv', encoding='utf-8')

num_feat: 4
finishing at epoch: 251 
weights: [-1.55564192e+03 -7.85514757e+01  1.41116679e+01  1.65957742e+03
 -1.62346727e+04]
accuracy 100.0 %
accuracy 100.0 %
num_feat: 4
finishing at epoch: 1001 
weights: [   111.0394645   -1589.42847173   -221.98490169   1814.82941817
 -11128.73355275]
accuracy 98.25 %
accuracy 97.75 %


In [10]:
rho = 1
train_type = reward_punish_perceptron

features, labels = read_data(filename='trainLinearlySeparable.txt', phase="train")
weight = train_type(all_feature=features, classes=labels, rho=rho, max_epoch=None)
acc, correct, total, _ = get_test_acc(weight, features, labels)
features, labels = read_data(filename='testLinearlySeparable.txt', phase="test")
acc, correct, total, df= get_test_acc(weight, features, labels)
df.to_csv('reward_punish_perceptron_'+'testLinearlySeparable.csv', encoding='utf-8')

features, labels = read_data(filename='trainLinearlyNonSeparable.txt', phase="train")
weight = train_type(all_feature=features, classes=labels, rho=rho, max_epoch=1000)
acc, correct, total, _ = get_test_acc(weight, features, labels)
features, labels = read_data(filename='testLinearlyNonSeparable.txt', phase="test")
acc, correct, total, df= get_test_acc(weight, features, labels)
df.to_csv('reward_punish_perceptron_'+'testLinearlyNonSeparable.csv', encoding='utf-8')

num_feat: 4
finishing at epoch: 5 
weights: [ -10.94248493   -6.44432702    3.12258804   11.85435893 -110.83111405]
accuracy 100.0 %
accuracy 99.5 %
num_feat: 4
finishing at epoch: 1001 
weights: [  49.44217652  -77.14755163   41.35455457   30.99898074 -428.60432708]
accuracy 98.5 %
accuracy 97.5 %


In [11]:
rho = 1
train_func = pocket_perceptron

features, labels = read_data(filename='trainLinearlySeparable.txt', phase="train")
weight = train_func(all_feature=features, classes=labels, rho=rho, max_epoch=None)
acc, correct, total, _ = get_test_acc(weight, features, labels)
features, labels = read_data(filename='testLinearlySeparable.txt', phase="test")
acc, correct, total, df= get_test_acc(weight, features, labels)
df.to_csv('pocket_perceptron_'+'testLinearlySeparable.csv', encoding='utf-8')

features, labels = read_data(filename='trainLinearlyNonSeparable.txt', phase="train")
weight = train_func(all_feature=features, classes=labels, rho=rho, max_epoch=1000)
acc, correct, total, _ = get_test_acc(weight, features, labels)
features, labels = read_data(filename='testLinearlyNonSeparable.txt', phase="test")
acc, correct, total, df= get_test_acc(weight, features, labels)
df.to_csv('pocket_perceptron_'+'testLinearlyNonSeparable.csv', encoding='utf-8')

num_feat: 4
new best at epoch 1
new best at epoch 56
new best at epoch 265
new best at epoch 266
finishing at epoch: 277 
weights: [ -1620.84674029    -24.84645594    100.28199048   1869.67444516
 -17224.4111231 ]
accuracy 99.75 %
accuracy 99.75 %
num_feat: 4
new best at epoch 1
new best at epoch 29
new best at epoch 103
finishing at epoch: 1001 
weights: [   134.96632948  -1775.58655878   -271.36645281   1956.84090909
 -11328.64448704]
accuracy 98.5 %
accuracy 97.75 %
