In [None]:
# Import necessary packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder
from sklearn.utils import shuffle
from sklearn.model_selection import KFold
np.set_printoptions(suppress = True)

import warnings
warnings.simplefilter(action='ignore')


# Importing data sets
# The loaders come from the project-local `data_import` module, which is not shown in this notebook.
from data_import import load_fruits, load_chess, load_music, load_lepiota

# Each loader's result is unpacked into a feature set (X) and its labels (Y).
# NOTE(review): f_perceptron below predicts -1/+1, so the labels presumably use
# that same encoding -- confirm in data_import.
fruit_X, fruit_Y = load_fruits()
chess_X, chess_Y = load_chess()
music_X, music_Y = load_music()
lep_X, lep_Y = load_lepiota()

<hr style="color:Maroon;background-color:Maroon;border:0 none; height: 3px;">

### Perceptron Helper Functions

In [None]:
def judge(a, b):
    """Return a 0/1 mismatch flag: 1 when `a` and `b` differ, 0 when they are equal."""
    return 1 if a != b else 0
    
def f_perceptron(x, W, b):
    """Classify one sample with the linear rule sign(x @ W + b).

    Parameters
    ----------
    x : feature vector of a single sample
    W : weight vector, combined with `x` through the `@` operator
    b : bias added to the weighted sum

    Returns
    -------
    1 when the activation is greater than or equal to zero, otherwise -1.
    """
    activation = x @ W + b
    # An activation of exactly zero is assigned to the positive class.
    return 1 if activation >= 0 else -1

def calc_error(X, Y, W, b):
    """Calculate the misclassification rate of the perceptron (W, b) on X and Y.

    Parameters
    ----------
    X : iterable of feature vectors, one per sample
    Y : iterable of labels, paired with X by position (zip stops at the shorter one)
    W : weight vector passed on to f_perceptron
    b : bias passed on to f_perceptron

    Returns
    -------
    The mean of the per-sample 0/1 mismatch flags, i.e. the fraction of samples
    whose prediction differs from the label. For empty input the mean of an
    empty collection is nan, which is what NumPy returns here.
    """
    # Collect the flags in a plain list: np.append copies the whole array on
    # every call, which made the previous loop quadratic in the sample count.
    mistakes = [judge(yi, f_perceptron(xi, W, b)) for xi, yi in zip(X, Y)]
    return np.mean(mistakes)

### Learning Algorithm

In [None]:
def fit_perceptron(X_train, Y_train, lam = 1, max_iter = 5000, verbose = True):
    """Fit a perceptron by repeated passes over the training set.

    Parameters
    ----------
    X_train : 2-D array of feature vectors; X_train.shape[1] sizes the weights
    Y_train : labels paired with the rows of X_train. Predictions are -1/+1,
        so the loop can only reach zero error when the labels use that
        encoding -- NOTE(review): confirm against the data loaders.
    lam : learning rate scaling every update
    max_iter : training continues while the number of completed passes is
        <= max_iter, so at most max_iter + 1 passes are made. This keeps the
        original stopping condition (`iterations <= 5000`).
    verbose : when True, print the pass counter at the start of every pass
        (the original behaviour)

    Returns
    -------
    errors : list holding the training error measured after each pass
    W : learned weight vector
    b : learned bias
    """
    errors = [] # Error history

    # Initializing W and b
    W = np.zeros(X_train.shape[1])
    b = 0
    iterations = 0

    # The error is evaluated once per state of (W, b): the value measured at the
    # end of a pass feeds both the history and the next loop test, instead of
    # scanning the whole training set twice per pass.
    current_error = calc_error(X_train, Y_train, W, b)

    # Learning algorithm
    while current_error > 0 and iterations <= max_iter:
        if verbose:
            print(iterations)
        for xi, yi in zip(X_train, Y_train):

            # Compute the model prediction.
            yi_pred = f_perceptron(xi, W, b)

            # Correctly classified samples leave the model untouched.
            if yi_pred == yi:
                continue

            # (yi - yi_pred) carries the direction of the correction.
            step = lam * (yi - yi_pred)
            W = W + step * xi
            b = b + step
        iterations += 1

        # Track training errors
        current_error = calc_error(X_train, Y_train, W, b)
        errors.append(current_error)
    return errors, W, b

def test_perceptron(X, y, W, b):
    """Return the accuracy of the learned (W, b) on X and y.

    Accuracy is one minus the misclassification rate reported by calc_error.
    """
    return 1 - calc_error(X, y, W, b)

### Cross Validation Algorithm

In [None]:
def join_data(X, Y):
    """Append the labels Y to the feature set X as one extra, last column."""
    # Turn the labels into a single column so they can sit next to X.
    label_column = Y.reshape(Y.shape[0], 1)
    return np.hstack((X, label_column))

def cross_validate(X, y, size, lams, random_state = None):
    """Pick a learning rate by 5-fold cross validation on a training split.

    Parameters
    ----------
    X, y : full feature set and labels
    size : fraction passed to train_test_split as `test_size`; only the
        remaining training portion takes part in the cross validation
    lams : iterable of candidate learning rates
    random_state : optional seed forwarded to train_test_split and KFold so a
        run can be reproduced; None (the default) keeps the splits random

    Returns
    -------
    The candidate with the highest mean validation accuracy over the folds
    (the earliest one on ties). Returns 0 when `lams` is empty.
    """
    print('Cross validating...')
    # The held-out part of the split is intentionally left untouched here.
    X_train, _, Y_train, _ = train_test_split(
        X, y, test_size = size, shuffle = True, random_state = random_state)
    data = join_data(X_train, Y_train)
    # join_data puts the labels in the last column; split them back out once.
    features, labels = data[:, :-1], data[:, -1]
    kf = KFold(n_splits = 5, shuffle = True, random_state = random_state)

    # Start below any reachable accuracy so the first candidate is always
    # accepted, even if every candidate scores 0.
    best_acc = -np.inf
    best_lr = 0

    for lr in lams:
        print('CV for lr: {}'.format(lr))
        fold_accs = []
        for train_index, test_index in kf.split(data):
            errors, W, b = fit_perceptron(features[train_index], labels[train_index], lr)
            print('Model fit to folds')
            # Score on the fold that was held out from fitting, not on the
            # rows the model was just trained on.
            acc = test_perceptron(features[test_index], labels[test_index], W, b)
            print('Model tested on remaining fold, acc: {}'.format(acc))
            print('-' * 30)
            fold_accs.append(acc)

        # Compare this candidate's own mean accuracy, not a running mean that
        # also includes the candidates tried before it.
        mean_acc = np.mean(fold_accs)
        if mean_acc > best_acc:
            best_lr = lr
            best_acc = mean_acc
    print('Best lr: {}'.format(best_lr))
    return best_lr

### Model Training Loop

In [None]:
def train_model(X, y, size):
    """Print the mean training accuracy over three random train/test splits.

    For each split a perceptron is fitted with the default learning rate and
    then scored on the same training portion it was fitted on. `size` is the
    `test_size` fraction handed to train_test_split.
    """
    n_runs = 3
    train_accs = []
    for _ in range(n_runs):
        X_train, _, Y_train, _ = train_test_split(X, y, test_size = size, shuffle = True)
        _, W, b = fit_perceptron(X_train, Y_train)
        train_accs.append(test_perceptron(X_train, Y_train, W, b))
    print(sum(train_accs) / n_runs)

### Model Testing Loop

In [None]:
def test_model(X, y, size, lr):
    """Print the mean held-out accuracy over three random train/test splits.

    For each split a perceptron is fitted on the training portion with learning
    rate `lr` and scored on the test portion. `size` is the `test_size`
    fraction handed to train_test_split.
    """
    n_runs = 3
    test_accs = []
    for _ in range(n_runs):
        X_train, X_test, Y_train, Y_test = train_test_split(X, y, test_size = size, shuffle = True)
        _, W, b = fit_perceptron(X_train, Y_train, lr)
        test_accs.append(test_perceptron(X_test, Y_test, W, b))
    print(sum(test_accs) / n_runs)

<hr style="color:Maroon;background-color:Maroon;border:0 none; height: 3px;">

### Training Performance on each Dataset

In [None]:
# Report training accuracy for every dataset at each test-set fraction
# (0.8, 0.5, 0.2), in the same order as the datasets were loaded.
for features, labels in (
    (fruit_X, fruit_Y),
    (chess_X, chess_Y),
    (music_X, music_Y),
    (lep_X, lep_Y),
):
    for held_out in (0.8, 0.5, 0.2):
        train_model(features, labels, held_out)

### Cross Validating on each Dataset

In [None]:
# Candidate learning rates tried for every dataset / split combination.
candidate_lrs = [0.0001, 0.001, 0.01, 0.1, 1]
# Fractions passed as test_size. The 20/50/80 suffix on each result name is the
# complementary training share, e.g. best_f20 comes from test_size = 0.8.
held_out_fractions = (0.8, 0.5, 0.2)

best_f20, best_f50, best_f80 = [
    cross_validate(fruit_X, fruit_Y, frac, candidate_lrs) for frac in held_out_fractions
]

best_c20, best_c50, best_c80 = [
    cross_validate(chess_X, chess_Y, frac, candidate_lrs) for frac in held_out_fractions
]

best_m20, best_m50, best_m80 = [
    cross_validate(music_X, music_Y, frac, candidate_lrs) for frac in held_out_fractions
]

best_l20, best_l50, best_l80 = [
    cross_validate(lep_X, lep_Y, frac, candidate_lrs) for frac in held_out_fractions
]

### Testing Performance on each Dataset

In [None]:
# Report held-out accuracy for every dataset, pairing each test-set fraction
# (0.8, 0.5, 0.2) with the learning rate that cross validation chose for it.
for features, labels, chosen_lrs in (
    (fruit_X, fruit_Y, (best_f20, best_f50, best_f80)),
    (chess_X, chess_Y, (best_c20, best_c50, best_c80)),
    (music_X, music_Y, (best_m20, best_m50, best_m80)),
    (lep_X, lep_Y, (best_l20, best_l50, best_l80)),
):
    for held_out, lr in zip((0.8, 0.5, 0.2), chosen_lrs):
        test_model(features, labels, held_out, lr)