# Perceptron Lab





In [3]:
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.linear_model import Perceptron
import numpy as np
import matplotlib.pyplot as plt

from scipy.io import arff
import pandas as pd

## 1. (40%) Correctly implement and submit your own code for the perceptron learning algorithm. 

### Code requirements
- Shuffle the data each epoch.
- A way to create a random train/test split. Write your own. In the future you can use the scikit-learn version if you want.
- Use Stochastic/On-line training updates: Iterate and update weights after each training instance (i.e. do not attempt batch updates)
- Implement a stopping criteria: when your model has trained for a number of epochs with no significant improvement in accuracy, stop training. Note that the weights/accuracy do not usually change monotonically.
- Use your perceptron to solve the Debug data. We provide you with several parameters, and you should be able to replicate our results every time. When you are confident it is correct, run your perceptron on the Evaluation data with the same parameters, and print your final weights and accuracy.

In [34]:
class PerceptronClassifier(BaseEstimator,ClassifierMixin):
    def __init__(self, lr=.1, shuffle=True, epochs=None):
        """ 
            Initialize class with chosen hyperparameters.
        Args:
            lr (float): A learning rate / step size.
            shuffle: Whether to shuffle the training data each epoch. DO NOT 
            SHUFFLE for evaluation / debug datasets.
        """
        self.lr = lr
        self.shuffle = shuffle
        self.epochs = epochs
        self.accuracy=0

    def fit(self, X: np.ndarray, y: np.ndarray, initial_weights=None):
        """ 
            Fit the data; run the algorithm and adjust the weights to find a 
            good solution
        Args:
            X (array-like): A 2D numpy array with the training data, excluding
            targets
            y (array-like): A 2D numpy array with the training targets
            initial_weights (array-like): allows the user to provide initial 
            weights
        Returns:
            self: this allows this to be chained, e.g. model.fit(X,y).predict(X_test)
        """
        
        self.weights = self.initialize_weights(X.shape[1]) if not initial_weights else initial_weights               
        self.X = X
        self.y = y

        bias = np.ones(X.shape[0]).reshape(X.shape[0],1)
        X = np.append(X, bias, axis=1)
   
        if self.epochs == None:
            i=0

            while self.accuracy <= .95 and i <=50:
#                 print("Epoch " + str(i+1))
                self.epoch(X)
                self.accuracy = self.score(X, self.y)
                i = i + 1
#                 print("Accuracy: " + str(self.score(X, self.y)) + "\n")                 
        else:
            for i in range(self.epochs):
#                 print("Epoch " + str(i+1))
                self.epoch(X)
#                 print("Accuracy: " + str(self.score(X, self.y)) + "\n")         
        return self      

    def predict(self, X:np.ndarray):
        """ 
            Predict all classes for a dataset X
        Args:
            X (array-like): A 2D numpy array with the training data, excluding 
            targets
        Returns:
            array, shape (n_samples,)
                Predicted target values per element in X.
        """
        net  = X @ self.weights
        net = net.tolist()
        yHat = [1 if item>0 else 0 for item in net]
        return yHat

    def initialize_weights(self, n):
        """ Initialize weights for perceptron. Don't forget the bias!
        Returns:
        """
        weights = np.zeros((n+1))
        return weights

    def score(self, X, y):
        """ 
            Return accuracy of model on a given dataset. Must implement own 
            score function.
        Args:
            X (array-like): A 2D numpy array with data, excluding targets
            y (array-like): A 2D numpy array with targets
        Returns:
            score : float
                Mean accuracy of self.predict(X) wrt. y.
        """
        bias = np.ones(X.shape[0]).reshape(X.shape[0],1)
        X = np.append(X, bias, axis=1)
        yHat = self.predict(X)
        counter = 0
        for yy, yh in zip(y, yHat):
            if yy == yh:
                counter = counter + 1
        self.accuracy = counter/len(y)
        self.print_data()
        return counter/len(y)

    def _shuffle_data(self, X, y):
        """ 
            Shuffle the data! This _ prefix suggests that this method should 
            only be called internally.
            It might be easier to concatenate X & y and shuffle a single 2D 
            array, rather than shuffling X and y exactly the same way, 
            independently.
        """
        concat = np.append(X,y.reshape(len(y),1), axis=1)
        np.random.shuffle(concat)
        return concat[:, :-1], concat[:,-1]
    
    def epoch(self,X):
        if self.shuffle == True:
            X, y = self._shuffle_data(X,self.y)
        else:
            y = self.y

        net = None
        output = None

        for i in range(X.shape[0]):        
            net = np.dot(X[i],self.weights)
            output = 1 if net > 0 else 0
            dWeight = self.lr*(y[i] - output)*X[i]
            self.weights = self.weights + dWeight
#         print("Weights: " +np.array2string(self.weights, precision=2, separator=',',suppress_small=True ))     
    
    def create_test_data(self, X, y, percentage):
        #Give the percentage as a value from 0-1; 10% eqals to 0.1
        concat = np.append(X,y.reshape(len(y),1), axis=1)
        noOfDataPoints = X.shape[0] * percentage
        noOfDataPoints = int(np.rint(noOfDataPoints))
        concat = concat[np.random.choice(concat.shape[0], noOfDataPoints, replace=False)]
        return concat[:, :-1], concat[:,-1]
        def print_data(self):
        print("Weights: " +np.array2string(self.weights, precision=2, separator=',',suppress_small=True ))
        print("Accuracy: " + str(self.accuracy) + "\n")


    ### Not required by sk-learn but required by us for grading. Returns the weights.
    def get_weights(self):
        return self.weights

## 1.1 Debug and Evaluation

Debug and Evaluate your model using the parameters below

Learning Rate = 0.1 \ 
Deterministic = 10 [This means run it 10 epochs and should be the same everytime you run it] \ 
Shuffle = False \ 
Initial Weights = All zeros

---

### 1.1.1 Debug

Debug your model by running it on the [debug dataset](https://raw.githubusercontent.com/cs472ta/CS472/master/datasets/linsep2nonorigin.arff)

Expected Results:

Accuracy = [0.88]\
Final Weights = [-0.23  0.18 -0.1 ]

In [35]:
# Load debug data
data = arff.loadarff("linsep2nonorigin.arff")
df = pd.DataFrame(data[0])
X = df.iloc[:,:-1].to_numpy()
Y = df.iloc[:,-1].to_numpy().astype(np.int)

# Train on debug data
A = PerceptronClassifier(lr=0.1, shuffle=False, epochs=10)
xEval, yEval = A.create_test_data(X,Y,1)
Accuracy = A.fit(X=X, y=Y).score(xEval,yEval)

# Print accuracy and weights
A.print_data()

AttributeError: 'PerceptronClassifier' object has no attribute 'print_data'

### 1.1.2 Evaluation

We will evaluate your model based on it's performance on the [evaluation dataset](https://raw.githubusercontent.com/cs472ta/CS472/master/datasets/data_banknote_authentication.arff)

In [31]:
# Load evaluation data
data = arff.loadarff("data_banknote_authentication.arff")
df = pd.DataFrame(data[0])
X = df.iloc[:,:-1].to_numpy()
Y = df.iloc[:,-1].to_numpy().astype(np.int)

# Train on evaluation data
A = PerceptronClassifier(lr=0.1, shuffle=False, epochs=10)
A.fit(X=X, y=Y)

# Print accuracy and weights

Epoch 1
Weights: [-0.11,-0.16,-0.08,-0.6 , 1.6 ]
Accuracy: 0.4752186588921283

Epoch 2
Weights: [-1.54,-1.11,-0.93,-0.93, 2.9 ]
Accuracy: 0.9526239067055393

Epoch 3
Weights: [-2.08,-1.26,-1.56,-0.79, 2.9 ]
Accuracy: 0.9701166180758017

Epoch 4
Weights: [-1.99,-1.48,-1.61,-1.27, 3.5 ]
Accuracy: 0.9613702623906706

Epoch 5
Weights: [-2.65,-1.59,-2.11,-1.26, 3.8 ]
Accuracy: 0.9628279883381924

Epoch 6
Weights: [-3.07,-1.72,-2.12,-1.54, 4.4 ]
Accuracy: 0.9613702623906706

Epoch 7
Weights: [-3.56,-2.28,-2.65,-0.65, 3.8 ]
Accuracy: 0.9927113702623906

Epoch 8
Weights: [-3.58,-2.42,-2.53,-1.46, 4.5 ]
Accuracy: 0.9752186588921283

Epoch 9
Weights: [-3.85,-2.34,-2.83,-1.3 , 4.7 ]
Accuracy: 0.9766763848396501

Epoch 10
Weights: [-3.81,-2.84,-3.07,-1.4 , 4.9 ]
Accuracy: 0.9876093294460642



PerceptronClassifier(epochs=10, shuffle=False)

## 2. (30%) Classifying on linearly separable and non-linearly separable data

### 2.1 Create 2 datasets

- Both with 8 instances using 2 real valued inputs (ranging between -1 and 1) with 4 instances from each class. 
- One data set should be linearly separable and the other not.



In [5]:
# Create 2 datasets

### 2.2 Train on both sets with your perceptron code (with LR=.1)

In [6]:
# Train on each dataset

### 2.3 Graph the datasets and their corresponding decision line
 
 - Graph each dataset
 - Use your trained perceptrons above to determine each dataset's decision line
 - For all graphs always label the axes!
 
![Linearly Separable Data](https://raw.githubusercontent.com/rmorain/CS472-1/master/images/perceptron/linearly_separable.png)

![Not Linearly Separable](https://raw.githubusercontent.com/rmorain/CS472-1/master/images/perceptron/not_linearly_separable.png)

In [7]:
# Graph datasets and decision lines

### 2.4 Discuss why the perceptron won’t converge on non-linearly separable data.

*Discussion goes here*

## 3. (20%) Use your perceptron code to learn this version of the [voting data set](https://raw.githubusercontent.com/cs472ta/CS472/master/datasets/voting-dataset.arff).

This particular task is an edited version of the standard voting set, where we have replaced all the “don’t know” values with the most common value for the particular attribute.  

### 3.1 Create a table that reports the final training and test set accuracy and the number of epochs for each trial.

- Try it five times with different random 70/30 splits. 
- Use your own code to randomize and make splits. 
- Report the 5 trials and the average across the 5 trials in a table.  

| Trial | Training Accuracy | Test accuracy | Number of epochs |
| --- | --- | --- | --- |
| 1 | .95 | .55 | 4 |
| 2 | .85 | .45 | 6 |
| Average | .9 | .5 | 5 | 

*- As a rough sanity check, typical Perceptron test accuracies for the voting data set are 90%-98%.*


In [8]:
# Create the table any way you like

### 3.2 By looking at the weights, explain what the model has learned and how the individual input features affect the result. Which specific features are most critical for the voting task, and which are least critical? 


*Explanation goes here*

### 3.3 Make a graph of the average misclassification rate vs epochs (0th – final epoch).

- Average the misclassification rate for the training set across your 5 trials (not across 5 epochs).


![Average Misclassification Rate](https://raw.githubusercontent.com/rmorain/CS472-1/master/images/perceptron/avg_misclassification_rate_vs_epochs.png)

In [9]:
# Graph here
    
def plot_misclassification(avg_misclassification_rate):
    """
        Plots the average misclassification rate
    Args:
        avg_misclassification_rate (array-like): A 1D array or list
    """
    plt.plot(np.arange(len(avg_misclassification_rate)), avg_misclassification_rate)
    plt.title("Average Misclassification Rate vs. Epoch")
    plt.xlabel("Epoch")
    plt.ylabel("Misclassification Rate")
    plt.show()
    


## 4.1 (5%) Use the perceptron algorithm from the [scikit-learn toolkit](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Perceptron.html) to learn the voting task above.
- Report and compare your results with your own perceptron code.

In [10]:
# Load sklearn perceptron

# Train on voting dataset

*Report your comparison*

## 4.2 (5%) Use the perceptron algorithm from the [scikit-learn toolkit](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Perceptron.html) to learn one other data set of your choice.
- Try out some of the hyper-parameters that scikit-learn makes available.

In [None]:
# Load sklearn perceptron

# Train on your dataset

*Report what hyperparameters you experimented with & what performed well*

## 5. (Optional 5% extra credit) Use the perceptron rule to learn the [iris task](https://raw.githubusercontent.com/cs472ta/CS472/master/datasets/iris.arff) or some other task with more than two possible output values. 

Note that the [iris data](https://raw.githubusercontent.com/cs472ta/CS472/master/datasets/iris.arff) set has 3 output classes, and a perceptron node only has two possible outputs.  You could implement either of the two most common ways to deal with this. For testing you just execute the novel instance on each model and combine the overall results to see which output class wins.