In [None]:
import numpy as np
import sklearn.datasets as skdata
from sklearn.linear_model import Perceptron

In [None]:
'''
Name: Escalante, Tomas

Collaborators: N/A

Collaboration details: N/A

Summary:

You should answer the questions:
1) What did you do in this assignment?
   I created a multiclass perceptron and tested its performance on the iris and wine datasets.
   
2) How did you do it?
   I created the multiclass perceptron by using the concepts of binary perceptron. Instead of
   using one set of weights, I created three and got values by multiplying each feature vector
   by each set of weights and keeping the one with the highest value as the prediction.
   
3) What are the constants and hyper-parameters you used?
   I tested both datasets with tolerances of 1.00, 0.50, 0.10, and 0.01. The iris dataset 
   produced better results with larger tolerances between 0.50 and 1.00. The wine dataset did 
   better with smaller tolerances between 0.1 and 0.5. 
   Rather than initialize the weights as zeros, I made them 0.01, but after comparing the two,
   they performed the same.
   The number of training steps didn't seem to affect the results very much between 10 and 20.

Scores:

Results on the iris dataset using scikit-learn Perceptron model
Training set mean accuracy: 0.6694
Validation set mean accuracy: 0.6667
Testing set mean accuracy: 0.6429
Results on the iris dataset using our Perceptron model trained with 10 steps and tolerance of 1
Training set mean accuracy: 0.6694
Validation set mean accuracy: 0.6667
Results on the iris dataset using our Perceptron model trained with 20 steps and tolerance of 1
Training set mean accuracy: 0.6694
Validation set mean accuracy: 0.6667
Results on the iris dataset using our Perceptron model trained with 60 steps and tolerance of 1
Training set mean accuracy: 0.8760
Validation set mean accuracy: 0.9333
Using best model trained with 60 steps and tolerance of 1
Testing set mean accuracy: 0.8571
Results on the wine dataset using scikit-learn Perceptron model
Training set mean accuracy: 0.5625
Validation set mean accuracy: 0.4118
Testing set mean accuracy: 0.4706
Results on the wine dataset using our Perceptron model trained with 10 steps and tolerance of 0.01
Training set mean accuracy: 0.2708
Validation set mean accuracy: 0.2353
Results on the wine dataset using our Perceptron model trained with 20 steps and tolerance of 0.01
Training set mean accuracy: 0.2708
Validation set mean accuracy: 0.2353
Results on the wine dataset using our Perceptron model trained with 43 steps and tolerance of 0.01
Training set mean accuracy: 0.3889
Validation set mean accuracy: 0.2941
Using best model trained with 43 steps and tolerance of 0.01
Testing set mean accuracy: 0.4706
'''

In [None]:
'''
Implementation of Perceptron for multi-class classification
'''
class PerceptronMultiClass(object):
    '''
    Multi-class perceptron that keeps one weight vector per class

    A feature vector is assigned to the class whose weight vector gives the
    largest response w_c^T x. Class labels may be any values that np.unique
    can sort (they do not have to be 0, ..., C-1); internally they are mapped
    to row indices of the weight array.
    '''

    # Constant value prepended to every feature vector as the threshold feature
    __THRESHOLD = 0.5

    def __init__(self):
        # Define private variables:
        #   weights : (c, d+1, 1) array, one augmented weight vector per class
        #   n_class : number of distinct classes seen by fit
        #   classes : sorted unique labels seen by fit, row j of weights
        #             belongs to classes[j]
        self.__weights = None
        self.__n_class = None
        self.__classes = None

    def __augment(self, x):
        '''
        Prepends the constant threshold feature to every feature vector

        Args:
            x : numpy
                d x N feature vector

        Returns:
            numpy : (d+1) x N augmented feature vector
        '''
        threshold = self.__THRESHOLD * np.ones([1, x.shape[1]])  # (1, N)
        return np.concatenate([threshold, x], axis=0)  # (d+1, N)

    def __class_scores(self, x_aug):
        '''
        Computes the response w_c^T x of every class for every feature vector

        Args:
            x_aug : numpy
                (d+1) x M augmented feature vector

        Returns:
            numpy : c x M responses, one row per class
        '''
        # (c, d+1, 1) * (d+1, M) broadcasts to (c, d+1, M), then the feature
        # axis is summed out. Every class goes through the same multiply-and-sum,
        # so classes with identical weights (e.g. right after initialization)
        # get exactly equal responses and argmax picks the lowest class index.
        return np.sum(self.__weights * x_aug, axis=1)

    def __update(self, x, y):
        '''
        Update the weight vectors during each training iteration

        Examples are visited in order; each mis-classified example moves the
        weights of the wrongly predicted class away from it and the weights of
        the true class towards it.

        Args:
            x : numpy
                d x N feature vector
            y : numpy
                1 x N (or length N) ground-truth label, every label must be
                one of the classes seen by fit
        '''
        # Augment the feature vector x (d, N) with threshold (1, N)
        x = self.__augment(x)  # (d+1, N)

        # Translate labels into row indices of the weight array
        y_idx = np.searchsorted(self.__classes, np.ravel(y))

        # Walk through every example and check if they are incorrect
        for i in range(x.shape[1]):
            x_i = x[:, i:i + 1]  # (d+1, 1)

            predicted = np.argmax(self.__class_scores(x_i)[:, 0])
            actual = y_idx[i]

            if predicted != actual:
                # w(c_hat)^(t+1) = w(c_hat)^(t) - x^i
                # w(star)^(t+1) = w(star)^(t) + x^i
                self.__weights[predicted] -= x_i
                self.__weights[actual] += x_i

    def fit(self, x, y, T, tol):
        '''
        Fits the model to x and y by updating the weight vectors
        based on mis-classified examples for up to T iterations

        Each iteration prints the current training loss (fraction of
        mis-classified examples) and then performs one pass of updates.
        Training stops early when the loss reaches zero, or, from the fourth
        iteration on, when the loss exceeds the previous loss by more than
        tol; in the latter case the weights of the previous iteration are
        restored. The weights produced by the update of the final iteration
        are kept without being evaluated.

        Args:
            x : numpy
                d x N feature vector
            y : numpy
                1 x N (or length N) ground-truth label
            T : int
                maximum number of iterations to optimize perceptron
            tol : float
                tolerated increase of the loss between two iterations
        '''
        # Accept both (N,) and (1, N) label arrays
        y = np.ravel(y)

        # Initialize the weights: one (d+1, 1) vector per class
        self.__classes = np.unique(y)
        self.__n_class = len(self.__classes)
        self.__weights = 0.001 * np.ones([self.__n_class, x.shape[0] + 1, 1])  # (c, d+1, 1)
        self.__weights[:, 0, 0] = -0.001

        # Initialize previous loss (larger than any attainable loss) and weights
        prev_loss = 2.0
        prev_weights = np.copy(self.__weights)

        for t in range(T):
            # Compute the loss
            predictions = self.predict(x)  # (N,)

            # l = 1/N \sum_i^N I(h(x^i) != y^i)
            loss = np.mean(np.where(predictions != y, 1.0, 0.0))
            print('t={} loss={}'.format(t+1, loss))

            # Stopping conditions
            if loss == 0.0:
                break
            elif loss > prev_loss + tol and t > 2:
                # The last update made things worse: roll it back
                self.__weights = prev_weights
                break

            # Update previous loss and previous weights
            prev_loss = loss
            prev_weights = np.copy(self.__weights)

            # Updates our weight vectors based on what we got wrong
            self.__update(x, y)

    def predict(self, x):
        '''
        Predicts the label for each feature vector x

        Args:
            x : numpy
                d x N feature vector

        Returns:
            numpy : length N label vector, using the same label values that
                were passed to fit

        Raises:
            RuntimeError : if the model has not been fitted yet
        '''
        if self.__weights is None:
            raise RuntimeError('fit must be called before predict')

        # Augment the features x with the threshold
        x = self.__augment(x)  # (d+1, N)

        # Pick, for every example, the class with the largest w^Tx
        best = np.argmax(self.__class_scores(x), axis=0)  # (N,)

        # Map row indices of the weight array back to the original labels
        return self.__classes[best]

    def score(self, x, y):
        '''
        Predicts labels based on feature vector x and computes the mean accuracy
        of the predictions

        Args:
            x : numpy
                d x N feature vector
            y : numpy
                1 x N (or length N) ground-truth label

        Returns:
            float : mean accuracy
        '''
        predictions = self.predict(x)  # (N,)

        # Comparing if predictions and y are the same
        scores = np.where(predictions == np.ravel(y), 1.0, 0.0)

        # Return the mean accuracy
        return np.mean(scores)


def split_dataset(x, y, n_sample_train_to_val_test=8):
    '''
    Helper function that deals samples into training, validation and testing
    sets by their position in the dataset

    Samples are taken along the first axis of x. With a period of
    n_sample_train_to_val_test + 2, the last position of every period goes to
    validation and the first position of every period goes to testing, except
    for sample 0 which always goes to training. Everything else is training.

    Args:
        x : numpy
            N x d feature vector (one sample per row)
        y : numpy
            length N ground-truth label
        n_sample_train_to_val_test : int
            number of training samples for every validation, testing sample

    Returns:
        x_train : numpy
            n x d feature vector
        y_train : numpy
            length n ground-truth label
        x_val : numpy
            m x d feature vector
        y_val : numpy
            length m ground-truth label
        x_test : numpy
            k x d feature vector
        y_test : numpy
            length k ground-truth label
    '''
    period = n_sample_train_to_val_test + 2

    train_idx, val_idx, test_idx = [], [], []
    for sample in range(x.shape[0]):
        if sample == 0:
            # The very first sample is never held out
            bucket = train_idx
        else:
            phase = sample % period
            if phase == period - 1:
                bucket = val_idx
            elif phase == 0:
                bucket = test_idx
            else:
                bucket = train_idx
        bucket.append(sample)

    x_train = x[train_idx, :]
    y_train = y[train_idx]

    x_val = x[val_idx, :]
    y_val = y[val_idx]

    x_test = x[test_idx, :]
    y_test = y[test_idx]

    return x_train, y_train, x_val, y_val, x_test, y_test

In [None]:
# Experiment driver: for the iris and wine datasets, train scikit-learn's
# Perceptron as a baseline, then train PerceptronMultiClass with several
# iteration budgets, pick the one with the best validation accuracy and
# report its accuracy on the testing set.
if __name__ == '__main__':

    iris_data = skdata.load_iris()
    wine_data = skdata.load_wine()

    datasets = [iris_data, wine_data]
    tags = ['iris', 'wine']

    # Three different max training steps (T) to try for each dataset
    train_steps_iris = [10, 20, 48]
    train_steps_wine = [10, 20, 50]

    train_steps = [train_steps_iris, train_steps_wine]

    # Tolerance for each dataset. The same value is passed to scikit-learn's
    # Perceptron and to PerceptronMultiClass.fit. In the latter the loss is a
    # fraction in [0, 1] and the early-stop test is loss > prev_loss + tol,
    # so a tolerance of 1 can never trigger that early stop.
    tol_iris = 1
    tol_wine = 0.5

    tols = [tol_iris, tol_wine]

    # The four lists above are parallel: one entry per dataset
    for dataset, steps, tol, tag in zip(datasets, train_steps, tols, tags):
        # Split dataset: 8 training samples for every 1 validation and
        # 1 testing sample (roughly 80% / 10% / 10%)
        x = dataset.data
        y = dataset.target
        x_train, y_train, x_val, y_val, x_test, y_test = split_dataset(
            x=x,
            y=y,
            n_sample_train_to_val_test=8)

        '''
        Trains and tests Perceptron model from scikit-learn
        '''
        model = Perceptron(penalty=None, alpha=0.0, tol=tol)
        # Trains scikit-learn Perceptron model (expects N x d features)
        model.fit(x_train, y_train)

        print('Results on the {} dataset using scikit-learn Perceptron model'.format(tag))

        # Test model on training set
        scores_train = model.score(x_train, y_train)
        print('Training set mean accuracy: {:.4f}'.format(scores_train))

        # Test model on validation set
        scores_val = model.score(x_val, y_val)
        print('Validation set mean accuracy: {:.4f}'.format(scores_val))

        # Test model on testing set
        scores_test = model.score(x_test, y_test)
        print('Testing set mean accuracy: {:.4f}'.format(scores_test))

        '''
        Trains, validates, and tests our Perceptron model for multi-class classification
        '''
        # PerceptronMultiClass expects features as d x N, so transpose the
        # N x d splits that were used for scikit-learn above
        x_train = np.transpose(x_train, axes=(1, 0))
        x_val = np.transpose(x_val, axes=(1, 0))
        x_test = np.transpose(x_test, axes=(1, 0))

        # Initialize empty lists to hold models and their validation scores;
        # entry k of both lists corresponds to steps[k]
        models = []
        scores = []
        for T in steps:
            # A fresh model for every iteration budget
            PMC_model = PerceptronMultiClass()

            print('Results on the {} dataset using our Perceptron model trained with {} steps and tolerance of {}'.format(tag, T, tol))
            # Train model on training set (fit prints the loss of every iteration)
            PMC_model.fit(x_train, y_train, T=T, tol=tol)

            # Test model on training set
            scores_train = PMC_model.score(x_train, y_train)
            print('Training set mean accuracy: {:.4f}'.format(scores_train))

            # Test model on validation set
            scores_val = PMC_model.score(x_val, y_val)
            print('Validation set mean accuracy: {:.4f}'.format(scores_val))

            # Save the model and its validation score for model selection
            models.append(PMC_model)
            scores.append(scores_val)


        # Select the best performing model on the validation set
        # (np.argmax keeps the first one in case of a tie)
        best_idx = np.argmax(scores)

        print('Using best model trained with {} steps and tolerance of {}'.format(steps[best_idx], tol))

        # Test the selected model on the testing set
        scores_test = models[best_idx].score(x_test, y_test)
        print('Testing set mean accuracy: {:.4f}'.format(scores_test))