In [None]:
import random
import numpy as np
from data_process import get_CIFAR10_data, get_MUSHROOM_data
from scipy.spatial import distance
from models import Perceptron, SVM, Softmax, Logistic, softmax
from kaggle_submission import output_submission_csv
from matplotlib import pyplot as plt
%matplotlib inline

# custom
try:
    from tqdm.notebook import tqdm
except ImportError:
    class tqdm:
        """Minimal no-op stand-in used when the tqdm package is unavailable.

        Covers the two ways this notebook uses tqdm:

        * wrapping an iterable: ``for x in tqdm(iterable, total=n)``
        * a manually driven bar: ``bar = tqdm(total=n); bar.update(1); bar.close()``

        All display options (``total``, ``position``, ...) are accepted and ignored.
        """

        def __init__(self, iterable=None, **kwargs):
            self.iterable = iterable

        def __iter__(self):
            if self.iterable is None:
                raise TypeError("tqdm fallback was created without an iterable")
            return iter(self.iterable)

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc_value, traceback):
            self.close()
            return False

        def update(self, n=1):
            """Accept a progress increment; nothing is displayed."""

        def close(self):
            """Nothing to release; present for API compatibility."""

# For auto-reloading external modules
# See http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

# Loading CIFAR-10

In the following cells we determine the number of images for each split and load the images.
<br /> 
`TRAIN_IMAGES + VAL_IMAGES` must lie in the range (0, 50000],
and `TEST_IMAGES` = 10000.

In [None]:
# You can change these numbers for experimentation
# For submission we will use the default values 
TRAIN_IMAGES = 40000
VAL_IMAGES = 10000

In [None]:
data = get_CIFAR10_data(TRAIN_IMAGES, VAL_IMAGES)
X_train_CIFAR, y_train_CIFAR = data['X_train'], data['y_train']
X_val_CIFAR, y_val_CIFAR = data['X_val'], data['y_val']
X_test_CIFAR, y_test_CIFAR = data['X_test'], data['y_test']
n_class_CIFAR = len(np.unique(y_test_CIFAR))

Convert the sets of images from dimensions of **(N, 3, 32, 32) -> (N, 3072)** where N is the number of images so that each **3x32x32** image is represented by a single vector.

In [None]:
X_train_CIFAR = np.reshape(X_train_CIFAR, (X_train_CIFAR.shape[0], -1))
X_val_CIFAR = np.reshape(X_val_CIFAR, (X_val_CIFAR.shape[0], -1))
X_test_CIFAR = np.reshape(X_test_CIFAR, (X_test_CIFAR.shape[0], -1))

# Loading Mushroom

In the following cells we determine the splitting of the mushroom dataset.
<br /> TRAINING + VALIDATION = 0.8, TESTING = 0.2

In [None]:
# Fraction of the data reserved for validation (passed to get_MUSHROOM_data).
# With the 0.2 testing fraction described above, VALIDATION = 0.2 leaves
# 60% of the data for training.
VALIDATION = 0.2

In [None]:
data = get_MUSHROOM_data(VALIDATION)
X_train_MR, y_train_MR = data['X_train'], data['y_train']
X_val_MR, y_val_MR = data['X_val'], data['y_val']
X_test_MR, y_test_MR = data['X_test'], data['y_test']
n_class_MR = len(np.unique(y_test_MR))

print("Number of train samples: ", X_train_MR.shape[0])
print("Number of val samples: ", X_val_MR.shape[0])
print("Number of test samples: ", X_test_MR.shape[0])

# Utilities
## Get Accuracy
This function computes how well your model performs using accuracy as a metric.

In [None]:
def get_acc(pred, y_test):
    """Return the percentage (0-100) of entries of `pred` that equal `y_test`."""
    n_correct = np.sum(y_test == pred)
    n_total = len(y_test)
    return n_correct / n_total * 100

## Custom utilities
Utility functions used below are defined here; they aren't displayed in the PDF but can be seen in the raw notebook.

In [None]:
# customization
from dataclasses import dataclass, field
from typing import List, Callable
from functools import partial

generate_plots = True

@dataclass
class Stats:
    """Training history of a model; each field is a list with one entry per recorded epoch."""
    # accuracy on the training / validation split
    training_accuracies: List[float] = field(default_factory=list)
    validation_accuracies: List[float] = field(default_factory=list)
    # summed loss on the training / validation split
    training_loss: List[float] = field(default_factory=list)
    validation_loss: List[float] = field(default_factory=list)
    # weight-magnitude measure recorded alongside the losses
    L2_norm: List[float] = field(default_factory=list)

def track_history_over_epochs(model, X_train, y_train, X_val, y_val, accuracy_metric=None):
    """Train `model` epoch by epoch and record loss/accuracy/weight statistics.

    Parameters
    ----------
    model
        Object exposing `train_one_epoch(X, y)` (iterated once per recorded
        step), `epochs`, `w`, `predict(X)` and `loss(X, y)`.
    X_train, y_train
        Training split; also used to drive the training itself.
    X_val, y_val
        Validation split, only evaluated.
    accuracy_metric
        Callable `(predictions, labels) -> float`. Defaults to `get_acc`, so
        callers that omit it no longer fail with a TypeError.

    Returns
    -------
    Stats
        One entry per iteration of `model.train_one_epoch` in every list.
    """
    if accuracy_metric is None:
        # Looked up at call time so this function does not depend on
        # definition order between notebook cells.
        accuracy_metric = get_acc

    stats = Stats()

    for _ in tqdm(model.train_one_epoch(X_train, y_train), total = model.epochs):
        # Sum of squared weights scaled by the number of training samples
        # (i.e. a squared norm, not the norm itself).
        stats.L2_norm.append(np.sum(model.w * model.w) / y_train.shape[0])

        train_pred = model.predict(X_train)
        stats.training_loss.append(np.sum(model.loss(X_train, y_train)))
        stats.training_accuracies.append(accuracy_metric(train_pred, y_train))

        val_pred = model.predict(X_val)
        stats.validation_loss.append(np.sum(model.loss(X_val, y_val)))
        stats.validation_accuracies.append(accuracy_metric(val_pred, y_val))

    return stats

class DecayFunctions:
    """Learning-rate schedules, expressed as a multiplier per epoch.

    Every schedule takes the zero-based epoch index `i_epoch` and the total
    number of epochs `n_epochs` and returns a factor that equals `start` at
    `i_epoch == 0` and (except for `constant`) `stop` at
    `i_epoch == n_epochs - 1`. Extra keyword arguments are accepted and ignored.
    """
    start = 1.0 # multiplier at the first epoch, i.e. 1 * lr
    stop = 0.01 # multiplier at the last epoch, i.e. 0.01 * lr

    @staticmethod
    def _n_steps(n_epochs : int) -> int:
        # Number of decay steps between the first and last epoch. Clamped to 1
        # so a single-epoch run yields `start` instead of NaN or a
        # ZeroDivisionError.
        return max(n_epochs - 1, 1)

    @staticmethod
    def constant(i_epoch : int, n_epochs : int, **kwargs) -> float:
        """No decay: always `start`."""
        return DecayFunctions.start

    @staticmethod
    def exponential(i_epoch : int, n_epochs : int, **kwargs) -> float:
        """Geometric decay from `start` to `stop`."""
        # The rate is chosen so the last epoch lands exactly on `stop`,
        # matching the end point of the linear schedule.
        prefactor = np.log(DecayFunctions.start / DecayFunctions.stop)
        return DecayFunctions.start * np.exp(-prefactor * i_epoch / DecayFunctions._n_steps(n_epochs))

    @staticmethod
    def linear(i_epoch : int, n_epochs : int, **kwargs) -> float:
        """Straight-line decay from `start` to `stop`."""
        prefactor = (DecayFunctions.stop - DecayFunctions.start) / DecayFunctions._n_steps(n_epochs)
        return DecayFunctions.start + prefactor * i_epoch

    @staticmethod
    def cos(i_epoch : int, n_epochs : int, **kwargs) -> float:
        """Half-cosine decay from `start` to `stop`."""
        stretch = np.pi * i_epoch / DecayFunctions._n_steps(n_epochs)
        additive = 0.5 * (DecayFunctions.start + DecayFunctions.stop)
        prefactor = 0.5 * (DecayFunctions.start - DecayFunctions.stop)
        return additive + prefactor * np.cos(stretch)
    
@dataclass
class ModelParams:
    """Hyper-parameters that `train` forwards to a model constructor.

    `lr`, `n_epochs` and `reg_const` default to None and are assigned after
    construction by the cells that use this class.
    """
    lr : float = None
    n_epochs : int = None
    reg_const : float = None
    # custom parameters
    batch_size : int = 1
    # Schedule called as `_decay(i_epoch, n_epochs=...)`; see `decay_function`.
    _decay : Callable[..., float] = DecayFunctions.linear

    @property
    def decay_function(self) -> Callable[..., float]:
        """Return `_decay` with `n_epochs` already bound (a callable, not a float)."""
        return partial(self._decay, n_epochs = self.n_epochs)

def plot_stats(stats):
    """Build a 1x3 plotly figure: loss history, accuracy history, weight norm.

    `stats` must provide `training_loss`, `validation_loss`,
    `training_accuracies`, `validation_accuracies` and `L2_norm` sequences.
    The figure is returned, not shown.
    """
    import plotly.graph_objects as go
    from plotly.subplots import make_subplots

    fig = make_subplots(rows=1, cols=3, subplot_titles=("Loss history", "Accuracy history", "L2 norm of weights"))

    # One entry per subplot column: the (series, legend name) pairs drawn in it.
    panels = (
        # losses
        ((stats.training_loss, 'training_loss'),
         (stats.validation_loss, 'validation_loss')),
        # accuracies
        ((stats.training_accuracies, 'training_accuracy'),
         (stats.validation_accuracies, 'validation_accuracy')),
        # l2 norm
        ((stats.L2_norm, "l2 norm"),),
    )

    for column, series in enumerate(panels, start=1):
        for values, legend_name in series:
            trace = go.Scatter(y=values, mode='lines+markers', name=legend_name)
            fig.add_trace(trace, row=1, col=column)
        # every panel shares the same x axis meaning
        fig.update_xaxes(title_text="epochs", row=1, col=column)

    return fig

"""
# look ma, no partial!
def get_decay_function(cls_str : str, n_epochs : int):
    # decay functions for the learning rate
    def no_decay(i_epoch : int, **kwargs) -> float:
        return 1.0

    def exponential_decay(i_epoch : int, **kwargs) -> float:
        # 1 at i_epoch = 0, some small value at i_epoch = n_epochs
        # so that last iteration will also have some update
        # return np.exp(-np.abs(kwargs['decay_rate']) * (i_epoch / n_epochs))

        # so that at the last iteration the weight is the same as linear_decay
        prefactor = n_epochs * np.log(n_epochs) / (n_epochs - 1)
        return np.exp(-prefactor * (i_epoch / n_epochs))

    def linear_decay(i_epoch : int, **kwargs) -> float:
        return (n_epochs - i_epoch) / (n_epochs)

    decays = {x.__name__ : x for x in (no_decay, exponential_decay, linear_decay)}
    return decays[cls_str]
"""

# eq=False: the generated __eq__ would compare the ndarray fields inside a
# tuple comparison, which raises (ambiguous truth value / shape mismatch) for
# two distinct datasets. Identity equality is what the notebook relies on.
@dataclass(eq=False)
class Dataset:
    '''Train / validation / test splits of one dataset plus its class count.'''
    X_train : np.ndarray = None
    y_train : np.ndarray = None
    X_val : np.ndarray = None
    y_val : np.ndarray = None
    X_test : np.ndarray = None
    y_test : np.ndarray = None
    n_class : int = None
    
CIFAR_dataset = Dataset(X_train_CIFAR, y_train_CIFAR, X_val_CIFAR, y_val_CIFAR, X_test_CIFAR, y_test_CIFAR, n_class_CIFAR)
MR_dataset = Dataset(X_train_MR, y_train_MR, X_val_MR, y_val_MR, X_test_MR, y_test_MR, n_class_MR)

def train(cls, using : ModelParams):
    """Return a fluent trainer ("Gym") for model class `cls` with parameters `using`.

    Typical use: ``train(Model, using=params).over(dataset).and_plot().and_report()``.
    Nothing is trained until `.over(dataset)` is called.
    """
    accuracy_metric = get_acc

    class Gym:
        def __init__(self):
            self.model = None
            self.params = using
            self.statistics = None
            self.data = None

        def over(self, data : Dataset):
            """Construct the model, train it on `data` and record per-epoch statistics."""
            # no copy, each model creates its own copy when training
            self.data = data

            # NOTE(review): every cls receives n_class as first argument here,
            # while the Logistic cell further down constructs
            # Logistic(learning_rate, n_epochs, ...) without it - confirm
            # against models/logistic.py.
            positional = [data.n_class, self.params.lr, self.params.n_epochs]
            # `is not None` rather than truthiness, so an explicit
            # reg_const of 0.0 is still forwarded to the model.
            if self.params.reg_const is not None:
                positional.append(self.params.reg_const)
            self.model = cls(*positional, batch_size = self.params.batch_size, rate_decay = self.params.decay_function)

            self.statistics = track_history_over_epochs(self.model, data.X_train, data.y_train, data.X_val, data.y_val, accuracy_metric)
            return self

        def and_report(self):
            """Print training, validation and testing accuracy of the trained model."""
            title_str = cls.__name__ + " statistics"
            rule = '-' * len(title_str)
            print(rule)
            print(title_str)
            print(rule)

            prediction = self.model.predict(self.data.X_train)
            print('The training accuracy after learning is given by: %f' % (accuracy_metric(prediction, self.data.y_train)))

            print('The validation accuracy after learning is given by: %f' % self.and_report_validation_accuracy())
            print('The testing accuracy after learning is given by: %f' % self.and_report_test_accuracy())

            return self

        def and_plot(self):
            """Show the loss / accuracy / weight-norm history figure."""
            fig = plot_stats(self.statistics)
            fig.show(renderer="notebook+pdf")
            return self

        def and_report_validation_accuracy(self):
            """Return (not print) the accuracy on the validation split."""
            prediction = self.model.predict(self.data.X_val)
            return accuracy_metric(prediction, self.data.y_val)

        def and_report_test_accuracy(self):
            """Return (not print) the accuracy on the test split."""
            prediction = self.model.predict(self.data.X_test)
            return accuracy_metric(prediction, self.data.y_test)

        def get_underlying_model(self):
            """Return the trained model instance (None before `over` is called)."""
            return self.model

    return Gym()

# Perceptron

Perceptron has 2 hyperparameters that you can experiment with:
- **Learning rate** - controls how much we change the current weights of the classifier during each update. We set it at a default value of 0.5, but you should experiment with different values. We recommend changing the learning rate by factors of 10 and observing how the performance of the classifier changes. You should also try adding a **decay** which slowly reduces the learning rate over each epoch.
- **Number of Epochs** - An epoch is a complete iterative pass over all of the data in the dataset. During an epoch we predict a label using the classifier and then update the weights of the classifier according to the perceptron update rule for each sample in the training set. You should try different values for the number of training epochs and report your results.

You will implement the Perceptron classifier in the **models/perceptron.py**

The following code: 
- Creates an instance of the Perceptron classifier class 
- Calls the train function of the Perceptron class on the training data
- We use the predict function to find the training accuracy as well as the testing accuracy


## Train Perceptron on CIFAR

In [None]:
model = Perceptron
params = ModelParams()
params.lr = 2.0
params.n_epochs = 5
params.batch_size = 2000
params._decay = DecayFunctions.linear 

trained_model = train(model, using = params).over(CIFAR_dataset).and_plot().and_report()

In [None]:
lr = 0.5
n_epochs = 20
# custom parameters
batch_size = 500
rate_decay = partial(DecayFunctions.linear, n_epochs = n_epochs)

percept_CIFAR = Perceptron(n_class_CIFAR, lr, n_epochs, batch_size = batch_size, rate_decay = rate_decay)
if generate_plots:
    percept_CIFAR_stats = track_history_over_epochs(percept_CIFAR, X_train_CIFAR, y_train_CIFAR, X_val_CIFAR, y_val_CIFAR, accuracy_metric=get_acc)
else:
    percept_CIFAR.train(X_train_CIFAR, y_train_CIFAR)

In [None]:
if generate_plots:
    fig = plot_stats(percept_CIFAR_stats)
    fig.show()
# fig = plt.figure()
# plt.plot(percept_CIFAR_stats.training_accuracies)
# plt.plot(percept_CIFAR_stats.validation_accuracies)
# fig
# percept_CIFAR.loss(X_train_CIFAR, y_train_CIFAR)

In [None]:
pred_percept = percept_CIFAR.predict(X_train_CIFAR)
print('The training accuracy is given by: %f' % (get_acc(pred_percept, y_train_CIFAR)))

### Validate Perceptron on CIFAR

In [None]:
pred_percept = percept_CIFAR.predict(X_val_CIFAR)
print('The validation accuracy is given by: %f' % (get_acc(pred_percept, y_val_CIFAR)))

### Test Perceptron on CIFAR

In [None]:
pred_percept = percept_CIFAR.predict(X_test_CIFAR)
print('The testing accuracy is given by: %f' % (get_acc(pred_percept, y_test_CIFAR)))

### Perceptron_CIFAR Kaggle Submission

Once you are satisfied with your solution and test accuracy, output a file to submit your test set predictions to the Kaggle for Assignment 1 CIFAR. Use the following code to do so:

In [None]:
output_submission_csv('kaggle/perceptron_submission_CIFAR.csv', trained_model.get_underlying_model().predict(X_test_CIFAR))

## Train Perceptron on Mushroom

In [None]:
model = Perceptron
params = ModelParams()
params.lr = 1.0
params.n_epochs = 10
params.batch_size = 10
params._decay = DecayFunctions.linear 

trained_model = train(model, using = params).over(MR_dataset).and_plot().and_report()

In [None]:
# Perceptron on Mushroom, constructed directly (without the `train` helper).
lr = 0.5
n_epochs = 10
# custom parameters
batch_size = 1
rate_decay = partial(DecayFunctions.linear, n_epochs = n_epochs)

percept_MR = Perceptron(n_class_MR, lr, n_epochs, batch_size = batch_size, rate_decay = rate_decay)
if generate_plots:
    # The accuracy metric is passed explicitly, as in the CIFAR Perceptron cell.
    percept_MR_stats = track_history_over_epochs(percept_MR, X_train_MR, y_train_MR, X_val_MR, y_val_MR, accuracy_metric=get_acc)
else:
    percept_MR.train(X_train_MR, y_train_MR)

In [None]:
if generate_plots:
    fig = plot_stats(percept_MR_stats)
    fig.show()

In [None]:
pred_percept = percept_MR.predict(X_train_MR)
print('The training accuracy is given by: %f' % (get_acc(pred_percept, y_train_MR)))

### Validate Perceptron on Mushroom

In [None]:
pred_percept = percept_MR.predict(X_val_MR)
print('The validation accuracy is given by: %f' % (get_acc(pred_percept, y_val_MR)))

### Test Perceptron on Mushroom

In [None]:
pred_percept = percept_MR.predict(X_test_MR)
print('The testing accuracy is given by: %f' % (get_acc(pred_percept, y_test_MR)))

# Support Vector Machines (with SGD)

Next, you will implement a "soft margin" SVM. In this formulation you will maximize the margin between positive and negative training examples and penalize margin violations using a hinge loss.

We will optimize the SVM loss using SGD. This means you must compute the gradient of the loss function with respect to the model weights. You will use this gradient to update the model weights.

SVM optimized with SGD has 3 hyperparameters that you can experiment with:
- **Learning rate** - similar to as defined above in Perceptron, this parameter scales by how much the weights are changed according to the calculated gradient update. 
- **Epochs** - similar to as defined above in Perceptron.
- **Regularization constant** - Hyperparameter to determine the strength of regularization. In this case it is a coefficient on the term which maximizes the margin. You could try different values. The default value is set to 0.05.

You will implement the SVM using SGD in the **models/svm.py**

The following code: 
- Creates an instance of the SVM classifier class 
- Calls the train function of the SVM class on the training data
- We use the predict function to find the training accuracy as well as the testing accuracy

## Train SVM on CIFAR

In [None]:
model = SVM
params = ModelParams()
params.lr = 1.0
params.n_epochs = 5
params.reg_const = 500.0
params.batch_size = 1000
params._decay = DecayFunctions.linear 

trained_model = train(model, using = params).over(CIFAR_dataset).and_plot().and_report()

In [None]:
# SVM on CIFAR, constructed directly (without the `train` helper).
lr = 5.0
n_epochs = 20
reg_const = 50.0
# custom parameters
batch_size = 200
rate_decay = partial(DecayFunctions.linear, n_epochs = n_epochs)

svm_CIFAR = SVM(n_class_CIFAR, lr, n_epochs, reg_const, batch_size = batch_size, rate_decay = rate_decay)
if generate_plots:
    # The accuracy metric is passed explicitly, as in the CIFAR Perceptron cell.
    svm_CIFAR_stats = track_history_over_epochs(svm_CIFAR, X_train_CIFAR, y_train_CIFAR, X_val_CIFAR, y_val_CIFAR, accuracy_metric=get_acc)
else:
    svm_CIFAR.train(X_train_CIFAR, y_train_CIFAR)

In [None]:
if generate_plots:
    fig = plot_stats(svm_CIFAR_stats)
    fig.show()

In [None]:
pred_svm = svm_CIFAR.predict(X_train_CIFAR)
print('The training accuracy is given by: %f' % (get_acc(pred_svm, y_train_CIFAR)))

### Validate SVM on CIFAR

In [None]:
pred_svm = svm_CIFAR.predict(X_val_CIFAR)
print('The validation accuracy is given by: %f' % (get_acc(pred_svm, y_val_CIFAR)))

### Test SVM on CIFAR

In [None]:
pred_svm = svm_CIFAR.predict(X_test_CIFAR)
print('The testing accuracy is given by: %f' % (get_acc(pred_svm, y_test_CIFAR)))

### SVM_CIFAR Kaggle Submission

Once you are satisfied with your solution and test accuracy, output a file to submit your test set predictions to the Kaggle for Assignment 1 CIFAR. Use the following code to do so:

In [None]:
output_submission_csv('kaggle/svm_submission_CIFAR.csv', trained_model.get_underlying_model().predict(X_test_CIFAR))

## Train SVM on Mushroom

In [None]:
model = SVM
params = ModelParams()
params.lr = 0.5
params.n_epochs = 20
params.reg_const = 5.0
params.batch_size = 20
params._decay = DecayFunctions.linear 

trained_model = train(model, using = params).over(MR_dataset).and_plot().and_report()

In [None]:
# SVM on Mushroom, constructed directly (without the `train` helper).
lr = 0.5
n_epochs = 10
reg_const = 0.05
# custom parameters
batch_size = 1
rate_decay = partial(DecayFunctions.linear, n_epochs = n_epochs)

svm_MR = SVM(n_class_MR, lr, n_epochs, reg_const, batch_size = batch_size, rate_decay = rate_decay)
if generate_plots:
    # The accuracy metric is passed explicitly, as in the CIFAR Perceptron cell.
    svm_MR_stats = track_history_over_epochs(svm_MR, X_train_MR, y_train_MR, X_val_MR, y_val_MR, accuracy_metric=get_acc)
else:
    svm_MR.train(X_train_MR, y_train_MR)

In [None]:
if generate_plots:
    fig = plot_stats(svm_MR_stats)
    fig.show()

In [None]:
pred_svm = svm_MR.predict(X_train_MR)
print('The training accuracy is given by: %f' % (get_acc(pred_svm, y_train_MR)))

### Validate SVM on Mushroom

In [None]:
pred_svm = svm_MR.predict(X_val_MR)
print('The validation accuracy is given by: %f' % (get_acc(pred_svm, y_val_MR)))

### Test SVM on Mushroom

In [None]:
pred_svm = svm_MR.predict(X_test_MR)
print('The testing accuracy is given by: %f' % (get_acc(pred_svm, y_test_MR)))

# Softmax Classifier (with SGD)

Next, you will train a Softmax classifier. This classifier consists of a linear function of the input data followed by a softmax function which outputs a vector of dimension C (number of classes) for each data point. Each entry of the softmax output vector corresponds to a confidence in one of the C classes, and like a probability distribution, the entries of the output vector sum to 1. We use a cross-entropy loss on this softmax output to train the model. 

Check the following link as an additional resource on softmax classification: http://cs231n.github.io/linear-classify/#softmax

Once again we will train the classifier with SGD. This means you need to compute the gradients of the softmax cross-entropy loss function with respect to the weights and update the weights using this gradient. Check the following link to help with implementing the gradient updates: https://deepnotes.io/softmax-crossentropy

The softmax classifier has 3 hyperparameters that you can experiment with:
- **Learning rate** - As above, this controls how much the model weights are updated with respect to their gradient.
- **Number of Epochs** - As described for perceptron.
- **Regularization constant** - Hyperparameter to determine the strength of regularization. In this case, we minimize the L2 norm of the model weights as regularization, so the regularization constant is a coefficient on the L2 norm in the combined cross-entropy and regularization objective.

You will implement a softmax classifier using SGD in the **models/softmax.py**

The following code: 
- Creates an instance of the Softmax classifier class 
- Calls the train function of the Softmax class on the training data
- We use the predict function to find the training accuracy as well as the testing accuracy

## Train Softmax on CIFAR

In [None]:
model = Softmax
params = ModelParams()
params.lr = 2.0
params.n_epochs = 30
params.reg_const = 5.0
params.batch_size = 500
params._decay = DecayFunctions.constant 

trained_model = train(model, using = params).over(CIFAR_dataset).and_plot().and_report()

In [None]:
# Softmax on CIFAR, constructed directly (without the `train` helper).
lr = 0.5
n_epochs = 10
reg_const = 0.05
# custom parameters
batch_size = 1
rate_decay = partial(DecayFunctions.linear, n_epochs = n_epochs)

softmax_CIFAR = Softmax(n_class_CIFAR, lr, n_epochs, reg_const, batch_size = batch_size, rate_decay = rate_decay)
if generate_plots:
    # The accuracy metric is passed explicitly, as in the CIFAR Perceptron cell.
    softmax_CIFAR_stats = track_history_over_epochs(softmax_CIFAR, X_train_CIFAR, y_train_CIFAR, X_val_CIFAR, y_val_CIFAR, accuracy_metric=get_acc)
else:
    softmax_CIFAR.train(X_train_CIFAR, y_train_CIFAR)

In [None]:
if generate_plots:
    fig = plot_stats(softmax_CIFAR_stats)
    fig.show()

In [None]:
# softmax_CIFAR.train(X_train_CIFAR, y_train_CIFAR)
pred_softmax = softmax_CIFAR.predict(X_train_CIFAR)
print('The training accuracy is given by: %f' % (get_acc(pred_softmax, y_train_CIFAR)))

### Validate Softmax on CIFAR

In [None]:
pred_softmax = softmax_CIFAR.predict(X_val_CIFAR)
print('The validation accuracy is given by: %f' % (get_acc(pred_softmax, y_val_CIFAR)))

### Testing Softmax on CIFAR

In [None]:
pred_softmax = softmax_CIFAR.predict(X_test_CIFAR)
print('The testing accuracy is given by: %f' % (get_acc(pred_softmax, y_test_CIFAR)))

### Softmax_CIFAR Kaggle Submission

Once you are satisfied with your solution and test accuracy, output a file to submit your test set predictions to the Kaggle for Assignment 1 CIFAR. Use the following code to do so:

In [None]:
output_submission_csv('kaggle/softmax_submission_CIFAR.csv', trained_model.get_underlying_model().predict(X_test_CIFAR))

## Train Softmax on Mushroom

In [None]:
model = Softmax
params = ModelParams()
params.lr = 0.5
params.n_epochs = 100
params.reg_const = 1.0
params.batch_size = 5
params._decay = DecayFunctions.constant 

trained_model = train(model, using = params).over(MR_dataset).and_plot().and_report()

In [None]:
# Softmax on Mushroom, constructed directly (without the `train` helper).
lr = 0.5
n_epochs = 10
reg_const = 0.05
# custom parameters
batch_size = 1
rate_decay = partial(DecayFunctions.linear, n_epochs = n_epochs)

softmax_MR = Softmax(n_class_MR, lr, n_epochs, reg_const, batch_size = batch_size, rate_decay = rate_decay)
if generate_plots:
    # The accuracy metric is passed explicitly, as in the CIFAR Perceptron cell.
    softmax_MR_stats = track_history_over_epochs(softmax_MR, X_train_MR, y_train_MR, X_val_MR, y_val_MR, accuracy_metric=get_acc)
else:
    softmax_MR.train(X_train_MR, y_train_MR)

In [None]:
if generate_plots:
    fig = plot_stats(softmax_MR_stats)
    fig.show()

In [None]:
pred_softmax = softmax_MR.predict(X_train_MR)
print('The training accuracy is given by: %f' % (get_acc(pred_softmax, y_train_MR)))

### Validate Softmax on Mushroom

In [None]:
pred_softmax = softmax_MR.predict(X_val_MR)
print('The validation accuracy is given by: %f' % (get_acc(pred_softmax, y_val_MR)))

### Testing Softmax on Mushroom

In [None]:
pred_softmax = softmax_MR.predict(X_test_MR)
print('The testing accuracy is given by: %f' % (get_acc(pred_softmax, y_test_MR)))

# Logistic Classifier

The Logistic Classifier has 2 hyperparameters that you can experiment with:
- **Learning rate** - similar to as defined above in Perceptron, this parameter scales by how much the weights are changed according to the calculated gradient update. 
- **Number of Epochs** - As described for perceptron.



You will implement the Logistic Classifier in the **models/logistic.py**

The following code: 
- Creates an instance of the Logistic classifier class 
- Calls the train function of the Logistic class on the training data
- We use the predict function to find the training accuracy as well as the testing accuracy

### Training Logistic Classifier

In [None]:
model = Logistic
params = ModelParams()
params.lr = 0.5
params.n_epochs = 10
params.batch_size = 10
params._decay = DecayFunctions.linear 

trained_model = train(model, using = params).over(MR_dataset).and_plot().and_report()

In [None]:
# Logistic classifier on Mushroom, constructed directly (without the `train` helper).
learning_rate = 0.5
n_epochs = 10
# custom parameters
batch_size = 1
rate_decay = partial(DecayFunctions.linear, n_epochs = n_epochs)

# NOTE: `lr` is the Logistic model here; it rebinds the name that earlier
# cells use for the float learning rate. The cells below rely on this name.
lr = Logistic(learning_rate, n_epochs, batch_size = batch_size, rate_decay = rate_decay)
if generate_plots:
    # The accuracy metric is passed explicitly, as in the CIFAR Perceptron cell.
    lr_MR_stats = track_history_over_epochs(lr, X_train_MR, y_train_MR, X_val_MR, y_val_MR, accuracy_metric=get_acc)
else:
    lr.train(X_train_MR, y_train_MR)

In [None]:
if generate_plots:
    fig = plot_stats(lr_MR_stats)
    fig.show()

In [None]:
pred_lr = lr.predict(X_train_MR)
print('The training accuracy is given by: %f' % (get_acc(pred_lr, y_train_MR)))

### Validate Logistic Classifier

In [None]:
pred_lr = lr.predict(X_val_MR)
print('The validation accuracy is given by: %f' % (get_acc(pred_lr, y_val_MR)))

### Test Logistic Classifier

In [None]:
pred_lr = lr.predict(X_test_MR)
print('The testing accuracy is given by: %f' % (get_acc(pred_lr, y_test_MR)))

## Hyper-parameter search

In [None]:
import psweep as ps
import pandas as pd
from typing import Union

def get_calc_dir(model, dataset):
    """Return the psweep calc directory name for `model` trained on `dataset`.

    `dataset` must be the `CIFAR_dataset` or `MR_dataset` object itself.
    Identity (`is`) is used instead of `==`: Dataset holds ndarrays, and
    comparing two different datasets field by field can raise or emit NumPy
    warnings instead of returning False.

    Raises
    ------
    RuntimeError
        If `dataset` is neither of the two known datasets.
    """
    if dataset is CIFAR_dataset:
        return 'calc' + model.__name__
    if dataset is MR_dataset:
        return 'calc' + model.__name__ + '_MR'
    raise RuntimeError("unknown dataset: expected CIFAR_dataset or MR_dataset")

def run_campaign(model, set_of_params, dataset = CIFAR_dataset):
    """Train `model` on `dataset` for every parameter set and collect accuracies.

    Each entry of `set_of_params` must provide 'n_epochs', 'batch_size',
    '_decay' and 'lr'; 'reg_const' is optional. Every parameter set is
    trained three times; the per-trial validation and test accuracies are
    stored as arrays in the result columns. Returns whatever `ps.run` returns.
    """
    total_trials = 3
    pbar = tqdm(total = len(set_of_params), position = 1)

    def _run_training(pset):
        params = ModelParams()
        params.n_epochs = pset['n_epochs']
        params.batch_size = pset['batch_size']
        params._decay = pset['_decay']
        params.lr = pset['lr']
        params.reg_const = pset.get('reg_const', None)

        validation_accuracy = np.empty(total_trials)
        test_accuracy = np.empty(total_trials)
        for trial in range(total_trials):
            trained_model = train(model, using = params).over(dataset)
            validation_accuracy[trial] = trained_model.and_report_validation_accuracy()
            test_accuracy[trial] = trained_model.and_report_test_accuracy()

        # Advance only once the parameter set has actually been processed.
        pbar.update(1)
        return {'validation_accuracy' : validation_accuracy, 'test_accuracy' : test_accuracy}

    try:
        return ps.run(_run_training, set_of_params, calc_dir = get_calc_dir(model, dataset), simulate=False)
    finally:
        # Release the progress bar even when a training run raises.
        pbar.close()

run_CIFAR_campaign = partial(run_campaign, dataset = CIFAR_dataset)
run_MR_campaign = partial(run_campaign, dataset = MR_dataset)

# should not be none though
def report_results_of(run_df : Union[pd.DataFrame, None], dataset = CIFAR_dataset):
    """Print the parameter set with the best mean test accuracy and return the results.

    Parameters
    ----------
    run_df
        Results frame with a `test_accuracy` column of per-trial arrays. If it
        is not a DataFrame, the stored results are read from disk instead:
        a model class passed here selects that model's calc directory, and any
        other value (e.g. None) falls back to the notebook-global `model`.
    dataset
        Dataset the campaign was run on; selects the calc directory.

    Returns
    -------
    pandas.DataFrame
        The (possibly freshly loaded) results frame.
    """
    if not isinstance(run_df, pd.DataFrame):
        import os
        # Cells in this notebook call report_results_of(model); honour that
        # argument rather than relying on whatever the global `model` is.
        model_cls = run_df if isinstance(run_df, type) else model
        run_df = ps.df_read(os.path.join(get_calc_dir(model_cls, dataset), "results.pk"))

    # NOTE(review): the winner is picked by test accuracy; validation_accuracy
    # is recorded as well and would avoid selecting on the test set.
    loc = run_df.test_accuracy.apply(np.average).argmax()
    print(run_df.iloc[loc])
    return run_df

## Perceptron search

In [None]:
model = Perceptron

# An earlier sweep used n_epochs in [5, 10, 15] and batch_size in
# [200, 500, 1000] with the same lr and decay grids. Those assignments were
# overwritten by the grid below before being used, so only this one is kept.
sweep_epochs = ps.plist('n_epochs', [5, 8, 10])
sweep_lr = ps.plist('lr', [0.5, 1.0, 2.0])
sweep_batch_sizes = ps.plist('batch_size', [1000, 2000, 5000])
sweep_decay_functions = ps.plist('_decay', 
                                 [DecayFunctions.constant, 
                                  DecayFunctions.linear,
                                  DecayFunctions.exponential, 
                                  DecayFunctions.cos])

"""
# Testing parameters
sweep_epochs = ps.plist('n_epochs', [10])
sweep_lr = ps.plist('lr', [0.05])
sweep_batch_sizes = ps.plist('batch_size', [100, 200])
sweep_decay_functions = ps.plist('_decay', 
                                 [DecayFunctions.constant])
"""

# Full grid over the four swept parameters, run on the default (CIFAR) dataset.
df = report_results_of(
    run_campaign(
        model, 
        ps.pgrid(sweep_lr, sweep_epochs, sweep_batch_sizes, sweep_decay_functions)
        )
    )

In [None]:
model = Perceptron
# df = ps.df_read(os.path.join("calc" + model.__name__, "results.pk"))
# loc = df.test_accuracy.apply(np.average).argmax()
# print(df.iloc[loc])
report_results_of(model)

In [None]:
model = Perceptron

sweep_epochs = ps.plist('n_epochs', [5, 10, 20])
sweep_lr = ps.plist('lr', [0.5, 1.0, 2.0])
sweep_batch_sizes = ps.plist('batch_size', [10, 20, 50])
sweep_decay_functions = ps.plist('_decay', 
                                 [DecayFunctions.constant, 
                                  DecayFunctions.linear,
                                  # DecayFunctions.exponential, 
                                  # DecayFunctions.cos
                                 ])

df = report_results_of(
    run_MR_campaign(
        model, 
        ps.pgrid(sweep_lr, sweep_epochs, sweep_batch_sizes, sweep_decay_functions)
        )
)

In [None]:
model = Perceptron
report_results_of(model, MR_dataset)

## SVM search

In [None]:
model = SVM
# params = ModelParams()
# params.lr = 2.0
# params.n_epochs = 5
# params.reg_const = 500.0
# params.batch_size = 2000
# params._decay = DecayFunctions.linear 

sweep_epochs = ps.plist('n_epochs', [5, 10, 15])
sweep_lr = ps.plist('lr', [0.5, 1.0, 2.0])
sweep_batch_sizes = ps.plist('batch_size', [1000, 2000, 5000])
sweep_reg_const = ps.plist('reg_const', [500.0, 1000.0])
sweep_decay_functions = ps.plist('_decay', 
                                 [DecayFunctions.constant, 
                                  DecayFunctions.linear,
                                  DecayFunctions.exponential, 
                                  DecayFunctions.cos
                                 ])

df = report_results_of(
    run_campaign(
        model, 
        ps.pgrid(sweep_lr, sweep_epochs, sweep_batch_sizes, sweep_reg_const, sweep_decay_functions)
        )
)

In [None]:
model = SVM
report_results_of(model)

In [None]:
model = SVM

sweep_epochs = ps.plist('n_epochs', [5, 10, 20])
sweep_lr = ps.plist('lr', [0.5, 1.0, 2.0])
sweep_batch_sizes = ps.plist('batch_size', [10, 20, 50])
sweep_reg_const = ps.plist('reg_const', [5.0, 10.0])
sweep_decay_functions = ps.plist('_decay', 
                                 [DecayFunctions.constant, 
                                  DecayFunctions.linear,
                                  # DecayFunctions.exponential, 
                                  # DecayFunctions.cos
                                 ])

df = report_results_of(
    run_MR_campaign(
        model, 
        ps.pgrid(sweep_lr, sweep_epochs, sweep_batch_sizes, sweep_reg_const, sweep_decay_functions)
        )
)

In [None]:
model = SVM
report_results_of(model, MR_dataset)

## Softmax search

In [None]:
model = Softmax

sweep_epochs = ps.plist('n_epochs', [10, 20, 30])
sweep_lr = ps.plist('lr', [0.05, 0.5, 2.0])
sweep_batch_sizes = ps.plist('batch_size', [100, 500])
sweep_reg_const = ps.plist('reg_const', [0.05, 0.5, 5.0])
sweep_decay_functions = ps.plist('_decay', 
                                 [DecayFunctions.constant, 
                                  DecayFunctions.linear,
                                  # DecayFunctions.exponential, 
                                  # DecayFunctions.cos
                                 ])

df = report_results_of(
    run_campaign(
        model, 
        ps.pgrid(sweep_lr, sweep_epochs, sweep_batch_sizes, sweep_reg_const, sweep_decay_functions)
        )
)

In [None]:
model = Softmax

sweep_epochs = ps.plist('n_epochs', [50, 100])
sweep_lr = ps.plist('lr', [0.5, 1.0, 2.0])
sweep_batch_sizes = ps.plist('batch_size', [5, 10, 20])
sweep_reg_const = ps.plist('reg_const', [1.0])
sweep_decay_functions = ps.plist('_decay', 
                                 [DecayFunctions.constant, 
                                  DecayFunctions.linear,
                                 ])

df = report_results_of(
    run_MR_campaign(
        model, 
        ps.pgrid(sweep_lr, sweep_epochs, sweep_batch_sizes, sweep_reg_const, sweep_decay_functions)
        )
)

In [None]:
model = Softmax
report_results_of(model, MR_dataset)

## Logistic search

In [None]:
model = Logistic

sweep_epochs = ps.plist('n_epochs', [5, 10, 20])
sweep_lr = ps.plist('lr', [0.5, 1.0, 2.0])
sweep_batch_sizes = ps.plist('batch_size', [10, 20, 50])
sweep_decay_functions = ps.plist('_decay', 
                                 [DecayFunctions.constant, 
                                  DecayFunctions.linear,
                                  # DecayFunctions.exponential, 
                                  # DecayFunctions.cos
                                 ])

df = report_results_of(
    run_MR_campaign(
        model, 
        ps.pgrid(sweep_lr, sweep_epochs, sweep_batch_sizes, sweep_decay_functions)
        )
)