In [None]:
from scipy.stats import norm
import torch
from math import ceil
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter
import torch.optim as optim
import torchvision
import torch.nn.functional as F
from functools import partial
import functorch
from functorch import combine_state_for_ensemble
from functorch import vmap
import numpy as np
import pickle
import matplotlib.pyplot as plt
from scipy import stats
from sklearn.metrics import zero_one_loss
from sklearn.metrics import mean_squared_error
import torchvision.transforms as transforms
from sklearn.datasets import fetch_openml
import os, shutil
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
from tqdm import tqdm

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import PolynomialFeatures

# from google.colab import drive
# drive.mount('/content/drive')

# os.chdir("/content/drive/My Drive/6.s898 Project/Project")
# !mkdir scikit_learn_data
# !mkdir mlp-model
# os.mkdir('ensemble-model')

In [None]:
# Mount Google Drive at /content/drive (this import is only available on Google Colab).
from google.colab import drive
drive.mount('/content/drive')

# Work from the project folder: later cells use relative paths such as
# './data' and 'scikit_learn_data/...', which resolve against this directory.
os.chdir("/content/drive/My Drive/6.s898 Project/Project")

Mounted at /content/drive


In [None]:
def save_obj(obj, name):
    """Pickle ``obj`` to the file ``name + '.pkl'``.

    Parameters
    ----------
    obj : object
        Any picklable Python object
    name : str
        Path of the output file without the '.pkl' extension
    """
    target = name + '.pkl'
    with open(target, 'wb') as handle:
        pickle.dump(obj, handle, protocol=pickle.HIGHEST_PROTOCOL)

def load_obj(name):
    """Load and return the object pickled in ``name + '.pkl'``.

    Unpickling can execute arbitrary code, so only use this on files that
    you created yourself.

    Parameters
    ----------
    name : str
        Path of the input file without the '.pkl' extension

    Returns
    -------
    obj : object
        The unpickled object
    """
    source = name + '.pkl'
    with open(source, 'rb') as handle:
        restored = pickle.load(handle)
    return restored

In [None]:
def get_next_param_count(param_counts, losses, past_dd=False, alpha=2):
    """Choose the next parameter count for an adaptive sweep.

    A weighted third-degree polynomial is fitted to the history of
    (parameter count, loss) pairs. The sign of the polynomial's first
    derivative, evaluated just below the most recent parameter count,
    decides how far to step:

    * negative slope (loss still falling): step by ``max(alpha*slope, 3) + 1``
      and, if ``past_dd`` is already True, by a further 10;
    * non-negative slope: step by 1 and set ``past_dd`` to True.

    NOTE(review): when the slope is negative and ``alpha`` is positive,
    ``alpha*slope`` is negative, so ``max(alpha*slope, 3)`` is always 3 and
    ``alpha`` has no effect on the step. Confirm whether the magnitude of
    the slope was meant to be used instead.

    ...
    Parameters
    ----------
    param_counts : list or np.array of ints
        The parameter counts the model has been trained with so far; the
        last entry is treated as the current parameter count
    losses : list or np.array of floats
        The final loss for each entry of ``param_counts``
    past_dd : bool
        Flag to indicate whether the interpolation threshold
        has been reached. Set to False by default.
    alpha : float
        Tuning parameter that scales the slope when computing the step

    Returns
    -------
    param_count : int
        The next parameter count
    past_dd : bool
        Flag that indicates whether the interpolation threshold
        has been reached. This should be used as the input for
        the next iteration of this algorithm
    """

    # Read the current count before any conversion so its type is preserved
    latest_count = param_counts[-1]

    losses = np.asarray(losses)
    param_counts = np.asarray(param_counts)

    # Fit weights grow linearly with position in the history (i/n for the
    # i-th of n points), so the most recent points influence the fit most
    n_points = len(param_counts)
    weights = np.arange(1, n_points + 1) / n_points

    coeffs = np.polyfit(param_counts, losses, 3, w=weights)

    # First derivative of the cubic, evaluated slightly below the current count
    probe = latest_count - 10**-4
    slope = 3*coeffs[0]*probe**2 + 2*coeffs[1]*probe + coeffs[2]

    # 1 while the fitted loss is still decreasing, 0 otherwise
    descending = 1 if slope < 0 else 0

    step = descending*max(alpha*slope, 3) + 1

    if not descending:
        past_dd = True

    next_total = ceil(step) + latest_count
    if descending and past_dd:
        # Take a larger jump once the threshold has already been seen
        next_total = next_total + 10

    return next_total, past_dd


def labels_to_vec(labels, classes=10):
    """Convert integer class labels into a stack of one-hot row vectors

    ...
    Parameters
    ----------
    labels : np.array
        Iterable of labels; each one is passed through ``int()`` and used
        as the index of the entry that is set to 1
    classes : int
        Length of every one-hot vector (the number of classes)

    Returns
    -------
    label_vectors : np.array
        Array with one one-hot row per input label
    """

    def one_hot(label):
        row = np.array([0] * classes)
        row[int(label)] = 1
        return row

    return np.stack([one_hot(label) for label in labels])

def torch_zero_one_loss(outputs, labels):
    """Fraction of misclassified samples in a batch

    ...
    Parameters
    ----------
    outputs : torch.Tensor
        Per-class scores; the predicted class of each sample is the argmax
        along dimension 1
    labels : torch.Tensor
        The true class index of each sample

    Returns
    -------
    loss : torch.Tensor
        Double-precision scalar: the number of samples whose predicted
        class differs from its label, divided by ``len(labels)``
    """

    predicted = torch.argmax(outputs, dim=1)
    n_wrong = predicted.ne(labels).sum().double()
    return n_wrong / len(labels)

def sk_zero_one_loss(x, y):
    """Thin wrapper around scikit-learn's ``zero_one_loss``

    ...
    Parameters
    ----------
    x : np.array
        First set of labels; forwarded as the first positional argument
    y : np.array
        Second set of labels; forwarded as the second positional argument

    Returns
    -------
    loss : float
        Whatever ``zero_one_loss(x, y)`` returns for the two inputs
    """

    loss = zero_one_loss(x, y)
    return loss

def sk_mean_squared_error(x, y):
    """Thin wrapper around scikit-learn's ``mean_squared_error``

    ...
    Parameters
    ----------
    x : np.array
        First set of values/vectors; forwarded as the first positional argument
    y : np.array
        Second set of values/vectors; forwarded as the second positional argument

    Returns
    -------
    mse : float or np.array
        Whatever ``mean_squared_error(x, y)`` returns for the two inputs
    """

    mse = mean_squared_error(x, y)
    return mse



class TensorBoardUtils:
    """Helpers for displaying image batches together with a network's predictions."""

    def __init__(self):
        pass

    def matplotlib_imshow(self, img, one_channel=False):
        """Draw a single image tensor on the current matplotlib axes.

        Parameters
        ----------
        img : torch.Tensor
            Image tensor with the channel dimension first
        one_channel : bool
            If True the channels are averaged into one 2-D image, which is
            drawn with the "plasma" colormap. If False the tensor is drawn
            as a (height, width, channels) colour image.
        """
        if one_channel:
            img = img.mean(dim=0)
        # Maps [-1, 1] back to [0, 1].
        # NOTE(review): this undoes a Normalize(0.5, 0.5) transform; confirm
        # it matches the normalisation actually applied to the data.
        img = img / 2 + 0.5
        npimg = img.cpu().numpy()
        if one_channel:
            plt.imshow(npimg, cmap="plasma")
        else:
            plt.imshow(np.transpose(npimg, (1, 2, 0)))

    def images_to_probs(self, net, images):
        '''
        Generates predictions and corresponding probabilities from a trained
        network and a list of images

        Returns
        -------
        preds : np.array
            1-D array with the index of the highest-scoring class per image
        probs : list of float
            Softmax probability of the predicted class for each image
        '''
        output = net(images)
        # The predicted class is the index of the largest output per image
        _, preds_tensor = torch.max(output, 1)
        preds_tensor = preds_tensor.cpu()
        # np.squeeze collapses a one-image batch to a 0-d array, which can
        # neither be iterated nor indexed; keep the result at least 1-D.
        preds = np.atleast_1d(np.squeeze(preds_tensor.numpy()))
        return preds, [F.softmax(el, dim=0)[i].item() for i, el in zip(preds, output)]

    def plot_classes_preds(self, net, images, labels):
        '''
        Generates matplotlib Figure using a trained network, along with images
        and labels from a batch, that shows the network's top prediction along
        with its probability, alongside the actual label, coloring this
        information based on whether the prediction was correct or not.
        Uses the "images_to_probs" function.

        At most the first four images of the batch are shown.
        '''
        # Both helpers are methods of this class and must be reached via self
        preds, probs = self.images_to_probs(net, images)
        classes = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')
        # plot the images in the batch, along with predicted and true labels
        fig = plt.figure(figsize=(12, 48))
        for idx in range(min(4, len(preds))):
            ax = fig.add_subplot(1, 4, idx+1, xticks=[], yticks=[])
            self.matplotlib_imshow(images[idx], one_channel=True)
            ax.set_title("{0}, {1:.1f}%\n(label: {2})".format(
                classes[preds[idx]],
                probs[idx] * 100.0,
                classes[labels[idx]]),
                        color=("green" if preds[idx]==labels[idx].item() else "red"))
        return fig

In [None]:
class TorchData:
    """Base class holding the attributes that every dataset wrapper exposes.

    The constructor only creates the attributes and sets them to None.

    ...
    Attributes
    ----------
    train_loader : PyTorch Dataloader
        The dataloader for the training set
    test_loader : PyTorch Dataloader
        The dataloader for the testing set
    data_x_dim : int
        The size of the x-dimension for each image in the dataset
    data_y_dim : int
        The size of the y-dimension for each image in the dataset
    """

    def __init__(self):
        # Placeholders; nothing is loaded here
        self.train_loader = self.test_loader = None
        self.data_x_dim = self.data_y_dim = None

class MNIST(TorchData):
    """The MNIST Dataset (Handwritten Digits)

    Only a prefix of the MNIST training and test sets is used: the first
    ``training_samples`` training images and the first
    ``int(training_samples // .75) - training_samples`` test images. Both
    counts are capped at the size of the underlying dataset.

    ...
    Attributes
    ----------
    train_loader : PyTorch Dataloader
        The dataloader for the training subset
    test_loader : PyTorch Dataloader
        The dataloader for the testing subset
    data_x_dim : int
        Size of dimension 1 of an image tensor from the dataset
    data_y_dim : int
        Size of dimension 2 of an image tensor from the dataset
    train_batch_size : int
        The number of training examples per batch
    test_batch_size : int
        The number of testing examples per batch
    training_samples : int
        The requested number of training examples
    dataloaders : dict
        A dictionary that contains the 2 dataloaders. The keys are
        "train" and "test"
    classes : list of int
        Sorted distinct labels found in the training subset
    num_classes : int
        Number of distinct labels in the training subset
    samples : int
        Number of examples in the training subset
    dataset_sizes : dict
        Number of examples in the "train" and "test" subsets
    """

    def __init__(self, training_samples=4000, train_batch=128, test_batch=128):
        print('Initializing MNIST')
        self.train_batch_size = train_batch
        self.test_batch_size = test_batch
        self.training_samples = training_samples

        # The same preprocessing is applied to both splits
        transform = torchvision.transforms.Compose([
            torchvision.transforms.ToTensor(),
            torchvision.transforms.Normalize((0.1307,), (0.3081,))])

        full_train = torchvision.datasets.MNIST('./data',
                                                train=True,
                                                download=True,
                                                transform=transform)
        full_test = torchvision.datasets.MNIST('./data',
                                               train=False,
                                               download=True,
                                               transform=transform)

        # Cap the subset sizes so that no index points past the end of the
        # underlying dataset (that would only fail later, during iteration)
        n_train = min(training_samples, len(full_train))
        n_test = min(int(training_samples//.75) - training_samples, len(full_test))

        train_dataset = torch.utils.data.Subset(full_train, range(0, n_train))
        test_dataset = torch.utils.data.Subset(full_test, range(0, n_test))

        self.train_loader = torch.utils.data.DataLoader(train_dataset,
                                                        batch_size=self.train_batch_size,
                                                        shuffle=True)

        self.test_loader = torch.utils.data.DataLoader(test_dataset,
                                                       batch_size=self.test_batch_size,
                                                       shuffle=True)

        self.dataloaders = {'train': self.train_loader,
                            'test': self.test_loader}

        first_image = train_dataset[0][0]
        self.data_x_dim = first_image.shape[1]
        self.data_y_dim = first_image.shape[2]

        # Collect labels over every training example. len() of a DataLoader
        # is the number of batches, not the number of examples, so it must
        # not be used as the range here.
        train_labels = [train_dataset[i][1] for i in range(len(train_dataset))]
        self.classes = sorted(set(train_labels))
        self.num_classes = len(self.classes)
        self.samples = len(train_dataset)
        self.dataset_sizes = {'train': self.samples, 'test': len(test_dataset)}

class SKLearnData:
    """Loader for datasets in the numpy format used by the scikit-learn models."""

    def __init__(self):
        pass

    # NOTE(review): this function takes no ``self`` and is not decorated as a
    # static method, so it is meant to be called on the class itself:
    # ``SKLearnData.get_mnist(samples=...)``. The signature is left unchanged
    # so that existing callers keep working.
    def get_mnist(filename=None, samples=100):
        """Returns a subset of the the MNIST Dataset as numpy arrays

        The full dataset is cached in 'scikit_learn_data/X_saved.npy' and
        'scikit_learn_data/Y_saved.npy'; it is fetched from OpenML only when
        one of those files is missing.

        ...
        Parameters
        ----------
        samples : int
            The number of training datapoints to return. The validation
            set has samples//2 datapoints and directly follows the
            training set in the dataset. If the dataset is too small for
            both, samples is reduced until both sets fit.
        filename : str
            Currently unused; the cache file names are fixed.

        Returns
        -------
        X : np.array
            Training set of 784 (28*28) dimensional vectors
            that correspond to 28x28 MNIST images
        y : np.array
            Labels for each of the vectors in X
        X_val : np.array
            Validation set of 784 (28*28) dimensional vectors
            that correspond to 28x28 MNIST images
        y_val : np.array
            Labels for each of the vectors in X_val
        """

        cache_dir = 'scikit_learn_data'
        x_path = 'scikit_learn_data/X_saved.npy'
        y_path = 'scikit_learn_data/Y_saved.npy'

        if os.path.exists(x_path) and os.path.exists(y_path):
            X = np.load(x_path)
            # allow_pickle can execute arbitrary code while loading; only
            # load cache files that were written by this function.
            y = np.load(y_path, allow_pickle=True)
        else:
            print('Fetching MNIST')
            X, y = fetch_openml('mnist_784', version=1, return_X_y=True, as_frame=False)
            # np.save does not create missing directories
            os.makedirs(cache_dir, exist_ok=True)
            np.save(x_path, X)
            np.save(y_path, y)

        total = X.shape[0]
        if samples + samples//2 > total:
            # Largest training size whose validation set (half as big)
            # still fits: s + s//2 <= total for s = 2*total//3
            samples = (2 * total) // 3

        val_end = samples + samples//2

        X_val = X[samples:val_end]
        y_val = y[samples:val_end]
        X = X[:samples]
        y = y[:samples]

        return X, y, X_val, y_val


In [None]:
# Switch the working directory to the project folder on the mounted Drive.
# NOTE(review): this path spells the Drive root 'MyDrive', while an earlier
# cell uses 'My Drive' — confirm both resolve to the same folder.
os.chdir('/content/drive/MyDrive/6.s898 Project/Project')

In [None]:
class Plotter:
    """This class has tools for plotting the double descent curve for different models

    Every public ``plot_*`` method takes the results dictionary of one
    experiment and saves two JPEG figures below the current directory: one
    with the test squared loss and one with the test zero-one loss. Both
    figures also show the training loss on a hidden second y-axis. Output
    files are numbered by the count of entries already in the target folder.
    """

    def __init__(self):
        pass

    @staticmethod
    def _pair_ticks(primary, secondary, early_values, final_value, final_marks):
        """Select x-tick positions and labels for a sweep over two hyperparameters.

        Index i gets a tick when ``primary[i]`` is one of ``early_values``, or
        when ``primary[i] == final_value`` and ``secondary[i]`` is one of
        ``final_marks``. When consecutive entries repeat the same pair, only
        the last of them is kept.
        """
        positions, labels = [], []
        last = len(primary) - 1
        for i in range(len(primary)):
            is_milestone = (primary[i] in early_values) or (
                primary[i] == final_value and secondary[i] in final_marks)
            if not is_milestone:
                continue
            if i < last and primary[i] == primary[i + 1] and secondary[i] == secondary[i + 1]:
                continue
            positions.append(i)
            labels.append(str(primary[i]) + ' / ' + str(secondary[i]))
        return positions, labels

    def _save_loss_figures(self, collected_data, title, xlabel, ticks_x, ticks_label,
                           root_dir, squared_stem, zero_one_stem):
        """Draw and save the squared-loss and the zero-one-loss figure for one experiment."""
        figures = (
            ('mse_loss', 'Squared Loss', f'{root_dir}/squared_loss', squared_stem),
            ('zero_one_loss', 'Zero-One Loss (%)', f'{root_dir}/zero_one', zero_one_stem),
        )

        for loss_key, ylabel, out_dir, stem in figures:
            test_curve = collected_data[loss_key]
            train_curve = collected_data['train_loss']

            fig, ax_test = plt.subplots()
            try:
                ax_test.set_title(title)
                ax_test.set_xlabel(xlabel)
                ax_test.set_ylabel(ylabel)
                ax_test.plot(range(len(test_curve)), test_curve, color='tab:blue')
                ax_test.set_ylim(0, max(test_curve))

                # Second axes sharing the x-axis; its own y-axis is hidden so
                # the training curve is drawn on an independent scale
                ax_train = ax_test.twinx()
                ax_train.plot(range(len(train_curve)), train_curve, color='tab:orange')
                ax_train.axes.yaxis.set_visible(False)
                ax_train.set_ylim(0, max(train_curve))
                fig.tight_layout()

                legend_handles = [
                    mlines.Line2D([], [], color='tab:orange', markersize=15, label='Train'),
                    mlines.Line2D([], [], color='tab:blue', markersize=15, label='Test'),
                ]
                # pyplot acts on the current axes, which is the twin axes here
                plt.legend(handles=legend_handles)
                plt.xticks(ticks_x, ticks_label)

                os.makedirs(out_dir, exist_ok=True)
                index = len(os.listdir(out_dir))
                fig.savefig(f'{out_dir}/{stem}_{index}.jpg')
            finally:
                # Always release the figure, also when plotting or saving fails
                plt.close(fig)

    def plot_adaboost(self, collected_data):
        """Plot double descent with the dictionary returned after training the
        AdaBoost model. The plots are saved below 'adaboost-figures'

        ...
        Parameters
        ----------
        collected_data : dict
            Experiment results. Keys read: 'trees', 'forests', 'mse_loss',
            'zero_one_loss', 'train_loss', 'dataset' and 'samples'
        """
        ticks_x, ticks_label = self._pair_ticks(
            collected_data['trees'], collected_data['forests'],
            early_values=(1, 25), final_value=50, final_marks=(1, 10, 20, 30))

        dataset = collected_data['dataset']
        samples = collected_data['samples']

        self._save_loss_figures(
            collected_data,
            title=f'AdaBoost on {dataset}; {samples} samples',
            xlabel='Model parameters N_tree/N_forest',
            ticks_x=ticks_x, ticks_label=ticks_label,
            root_dir='adaboost-figures',
            squared_stem='dd_adaboost_squared',
            zero_one_stem='dd_adaboost_zero_one')

    def plot_l2boost(self, collected_data):
        """Plot double descent with the dictionary returned after training the
        L2-Boost model. The plots are saved below 'l2boost-figures'

        ...
        Parameters
        ----------
        collected_data : dict
            Experiment results. Keys read: 'trees', 'forests', 'mse_loss',
            'zero_one_loss', 'train_loss', 'dataset' and 'samples'
        """
        ticks_x, ticks_label = self._pair_ticks(
            collected_data['trees'], collected_data['forests'],
            early_values=(1, 25), final_value=50, final_marks=(1, 10, 20, 30))

        dataset = collected_data['dataset']
        samples = collected_data['samples']

        self._save_loss_figures(
            collected_data,
            title=f'L2-Boost on {dataset}; {samples} samples',
            xlabel='Model parameters N_tree/N_forest',
            ticks_x=ticks_x, ticks_label=ticks_label,
            root_dir='l2boost-figures',
            squared_stem='dd_l2boost_squared',
            zero_one_stem='dd_l2boost_zero_one')

    def plot_random_forest(self, collected_data):
        """Plot double descent with the dictionary returned after training the
        Random Forest model. The plots are saved below 'random-forest-figures'

        ...
        Parameters
        ----------
        collected_data : dict
            Experiment results. Keys read: 'leaf_sizes', 'trees', 'mse_loss',
            'zero_one_loss', 'train_loss', 'dataset' and 'samples'
        """
        ticks_x, ticks_label = self._pair_ticks(
            collected_data['leaf_sizes'], collected_data['trees'],
            early_values=(10, 1010), final_value=2010, final_marks=(1, 10, 20, 30))

        dataset = collected_data['dataset']
        samples = collected_data['samples']

        # The two file stems differ on purpose: they match the names of the
        # files this method has always written
        self._save_loss_figures(
            collected_data,
            title=f'Random Forest on {dataset}; {samples} samples',
            xlabel='Model parameters N_max_leaf/N_tree',
            ticks_x=ticks_x, ticks_label=ticks_label,
            root_dir='random-forest-figures',
            squared_stem='dd_random_forest_squared',
            zero_one_stem='dd_rf_zero_one')

    def plot_decisiontree(self, collected_data):
        """Plot double descent with the dictionary returned after training the
        Decision Tree model. The plots are saved below 'decision-tree-figures'

        ``collected_data`` is not modified.

        ...
        Parameters
        ----------
        collected_data : dict
            Experiment results. Keys read: 'leaf_sizes', 'depths', 'features',
            'mse_loss', 'zero_one_loss', 'train_loss', 'dataset' and 'samples'
        """
        leaf_sizes = collected_data['leaf_sizes']
        depths = collected_data['depths']
        features = collected_data['features']

        ticks_x, ticks_label = [], []
        last = len(leaf_sizes) - 1
        for i in range(len(leaf_sizes)):
            is_milestone = (
                leaf_sizes[i] in (2, 102)
                or (leaf_sizes[i] == 202 and depths[i] in (1, 51))
                or (leaf_sizes[i] == 202 and depths[i] == 101
                    and features[i] in ('28', '420', '784')))
            if not is_milestone:
                continue

            # Skip an entry whose successor repeats the same configuration
            if (i < last
                    and leaf_sizes[i] == leaf_sizes[i + 1]
                    and depths[i] == depths[i + 1]
                    and features[i] in ('sqrt', 28)
                    and features[i + 1] in ('sqrt', 28)):
                continue

            # 'sqrt' is labelled as 28. The value is kept in a local variable
            # so the caller's data stays untouched and repeated calls produce
            # the same ticks.
            feature_count = 28 if features[i] == 'sqrt' else int(features[i])

            ticks_label.append(
                str(leaf_sizes[i]) + '/' + str(depths[i]) + '/' + str(feature_count))
            ticks_x.append(i)

        dataset = collected_data['dataset']
        samples = collected_data['samples']

        self._save_loss_figures(
            collected_data,
            title=f'Decision Tree on {dataset}; {samples} samples',
            xlabel='Model parameters N_max_leaves/max_depth/max_features',
            ticks_x=ticks_x, ticks_label=ticks_label,
            root_dir='decision-tree-figures',
            squared_stem='dd_decision_tree_squared',
            zero_one_stem='dd_decision_tree_zero_one')

    def plot_logreg(self, collected_data):
        """Plot double descent with the dictionary returned after training the
        Logistic Regression model on varying sample sizes. The plots are saved
        below 'logistic-regression-dataset-figures'

        ...
        Parameters
        ----------
        collected_data : dict
            Experiment results. Keys read: 'sample_sizes', 'mse_loss',
            'zero_one_loss', 'train_loss' and 'dataset'
        """
        ticks_x, ticks_label = [], []
        for i, n_samples in enumerate(collected_data['sample_sizes']):
            if n_samples not in (7840, 3136, 1568, 784, 392, 196):
                continue
            # Tick labels are the ratio 784 / sample size: an integer where
            # the sample size divides 784, a decimal fraction otherwise
            if n_samples in (784, 392, 196):
                ticks_label.append(str(784//n_samples))
            else:
                ticks_label.append(str(784/n_samples))
            ticks_x.append(i)

        dataset = collected_data['dataset']

        self._save_loss_figures(
            collected_data,
            title=f'Logistic Regression on {dataset}; varying samples',
            xlabel='Model parameters Features/Samples',
            ticks_x=ticks_x, ticks_label=ticks_label,
            root_dir='logistic-regression-dataset-figures',
            squared_stem='dd_logistic_regression_squared',
            zero_one_stem='dd_logistic_regression_zero_one')

In [None]:
# Load the pickled results stored in 'logregD-dd-experiment.pkl' and save the
# logistic-regression double-descent figures for them.
# NOTE(review): the variable is called `rf` although it is passed to plot_logreg.
rf = load_obj('logregD-dd-experiment')
# Uncomment to inspect the sample sizes used in the sweep:
#rf['sample_sizes']
plotter = Plotter()
plotter.plot_logreg(rf)

100%|██████████| 40/40 [00:00<00:00, 47621.96it/s]


<Figure size 4000x4000 with 0 Axes>

<Figure size 4000x4000 with 0 Axes>

In [None]:
class TorchModels():
    """This class contains the attributes that all PyTorch models
    have in common. All PyTorch models will inherit from this class

    It resolves the loss function, the dataset wrapper and the compute
    device once, so that subclasses only have to build their network.

    ...
    Parameters (Not Attributes)
    ---------------------------
        loss : str
            Key of the loss function for the model. Options are {'L1', 'MSE',
            'CrossEntropy', 'NegativeLog'}.
        dataset : str
            Key of the dataset that the model will be trained on. Options are
            {'MNIST'}.
        batch_size : int
            Batch size, used for both the train and the test loader
        training_samples : int
            Desired number of elements from the training set
        cuda : bool
            If True, the model will train using GPU acceleration if a CUDA
            GPU is available. If False, the model will train on CPU

    Attributes
    ----------
        loss : nn.Module
            The instantiated loss function selected by the ``loss`` key.
        data : object
            The dataset wrapper selected by the ``dataset`` key.
        cuda : bool
            True only when GPU training was requested AND a CUDA GPU is
            available, so ``if self.cuda:`` is always safe to act on.
        device : torch.device
            ``cuda`` when ``self.cuda`` is True, ``cpu`` otherwise.

    Raises
    ------
        KeyError
            If ``loss`` or ``dataset`` is not one of the supported options.
    """

    def __init__(self, loss, dataset, batch_size, training_samples, cuda):
        super(TorchModels, self).__init__()

        # Factories rather than instances: only the requested entry is built.
        loss_functions = {'L1': nn.L1Loss,
                          'MSE': nn.MSELoss,
                          'CrossEntropy': nn.CrossEntropyLoss,
                          'NegativeLog': nn.NLLLoss}

        datasets = {'MNIST': lambda: MNIST(training_samples=training_samples,
                                           train_batch=batch_size,
                                           test_batch=batch_size)}

        # Validate both keys before loading any data so a typo fails fast
        # and with a message that lists the valid options.
        if loss not in loss_functions:
            raise KeyError(f'Unknown loss {loss!r}; options are {sorted(loss_functions)}')
        if dataset not in datasets:
            raise KeyError(f'Unknown dataset {dataset!r}; options are {sorted(datasets)}')

        self.loss = loss_functions[loss]()
        self.data = datasets[dataset]()

        # Honour the documented fallback: a GPU request on a machine without
        # CUDA silently degrades to CPU instead of leaving ``self.cuda`` True
        # (which made subclasses crash on ``.cuda()``).
        self.cuda = bool(cuda) and torch.cuda.is_available()
        self.device = torch.device('cuda' if self.cuda else 'cpu')


class MultilayerPerceptron(TorchModels):
    """A wrapper for a multilayer perceptron with a single hidden layer of variable size

    The wrapper owns the network, its optimizer and its learning-rate
    scheduler, and drives the double-descent experiment: one network is
    trained per entry of ``param_counts`` and the final losses of every
    network are recorded in ``losses``.

    ...

    Attributes
    ----------

        loss : str
            The loss function for the model. Options are {'L1', 'MSE',
            'CrossEntropy', 'NegativeLog'}.
        dataset : str
            The dataset that the model will be trained on. Options are
            {'MNIST'}.
        batch_size : int
            The batch size for the training set
        cuda : bool
            If True, cuda will be used instead of cpu
        optimizer : str
            The optimizer that the model will use while training. Options are
            {'SGD'}
        learning_rate : float
            Learning rate for optimizer
        momentum : float
            Momentum parameter to accelerate SGD
        scheduler_step_size : int
            Number of scheduler steps (one per training epoch) before the
            learning rate is multiplied by gamma
        gamma : float
            Learning rate scheduler factor
        current_count : int
            The index of the current parameter count in param_counts
        param_counts : np.array
            List of parameter counts that the model will be trained over.
            param_counts[i] * factor is the total number of parameters the
            i-th network is sized for.
        generate_parameters : True
            Uses a parameter adaptation algorithm to predict the next best model
            to train by analyzing the final loss vs hidden layer size of all previous
            models
        training_samples : int
            Desired number of elements from the training set
        factor : int
            Multiplier for param_counts. factor * param_counts[i] = target
            number of total parameters, from which the hidden layer width
            is derived (see hidden_layer_size)
        reuse_weights : True
            If True, reuses weights from previous model to help next model converge
            more quickly
        seed : int
            Seed for random weight initialization
        max_epochs : int
            The max number of iterations to train each model
    """

    # (key in self.losses, TensorBoard tag) in the order the scalars are logged.
    _DD_TAGS = (('train', 'MLP-Double-Descent/Train'),
                ('test', 'MLP-Double-Descent/Test'),
                ('zero_one_train', 'MLP-Double-Descent/Train (Zero-One)'),
                ('zero_one_test', 'MLP-Double-Descent/Test (Zero-One)'))

    class MLP(nn.Module):
        """An implementation of a 2-layer multilayer perceptron that allows
        for changing the number of neurons in the hidden layer"""

        def __init__(self, current_count, data, param_counts, factor, hidden_layer_size):
            # current_count, param_counts and factor are unused here; they are
            # kept so existing positional callers keep working.
            super().__init__()
            print(f'Initializing MLP with {hidden_layer_size} hidden units')

            self.data_dims = (data.data_x_dim, data.data_y_dim)

            self.input_layer = nn.Linear(self.data_dims[0] * self.data_dims[1],
                                         hidden_layer_size)

            self.hidden_layer = nn.Linear(hidden_layer_size, data.num_classes)

        def forward(self, x):
            # Flatten every sample to a vector of data_x_dim * data_y_dim values.
            x = x.view(-1, self.data_dims[0] * self.data_dims[1])
            x = F.relu(self.input_layer(x))
            x = self.hidden_layer(x)
            return x


    def __init__(self, loss='CrossEntropy',
                 dataset='MNIST',
                 batch_size=128,
                 cuda=False,
                 optimizer='SGD',
                 learning_rate=.01,
                 momentum=.95,
                 scheduler_step_size=500,
                 gamma=.9,
                 current_count=0,
                 param_counts=np.array([1, 2, 3]),
                 generate_parameters=True,
                 training_samples=4000,
                 factor=10**3,
                 reuse_weights=True,
                 seed=None,
                 max_epochs=1000):

        super(MultilayerPerceptron, self).__init__(loss, dataset, batch_size, training_samples, cuda)

        # ``is not None`` so that seed=0 is honoured as a real seed.
        if seed is not None:
            torch.manual_seed(seed)
        self.param_counts = param_counts
        self.current_count = current_count
        self.samples = training_samples
        # Counts the adaptive models trained after the threshold was detected.
        self.post_flag = 0
        self.generate_parameters = generate_parameters
        self.factor = factor
        self.reuse_weights = reuse_weights
        self.model = self.MLP(self.current_count, self.data, self.param_counts, self.factor, self.hidden_layer_size)

        self.learning_rate = learning_rate
        self.momentum = momentum
        self.optimizer = optimizer
        self.gamma = gamma
        self.scheduler_step_size = scheduler_step_size
        self._configure_optimizer()

        self.losses = {'train': np.array([]),
                       'test': np.array([]),
                       'zero_one_train': np.array([]),
                       'zero_one_test': np.array([])}
        self.max_epochs = max_epochs


    def _configure_optimizer(self):
        """(Re)creates the optimizer and LR scheduler for the current ``self.model``."""
        self.optim_dict = {'SGD': optim.SGD(self.model.parameters(),
                                            lr=self.learning_rate,
                                            momentum=self.momentum)}
        self.mlp_optim = self.optim_dict[self.optimizer]
        self.scheduler = optim.lr_scheduler.StepLR(self.mlp_optim,
                                                   step_size=self.scheduler_step_size,
                                                   gamma=self.gamma)


    @property
    def input_layer(self):
        """The input -> hidden linear layer of the current network."""
        return self.model.input_layer


    @property
    def hidden_layer(self):
        """The hidden -> output linear layer of the current network."""
        return self.model.hidden_layer

    @property
    def hidden_layer_size(self):
        """Width H of the hidden layer for the current parameter count.

        With d inputs and K classes the network has
            Total_Parameters = (d+1)*H + (H+1)*K = H*(d+1+K) + K
        so  H = (Total_Parameters - K) // (d + 1 + K).
        The result is floored and never smaller than one unit.
        """
        total_parameters = self.param_counts[self.current_count] * self.factor
        num_inputs = self.data.data_x_dim * self.data.data_y_dim
        num_classes = self.data.num_classes
        width = (total_parameters - num_classes) // (num_inputs + 1 + num_classes)
        return max(1, int(width))

    def reinitialize_classifier(self):
        """Uses new parameter count to initialize the next MLP.
        The N weights from the previous model are transplanted into
        the first N spots of the new model with M > N parameters;
        the remaining weights are drawn from N(0, 0.01^2). Biases keep
        their default initialization."""

        new_model = self.MLP(self.current_count, self.data, self.param_counts, self.factor, self.hidden_layer_size)

        if self.reuse_weights:
            old_in = self.model.input_layer.weight.detach()
            old_out = self.model.hidden_layer.weight.detach()

            with torch.no_grad():
                in_weights = torch.randn_like(new_model.input_layer.weight)*.01
                hidden_weights = torch.randn_like(new_model.hidden_layer.weight)*.01

                # nn.Linear stores weight as (out_features, in_features): hidden
                # units are rows of the input layer and columns of the output
                # layer. ``min`` keeps this valid if the new model is narrower.
                shared = min(old_in.shape[0], in_weights.shape[0])
                in_weights[:shared] = old_in[:shared]
                hidden_weights[:, :shared] = old_out[:, :shared]

                # Write into the real ``weight`` parameters; assigning to a
                # ``weights`` attribute would leave the forward pass untouched.
                new_model.input_layer.weight.copy_(in_weights)
                new_model.hidden_layer.weight.copy_(hidden_weights)

        self.model = new_model

        # Past the interpolation threshold the learning rate is no longer decayed.
        if self.param_counts[self.current_count] * self.factor > self.samples * self.data.num_classes:
            self.gamma = 1

        self._configure_optimizer()


    def train(self):
        """Trains the MLP model using the selected loss function,
        optimizer, and scheduler. This also outputs to tensorboard.
        To access all of the summaries for trained models, run the
        tensorboard command in another command line while the model
        is training

        ...

        Returns
        -------
        model
             A PyTorch neural network object that has been trained
        train_loss : list
            A list of all training losses at the end of each epoch
        test_acc : list
            A list of all test losses at the end of each epoch
        zero_one_loss : list
            A list of all 0-1 training losses at the end of each epoch
        zero_one_acc : list
            A list of all 0-1 test losses at the end of each epoch
        """

        tb_utils = TensorBoardUtils()
        count = self.param_counts[self.current_count]
        model_writer = SummaryWriter(f'mlp-runs/dd_model_{count}')

        try:
            # get some random training images
            dataiter = iter(self.data.dataloaders['train'])
            images, labels = next(dataiter)

            # create grid of images
            img_grid = torchvision.utils.make_grid(images)

            # show images
            tb_utils.matplotlib_imshow(img_grid, one_channel=True)

            # write to tensorboard (the graph is traced before the model is
            # moved to the training device, while model and images share a device)
            model_writer.add_image('MNIST Dataset', img_grid)
            model_writer.add_graph(self.model, images)
            model_writer.flush()

            train_loss = []
            test_acc = []
            zero_one_loss = []
            zero_one_acc = []

            print('Model with parameter count {}'.format(count))
            print('-' * 10)

            # self.device already falls back to CPU when CUDA is unavailable.
            self.model = self.model.to(self.device)

            run_tag = f'{self.hidden_layer_size} hidden units; {count}*{self.factor} total parameters'

            for epoch in tqdm(range(self.max_epochs)):

                # Switches between training and testing sets
                for phase in ['train', 'test']:
                    is_train = phase == 'train'

                    if is_train:
                        self.model.train()
                        running_loss = 0.0
                        running_zero_one_loss = 0.0
                    else:
                        self.model.eval()   # Set model to evaluate mode
                        running_test_loss = 0.0
                        running_zero_one_acc = 0.0

                    # Train/Test loop
                    for inputs, labels in self.data.dataloaders[phase]:
                        inputs = inputs.to(self.device)
                        labels = labels.to(self.device)
                        batch = inputs.size(0)

                        if is_train:
                            self.mlp_optim.zero_grad()
                            outputs = self.model(inputs)
                            loss = self.loss(outputs, labels)
                            # backward + optimize only if in training phase
                            loss.backward()
                            self.mlp_optim.step()
                            zero_one_train = torch_zero_one_loss(outputs, labels)
                            running_zero_one_loss += zero_one_train.item() * batch
                            running_loss += loss.item() * batch
                        else:
                            # No gradients are needed for evaluation.
                            with torch.no_grad():
                                outputs = self.model(inputs)
                                test_loss = self.loss(outputs, labels)
                                zero_one_test = torch_zero_one_loss(outputs, labels)
                            running_zero_one_acc += zero_one_test.item() * batch
                            running_test_loss += test_loss.item() * batch

                    # The LR schedule only advances before the adaptive
                    # post-threshold phase has started.
                    if is_train and not self.post_flag:
                        self.scheduler.step()

                # Per-sample averages over the whole epoch.
                train_loss.append(running_loss/ self.data.dataset_sizes['train'])
                test_acc.append(running_test_loss/ self.data.dataset_sizes['test'])
                zero_one_loss.append(running_zero_one_loss/self.data.dataset_sizes['train'])
                zero_one_acc.append(running_zero_one_acc/self.data.dataset_sizes['test'])

                model_writer.add_scalar(f'Train-Loss/{run_tag}', train_loss[-1], epoch)
                model_writer.add_scalar(f'Test-Loss/{run_tag}', test_acc[-1], epoch)
                model_writer.add_scalar(f'Train-Loss/{run_tag} (Zero-One)', zero_one_loss[-1], epoch)
                model_writer.add_scalar(f'Test-Loss/{run_tag} (Zero-One)', zero_one_acc[-1], epoch)

                # Early stopping once the training set is (numerically) fit:
                # always in the adaptive post-threshold phase, otherwise only
                # for models below the interpolation threshold.
                if (zero_one_loss[-1] == 0 or train_loss[-1] < 10**-4):
                    if self.generate_parameters:
                        if self.post_flag:
                            break

                    if count * self.factor < self.samples * self.data.num_classes:
                        break

            print('Train Loss: {:.4f}\nTest Loss {:.4f}\n{} Hidden Units'.format(train_loss[-1], test_acc[-1], self.hidden_layer_size))
        finally:
            # Release the event file even if training is interrupted.
            model_writer.close()

        torch.cuda.empty_cache()

        return self.model, train_loss, test_acc, zero_one_loss, zero_one_acc


    def _train_and_record(self, dd_writer):
        """Trains the current model, pickles the wrapper, then appends the
        final losses to ``self.losses`` and logs them to ``dd_writer``."""
        count = self.param_counts[self.current_count]

        _, train_loss, test_loss, zero_one_train, zero_one_test = self.train()
        save_obj(self, f'mlp-model/{count}_width')

        finals = {'train': train_loss[-1],
                  'test': test_loss[-1],
                  'zero_one_train': zero_one_train[-1],
                  'zero_one_test': zero_one_test[-1]}

        for key, tag in self._DD_TAGS:
            self.losses[key] = np.append(self.losses[key], finals[key])
            dd_writer.add_scalar(tag, self.losses[key][-1], count)


    def _save_losses(self):
        """Writes the recorded losses and parameter counts to ``mlp-output``."""
        np.save('mlp-output/train_loss.npy', self.losses['train'])
        np.save('mlp-output/test_loss.npy', self.losses['test'])
        np.save('mlp-output/zero_one_train.npy', self.losses['zero_one_train'])
        np.save('mlp-output/zero_one_test.npy', self.losses['zero_one_test'])
        np.save('mlp-output/parameter_counts', self.param_counts)


    def _results(self):
        """Bundles the recorded losses and parameter counts into a dict."""
        return {'train_loss': self.losses['train'],
                'test_loss': self.losses['test'],
                'zero_one_train': self.losses['zero_one_train'],
                'zero_one_test': self.losses['zero_one_test'],
                'parameter_counts': self.param_counts}


    def double_descent(self):
        """Uses the train and get_next_param_count methods
        to train the same architecture with varying parameter
        sizes. This method also keeps track of the final losses
        of each model that is trained

        First every entry of ``param_counts`` is trained. If
        ``generate_parameters`` is True, further parameter counts are then
        requested from get_next_param_count until ``post_flag`` reaches 4.

        ...

        Returns
        -------
        dict
            Keys 'train_loss', 'test_loss', 'zero_one_train', 'zero_one_test'
            (final loss of every trained model) and 'parameter_counts'.
        """
        # Output folders: losses go to mlp-output, pickled wrappers to mlp-model.
        os.makedirs('mlp-output', exist_ok=True)
        os.makedirs('mlp-model', exist_ok=True)

        # Start from a clean TensorBoard log directory.
        shutil.rmtree('mlp-runs', ignore_errors=True)

        dd_writer = SummaryWriter('mlp-runs/double-descent')
        try:
            while self.current_count < len(self.param_counts):

                self._train_and_record(dd_writer)
                self.current_count += 1

                if self.current_count < len(self.param_counts):
                    self.reinitialize_classifier()

                self._save_losses()

            if not self.generate_parameters:
                return self._results()

            # Point back at the last trained model before extending the list.
            self.current_count -= 1
            flag = False
            while self.post_flag < 4:

                # Test losses are normalised to sum to one before being handed
                # to the parameter-count predictor.
                next_ct, flag = get_next_param_count(self.param_counts,
                                                     self.losses['test']/self.losses['test'].sum(),
                                                     flag)
                self.param_counts = np.append(self.param_counts, next_ct)
                self.current_count += 1
                self.reinitialize_classifier()

                # The wrapper is pickled after training, so the file named
                # after this count holds the model trained for it.
                self._train_and_record(dd_writer)

                if flag and (self.param_counts[-1] - self.param_counts[-2]) != 1:
                    print('Iterating Post Flag')
                    self.post_flag += 1
                    print(f'Post Flag {self.post_flag}')

                self._save_losses()

            return self._results()
        finally:
            dd_writer.close()

In [None]:
class EnsembleMLP(TorchModels):
    """A wrapper for a multilayer perceptron with a single hidden layer of variable size

    ...

    Attributes
    ----------

        loss : str
            The loss function for the model. Options are {'L1', 'MSE',
            'CrossEntropy'}.
        dataset : str
            The dataset that the model will be trained on. Options are
            {'MNIST'}.
        batch_size : int
            The batch size for the training set
        cuda : bool
            If True, cuda will be used instead of cpu
        optimizer : str
            The optimizer that the model will use while training. Options are
            {'SGD'}
        learning_rate : float
            Learning rate for optimizer
        momentum : float
            Momentum parameter to accelerate SGD
        scheduler_step_size : int
            Number of iterations before applying learning rate scheduler
        gamma : float
            Learning rate scheduler factor
        current_count : int
            The index of the current parameter count in param_counts
        param_counts : np.array
            List of parameter counts that the model will be trained over.
            Since this model is an MLP, these counts correspond to N*10^3
            neurons for a parameter count, N.
        generate_parameters : True
            Uses a parameter adaptation algorithm to predict the next best model
            to train by analyzing the final loss vs hidden layer size of all previous
            models
        training_samples : int
            Desired number of elements from the training set
        factor : int
            Multiplier for param_counts. factor * param_counts[i] = number
            of neurons in hidden layer
        reuse_weights : True
            If True, reuses weights from previous model to help next model converge
            more quickly
        seed : int
            Seed for random weight initialization
        max_epochs : int
            The max number of iterations to train each model
    """

    def __init__(self, loss='CrossEntropy',
                 dataset='MNIST',
                 batch_size=128,
                 cuda=False,
                 optimizer='SGD',
                 learning_rate=.01,
                 momentum=.95,
                 scheduler_step_size=500,
                 gamma=.9,
                 current_count=0,
                 param_counts=np.array([1, 2, 3]),
                 generate_parameters=True,
                 training_samples=4000,
                 factor=10**3,
                 reuse_weights=True,
                 seed=None,
                 max_epochs=1000,
                 num_models=5):
        """Builds ``num_models`` identically sized MLPs, stacks their state
        with combine_state_for_ensemble and creates the optimizer and
        scheduler over the stacked parameters.

        ``num_models`` is the number of ensemble members; every other
        argument has the meaning given in the class docstring.
        """

        super(EnsembleMLP, self).__init__(loss, dataset, batch_size, training_samples, cuda)

        # ``is not None`` so that seed=0 is honoured as a real seed.
        if seed is not None:
            torch.manual_seed(seed)
        self.param_counts = param_counts
        self.current_count = current_count
        self.samples = training_samples
        self.post_flag = 0
        self.generate_parameters = generate_parameters
        self.factor = factor
        self.reuse_weights = reuse_weights
        self.num_models = num_models

        # Place the members on the device chosen by the base class instead of
        # a hard-coded "cuda:0", so CPU-only hosts and cuda=False work.
        self.model = [MultilayerPerceptron.MLP(self.current_count,
                                               self.data,
                                               self.param_counts,
                                               self.factor,
                                               self.hidden_layer_size).to(self.device)
                      for _ in range(self.num_models)]
        self.fmodel, self.params, self.buffers = combine_state_for_ensemble(self.model)

        # The stacked tensors are what the optimizer updates.
        for stacked in self.params:
            stacked.requires_grad_()

        self.learning_rate = learning_rate
        self.momentum = momentum
        self.optim_dict = {'SGD': optim.SGD(self.params,
                                            lr=self.learning_rate,
                                            momentum=self.momentum)}

        self.optimizer = optimizer
        self.mlp_optim = self.optim_dict[self.optimizer]
        self.gamma = gamma
        self.scheduler_step_size = scheduler_step_size
        self.scheduler = optim.lr_scheduler.StepLR(self.mlp_optim,
                                                   step_size=self.scheduler_step_size,
                                                   gamma=self.gamma)

        self.losses = {'train': np.array([]),
                       'test': np.array([]),
                       'zero_one_train': np.array([]),
                       'zero_one_test': np.array([])}
        self.max_epochs = max_epochs


    @property
    def input_layer(self):
        """The input -> hidden linear layer of every ensemble member, in member order."""
        # self.model is a list of MLPs, so the layer is looked up per member
        # (attribute access on the list itself raises AttributeError).
        return [member.input_layer for member in self.model]


    @property
    def hidden_layer(self):
        """The hidden -> output linear layer of every ensemble member, in member order."""
        # self.model is a list of MLPs, so the layer is looked up per member
        # (attribute access on the list itself raises AttributeError).
        return [member.hidden_layer for member in self.model]

    @property
    def hidden_layer_size(self):
        """Width H of each member's hidden layer for the current parameter count.

        With d inputs and K classes one network has
            Total_Parameters = (d+1)*H + (H+1)*K = H*(d+1+K) + K
        so  H = (Total_Parameters - K) // (d + 1 + K).
        The result is floored and never smaller than one unit.
        """
        total_parameters = self.param_counts[self.current_count] * self.factor
        num_inputs = self.data.data_x_dim * self.data.data_y_dim
        num_classes = self.data.num_classes
        width = (total_parameters - num_classes) // (num_inputs + 1 + num_classes)
        return max(1, int(width))

    def reinitialize_classifier(self):
        """Uses new parameter count to initialize the next ensemble of MLPs.
        For every member, the N trained weights from the previous model are
        transplanted into the first N spots of the new model with M > N
        parameters; the remaining weights are drawn from N(0, 0.01^2).
        Biases keep their default initialization."""

        new_model = [MultilayerPerceptron.MLP(self.current_count,
                                              self.data,
                                              self.param_counts,
                                              self.factor,
                                              self.hidden_layer_size).to(self.device)
                     for _ in range(self.num_models)]

        if self.reuse_weights:
            # The optimizer updates the stacked tensors in self.params, not the
            # modules in self.model, so the trained values are read from there.
            # Assumes combine_state_for_ensemble keeps named_parameters() order
            # with the member index as leading dimension -- TODO confirm
            # against the installed functorch version.
            names = [name for name, _ in self.model[0].named_parameters()]
            trained = dict(zip(names, self.params))
            old_in_all = trained['input_layer.weight'].detach()
            old_out_all = trained['hidden_layer.weight'].detach()

            with torch.no_grad():
                for i, member in enumerate(new_model):
                    in_weights = torch.randn_like(member.input_layer.weight)*.01
                    hidden_weights = torch.randn_like(member.hidden_layer.weight)*.01

                    # nn.Linear stores weight as (out_features, in_features):
                    # hidden units are rows of the input layer and columns of
                    # the output layer. ``min`` keeps this valid if the new
                    # model is narrower than the old one.
                    shared = min(old_in_all[i].shape[0], in_weights.shape[0])
                    in_weights[:shared] = old_in_all[i][:shared]
                    hidden_weights[:, :shared] = old_out_all[i][:, :shared]

                    # Write into the real ``weight`` parameters; assigning to a
                    # ``weights`` attribute would never reach the forward pass.
                    member.input_layer.weight.copy_(in_weights)
                    member.hidden_layer.weight.copy_(hidden_weights)

        self.model = new_model
        self.fmodel, self.params, self.buffers = combine_state_for_ensemble(self.model)
        for stacked in self.params:
            stacked.requires_grad_()

        self.optim_dict = {'SGD': optim.SGD(self.params,
                                            lr=self.learning_rate,
                                            momentum=self.momentum)}

        self.mlp_optim = self.optim_dict[self.optimizer]

        # Past the interpolation threshold the learning rate is no longer decayed.
        if self.param_counts[self.current_count] * self.factor > self.samples * self.data.num_classes:
            self.gamma = 1

        self.scheduler = optim.lr_scheduler.StepLR(self.mlp_optim,
                                                   step_size=self.scheduler_step_size,
                                                   gamma=self.gamma)


    def train(self):
        """Trains the ensemble using the selected loss function,
        optimizer, and scheduler. This also outputs to tensorboard.
        To access all of the summaries for trained models, run the
        tensorboard command in another command line while the model
        is training

        The prediction for a batch is the mean over the members' outputs,
        computed with vmap over the stacked ``self.params``; the loss is
        taken on that mean.

        ...

        Returns
        -------
        model
             The list of member modules held in ``self.model``. The optimizer
             updates the stacked ``self.params``, not these modules.
        train_loss : list
            A list of all training losses at the end of each epoch
        test_acc : list
            A list of all test losses at the end of each epoch
        zero_one_loss : list
            A list of all 0-1 training losses at the end of each epoch
        zero_one_acc : list
            A list of all 0-1 test losses at the end of each epoch
        """

        tb_utils = TensorBoardUtils()
        count = self.param_counts[self.current_count]
        model_writer = SummaryWriter(f'ensemble-runs/dd_model_{count}_networks_{self.num_models}')

        try:
            # get some random training images
            dataiter = iter(self.data.dataloaders['train'])
            images, labels = next(dataiter)

            # create grid of images
            img_grid = torchvision.utils.make_grid(images)

            # show images (no image or graph is written to tensorboard here)
            tb_utils.matplotlib_imshow(img_grid, one_channel=True)

            train_loss = []
            test_acc = []
            zero_one_loss = []
            zero_one_acc = []

            print('Model with parameter count {}'.format(count))
            print('-' * 10)

            # Batches must live where the stacked parameters live, whatever
            # device they were created on.
            device = self.params[0].device
            self.model = [member.to(device) for member in self.model]

            # One functional forward pass per member, sharing the same batch.
            ensemble_forward = vmap(self.fmodel, (0, 0, None))

            run_tag = f'{self.hidden_layer_size} hidden units; {count}*{self.factor} total parameters'

            for epoch in tqdm(range(self.max_epochs)):

                # Switches between training and testing sets
                for phase in ['train', 'test']:
                    is_train = phase == 'train'

                    if is_train:
                        for member in self.model:
                            member.train()
                        running_loss = 0.0
                        running_zero_one_loss = 0.0
                    else:
                        for member in self.model:
                            member.eval()   # Set model to evaluate mode
                        running_test_loss = 0.0
                        running_zero_one_acc = 0.0

                    # Train/Test loop
                    for inputs, labels in self.data.dataloaders[phase]:
                        inputs = inputs.to(device)
                        labels = labels.to(device)
                        batch = inputs.size(0)

                        if is_train:
                            self.mlp_optim.zero_grad()
                            outputs = torch.mean(ensemble_forward(self.params, self.buffers, inputs), dim=0)
                            loss = self.loss(outputs, labels)
                            # backward + optimize only if in training phase
                            loss.backward()
                            self.mlp_optim.step()
                            zero_one_train = torch_zero_one_loss(outputs, labels)
                            running_zero_one_loss += zero_one_train.item() * batch
                            running_loss += loss.item() * batch
                        else:
                            # No gradients are needed for evaluation.
                            with torch.no_grad():
                                outputs = torch.mean(ensemble_forward(self.params, self.buffers, inputs), dim=0)
                                test_loss = self.loss(outputs, labels)
                                zero_one_test = torch_zero_one_loss(outputs, labels)
                            running_zero_one_acc += zero_one_test.item() * batch
                            running_test_loss += test_loss.item() * batch

                    # The LR schedule only advances before the adaptive
                    # post-threshold phase has started.
                    if is_train and not self.post_flag:
                        self.scheduler.step()

                # Per-sample averages over the whole epoch.
                train_loss.append(running_loss/ self.data.dataset_sizes['train'])
                test_acc.append(running_test_loss/ self.data.dataset_sizes['test'])
                zero_one_loss.append(running_zero_one_loss/self.data.dataset_sizes['train'])
                zero_one_acc.append(running_zero_one_acc/self.data.dataset_sizes['test'])

                model_writer.add_scalar(f'Train-Loss/{run_tag}', train_loss[-1], epoch)
                model_writer.add_scalar(f'Test-Loss/{run_tag}', test_acc[-1], epoch)
                model_writer.add_scalar(f'Train-Loss/{run_tag} (Zero-One)', zero_one_loss[-1], epoch)
                model_writer.add_scalar(f'Test-Loss/{run_tag} (Zero-One)', zero_one_acc[-1], epoch)

                # Early stopping once the training set is (numerically) fit:
                # always in the adaptive post-threshold phase, otherwise only
                # for models below the interpolation threshold.
                if (zero_one_loss[-1] == 0 or train_loss[-1] < 10**-4):
                    if self.generate_parameters:
                        if self.post_flag:
                            break

                    if count * self.factor < self.samples * self.data.num_classes:
                        break

            print('Train Loss: {:.4f}\nTest Loss {:.4f}\n{} Hidden Units'.format(train_loss[-1], test_acc[-1], self.hidden_layer_size))
        finally:
            # Release the event file even if training is interrupted.
            model_writer.close()

        torch.cuda.empty_cache()

        return self.model, train_loss, test_acc, zero_one_loss, zero_one_acc


    def double_descent(self):
        """Trains the same architecture over a range of parameter counts
        and keeps track of the final losses of every model that is trained.

        Every entry of ``self.param_counts`` from ``self.current_count``
        onwards is trained with ``self.train``. If ``self.generate_parameters``
        is set, ``get_next_param_count`` is then used to propose further
        parameter counts until ``self.post_flag`` reaches 4.

        After each model the object is pickled to ``ensemble-model/`` and the
        loss arrays are written to ``ensemble-output/``. The tensorboard
        directory ``ensemble-runs`` is deleted at the start of the call.

        ...
        Returns
        -------
        collected_data : dict
            The accumulated final losses and the parameter counts. The keys
            are {'train_loss', 'test_loss', 'zero_one_train',
            'zero_one_test', 'parameter_counts'}
        """
        # Both directories are written to below. Creating them is best-effort:
        # a failure is reported but does not abort the run by itself.
        for directory in ('ensemble-output', 'ensemble-model'):
            try:
                os.makedirs(directory, exist_ok=True)
            except OSError as E:
                print(f'Could not make {directory}')
                print(E)

        # Start from an empty tensorboard directory; a missing one is fine
        shutil.rmtree('ensemble-runs', ignore_errors=True)

        # (key in self.losses, tensorboard tag) for every tracked loss
        scalar_tags = (('train', 'Ensemble-Double-Descent/Train'),
                       ('test', 'Ensemble-Double-Descent/Test'),
                       ('zero_one_train', 'Ensemble-Double-Descent/Train (Zero-One)'),
                       ('zero_one_test', 'Ensemble-Double-Descent/Test (Zero-One)'))

        dd_writer = SummaryWriter('ensemble-runs/double-descent')

        def train_and_record(checkpoint_label):
            """Trains the current model, pickles it, then appends and logs its final losses"""
            _, train_loss, test_loss, zero_one_train, zero_one_test = self.train()
            save_obj(self, f'ensemble-model/{checkpoint_label}_width')

            final_losses = {'train': train_loss[-1],
                            'test': test_loss[-1],
                            'zero_one_train': zero_one_train[-1],
                            'zero_one_test': zero_one_test[-1]}

            step = self.param_counts[self.current_count]
            for key, tag in scalar_tags:
                self.losses[key] = np.append(self.losses[key], final_losses[key])
                dd_writer.add_scalar(tag, self.losses[key][-1], step)

        def save_losses():
            """Writes the loss arrays and parameter counts collected so far to disk"""
            np.save('ensemble-output/train_loss.npy', self.losses['train'])
            np.save('ensemble-output/test_loss.npy', self.losses['test'])
            np.save('ensemble-output/zero_one_train.npy', self.losses['zero_one_train'])
            np.save('ensemble-output/zero_one_test.npy', self.losses['zero_one_test'])
            np.save('ensemble-output/parameter_counts', self.param_counts)

        def collected_data():
            """Builds the dictionary that is returned to the caller"""
            return {'train_loss': self.losses['train'],
                    'test_loss': self.losses['test'],
                    'zero_one_train': self.losses['zero_one_train'],
                    'zero_one_test': self.losses['zero_one_test'],
                    'parameter_counts': self.param_counts}

        try:
            # Phase 1: the parameter counts that were supplied up front
            while self.current_count < len(self.param_counts):

                train_and_record(self.param_counts[self.current_count])
                self.current_count += 1

                if self.current_count < len(self.param_counts):
                    self.reinitialize_classifier()

                save_losses()

            if not self.generate_parameters:
                return collected_data()

            # Phase 2: current_count is one past the end here, so step back
            # onto the last trained model before appending new counts
            self.current_count -= 1
            flag = False
            while self.post_flag < 4:

                # The test losses are normalised to sum to one before fitting
                next_ct, flag = get_next_param_count(self.param_counts,
                                                     self.losses['test']/self.losses['test'].sum(),
                                                     flag)
                self.param_counts = np.append(self.param_counts, next_ct)
                self.current_count += 1
                self.reinitialize_classifier()

                # The checkpoint is written after training so that the file
                # named after next_ct really contains the model of that size
                train_and_record(next_ct)

                if flag and (self.param_counts[-1] - self.param_counts[-2]) != 1:
                    print('Iterating Post Flag')
                    self.post_flag += 1
                    print(f'Post Flag {self.post_flag}')

                save_losses()

            return collected_data()
        finally:
            # Flush and release the event file even if training fails
            dd_writer.close()

In [None]:
class WeakEnsemble(TorchModels):
    """A single-hidden-layer MLP of variable size whose 0-1 loss is computed
    on a weighted vote between the MLP and four pre-trained "small" models

    The MLP itself is trained on ``self.loss`` alone. The small models only
    enter the zero-one metrics: each of the four contributes a vote of weight
    ``1/(num_votes + 4)`` and the MLP one of weight ``num_votes/(num_votes + 4)``.

    ...

    Parameters (Not Attributes)
    ---------------------------
        model1, model2, model3 : objects with a ``predict`` method
            Fitted classifiers that are called on the flattened inputs as
            numpy arrays
        model4 : sequence of objects with a ``predict`` method
            A collection of fitted classifiers; their majority vote (mode)
            counts as a single small model

    Attributes
    ----------

        loss : str
            The loss function for the model. Options are {'L1', 'MSE',
            'CrossEntropy'}.
        dataset : str
            The dataset that the model will be trained on. Options are
            {'MNIST'}.
        batch_size : int
            The batch size for the training set
        cuda : bool
            If True, cuda will be used instead of cpu
        optimizer : str
            The optimizer that the model will use while training. Options are
            {'SGD'}
        learning_rate : float
            Learning rate for optimizer
        momentum : float
            Momentum parameter to accelerate SGD
        scheduler_step_size : int
            Number of iterations before applying learning rate scheduler
        gamma : float
            Learning rate scheduler factor
        current_count : int
            The index of the current parameter count in param_counts
        param_counts : np.array
            List of parameter counts that the model will be trained over.
            Since this model is an MLP, these counts correspond to N*10^3
            neurons for a parameter count, N.
        generate_parameters : True
            Uses a parameter adaptation algorithm to predict the next best model
            to train by analyzing the final loss vs hidden layer size of all previous
            models
        training_samples : int
            Desired number of elements from the training set
        factor : int
            Multiplier for param_counts. factor * param_counts[i] = number
            of neurons in hidden layer
        reuse_weights : True
            If True, reuses weights from previous model to help next model converge
            more quickly
        seed : int
            Seed for random weight initialization
        max_epochs : int
            The max number of iterations to train each model
        num_votes : int
            Weight of the MLP's vote relative to one small model
    """

    class MLP(nn.Module):
        """An implementation of a 2-layer multilayer perceptron that allows
        for changing the number of neurons in the hidden layer"""

        def __init__(self, current_count, data, param_counts, factor, hidden_layer_size):
            # current_count, param_counts and factor are accepted but not
            # used here; only data and hidden_layer_size shape the network
            super().__init__()
            print(f'Initializing MLP with {hidden_layer_size} hidden units')

            self.data_dims = (data.data_x_dim, data.data_y_dim)

            self.input_layer = nn.Linear(self.data_dims[0] * self.data_dims[1],
                                         hidden_layer_size)

            self.hidden_layer = nn.Linear(hidden_layer_size, data.num_classes)

        def forward(self, x):
            # Flatten every sample to a vector of x_dim * y_dim features
            x = x.view(-1, self.data_dims[0] * self.data_dims[1])
            x = F.relu(self.input_layer(x))
            x = self.hidden_layer(x)
            return x

    def __init__(self, model1, model2, model3, model4, loss='CrossEntropy',
                 dataset='MNIST',
                 batch_size=128,
                 cuda=False,
                 optimizer='SGD',
                 learning_rate=.01,
                 momentum=.95,
                 scheduler_step_size=500,
                 gamma=.9,
                 current_count=0,
                 param_counts=np.array([1, 2, 3]),
                 generate_parameters=True,
                 training_samples=4000,
                 factor=10**3,
                 reuse_weights=True,
                 seed=None,
                 max_epochs=1000,
                 num_votes=1):

        # NOTE(review): self.data, self.loss and self.cuda are read below but
        # never assigned in this class - presumably set by TorchModels; confirm
        super(WeakEnsemble, self).__init__(loss, dataset, batch_size, training_samples, cuda)

        # Compare against None so that seed=0 is honoured as well
        if seed is not None:
            torch.manual_seed(seed)
        self.param_counts = param_counts
        self.current_count = current_count
        self.samples = training_samples
        self.post_flag = 0
        self.generate_parameters = generate_parameters
        self.factor = factor
        self.reuse_weights = reuse_weights
        self.num_votes = num_votes
        self.model = self.MLP(self.current_count, self.data, self.param_counts, self.factor, self.hidden_layer_size)
        self.model1 = model1
        self.model2 = model2
        self.model3 = model3
        self.model4 = model4
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.optimizer = optimizer
        self.gamma = gamma
        self.scheduler_step_size = scheduler_step_size
        self._build_optimizer_and_scheduler()

        self.losses = {'train': np.array([]),
                       'test': np.array([]),
                       'zero_one_train': np.array([]),
                       'zero_one_test': np.array([])}
        self.max_epochs = max_epochs

    def _build_optimizer_and_scheduler(self):
        """Creates the optimizer and the StepLR scheduler for the current
        self.model from the stored hyperparameters"""

        self.optim_dict = {'SGD': optim.SGD(self.model.parameters(),
                                            lr=self.learning_rate,
                                            momentum=self.momentum)}

        self.mlp_optim = self.optim_dict[self.optimizer]

        self.scheduler = optim.lr_scheduler.StepLR(self.mlp_optim,
                                                   step_size=self.scheduler_step_size,
                                                   gamma=self.gamma)

    def predict_small(self, input):
        """Sums the votes of the four small models for a batch of inputs.

        ...
        Parameters
        ----------
        input : torch.Tensor
            A batch of inputs; it is flattened to one row per sample before
            it is handed to the small models

        Returns
        -------
        torch.Tensor
            The element-wise sum of the four vote arrays produced by
            labels_to_vec, placed on the same device as ``input``
        """
        np_input = input.view(input.shape[0], -1).cpu().numpy()
        pred1 = labels_to_vec(self.model1.predict(np_input))
        pred2 = labels_to_vec(self.model2.predict(np_input))
        pred3 = labels_to_vec(self.model3.predict(np_input))

        # model4 is a collection of models: take the per-sample majority label.
        # The labels are cast to int for the mode and back to str afterwards.
        member_labels = np.array(
            [self.model4[i].predict(np_input) for i in range(len(self.model4))]).astype(int)
        pred4 = labels_to_vec(
            stats.mode(member_labels, keepdims=True)[0].reshape(-1).astype(str))

        # Follow the device of the inputs instead of assuming cuda, so the
        # votes can be combined with the MLP outputs on cpu as well
        return torch.from_numpy(pred1 + pred2 + pred3 + pred4).to(input.device)

    def _ensemble_vote(self, outputs, small_outputs):
        """Combines the MLP prediction with the votes of the small models.

        The MLP prediction is turned into a one-hot vote of weight
        num_votes/(num_votes + 4); the summed small-model votes are weighted
        by 1/(num_votes + 4).
        """
        # 10 classes are hard-coded here - presumably the width of the vote
        # vectors returned by labels_to_vec; TODO confirm
        one_hot = F.one_hot(torch.argmax(outputs, dim=1), 10)
        total_votes = self.num_votes + 4
        return self.num_votes/total_votes * one_hot + 1/total_votes * small_outputs

    @property
    def input_layer(self):
        return self.model.input_layer


    @property
    def hidden_layer(self):
        return self.model.hidden_layer

    @property
    def hidden_layer_size(self):
        # This computes the size of the hidden layer, H, using the equation
        # Total_Parameters = (d+1)*H + (H + 1)*K
        # NOTE(review): solving that equation for H gives
        # (Total - K) / (d + 1 + K), which is not what is computed below.
        # The expression is left untouched so existing results stay comparable.
        return (self.param_counts[self.current_count] * self.factor \
                + self.data.num_classes)//(self.data.data_x_dim * self.data.data_y_dim + 1)

    def reinitialize_classifier(self):
        """Uses new parameter count to initialize the next MLP.
        If reuse_weights is set, the weights of the N hidden units of the
        previous model are transplanted into the first N hidden units of the
        new model; the remaining weights are drawn from a normal distribution
        scaled by 0.01. Biases are not transplanted."""

        new_model = self.MLP(self.current_count, self.data, self.param_counts, self.factor, self.hidden_layer_size)

        if self.reuse_weights:

            in_weights = torch.randn_like(new_model.input_layer.weight)*.01
            hidden_weights = torch.randn_like(new_model.hidden_layer.weight)*.01

            # The previous model may live on another device after training
            old_in = self.model.input_layer.weight.detach().to(in_weights.device)
            old_hidden = self.model.hidden_layer.weight.detach().to(hidden_weights.device)

            # Only the hidden units that exist in both models can be copied;
            # this also covers a new model that is narrower than the old one
            shared_units = min(old_in.shape[0], in_weights.shape[0])
            in_weights[:shared_units] = old_in[:shared_units]
            hidden_weights[:, :shared_units] = old_hidden[:, :shared_units]

            # nn.Linear reads its matrix from `weight`. Assigning to any other
            # attribute name would register an unused extra parameter and
            # leave the fresh initialisation in place.
            new_model.input_layer.weight = torch.nn.Parameter(data=in_weights)
            new_model.hidden_layer.weight = torch.nn.Parameter(data=hidden_weights)

        self.model = new_model

        # Past samples * num_classes parameters the learning rate decay is
        # switched off for this and all following models
        if self.param_counts[self.current_count] * self.factor > self.samples * self.data.num_classes:
            self.gamma = 1

        self._build_optimizer_and_scheduler()


    def train(self):
        """Trains the MLP model using the selected loss function,
        optimizer, and scheduler. This also outputs to tensorboard.
        To access all of the summaries for trained models, run the
        tensorboard command in another command line while the model
        is training

        ...

        Returns
        -------
        model
             A PyTorch neural network object that has been trained
        train_loss : list
            A list of all training losses at the end of each epoch
        test_acc : list
            A list of all test losses at the end of each epoch
        zero_one_loss : list
            A list of all 0-1 training losses of the weighted vote at the
            end of each epoch
        zero_one_acc : list
            A list of all 0-1 test losses of the weighted vote at the end
            of each epoch
        """

        tb_utils = TensorBoardUtils()
        param_count = self.param_counts[self.current_count]
        model_writer = SummaryWriter(f'{self.num_votes}-ensemble-runs/dd_model_{param_count}')

        train_loss = []
        test_acc = []
        zero_one_loss = []
        zero_one_acc = []

        try:
            # get some random training images
            dataiter = iter(self.data.dataloaders['train'])
            images, labels = next(dataiter)

            # create grid of images
            img_grid = torchvision.utils.make_grid(images)

            # show images
            tb_utils.matplotlib_imshow(img_grid, one_channel=True)

            # write to tensorboard
            model_writer.add_image('MNIST Dataset', img_grid)
            model_writer.add_graph(self.model, images)

            print('Model with parameter count {}'.format(param_count))
            print('-' * 10)

            if self.cuda:
                self.model = self.model.cuda()

            # Shared part of the four scalar tags written after every epoch
            tag_suffix = f'{self.hidden_layer_size} hidden units; {param_count}*{self.factor} total parameters'

            for epoch in tqdm(range(self.max_epochs)):

                # Switches between training and testing sets
                for phase in ['train', 'test']:
                    is_train = phase == 'train'

                    if is_train:
                        self.model.train()
                        running_loss = 0.0
                        running_zero_one_loss = 0.0
                    else:
                        self.model.eval()   # Set model to evaluate mode
                        running_test_loss = 0.0
                        running_zero_one_acc = 0.0

                    # Train/Test loop
                    for inputs, labels in self.data.dataloaders[phase]:

                        if self.cuda:
                            inputs = inputs.cuda()
                            labels = labels.cuda()

                        if is_train:
                            self.mlp_optim.zero_grad()

                        # No autograd graph is needed while evaluating
                        with torch.set_grad_enabled(is_train):
                            outputs = self.model(inputs)
                            batch_loss = self.loss(outputs, labels)

                        # backward + optimize only if in training phase
                        if is_train:
                            batch_loss.backward()
                            self.mlp_optim.step()

                        # The 0-1 loss is measured on the weighted vote, not
                        # on the MLP alone
                        small_outputs = self.predict_small(inputs)
                        batch_zero_one = torch_zero_one_loss(
                            self._ensemble_vote(outputs, small_outputs), labels)

                        # Losses are weighted by the batch size so that the
                        # epoch value is a per-sample average
                        if is_train:
                            running_zero_one_loss += batch_zero_one.item() * inputs.size(0)
                            running_loss += batch_loss.item() * inputs.size(0)
                        else:
                            running_zero_one_acc += batch_zero_one.item() * inputs.size(0)
                            running_test_loss += batch_loss.item() * inputs.size(0)

                    # The scheduler only advances while post_flag is still 0
                    if is_train and not self.post_flag:
                        self.scheduler.step()

                train_loss.append(running_loss/ self.data.dataset_sizes['train'])
                test_acc.append(running_test_loss/ self.data.dataset_sizes['test'])
                zero_one_loss.append(running_zero_one_loss/self.data.dataset_sizes['train'])
                zero_one_acc.append(running_zero_one_acc/self.data.dataset_sizes['test'])

                model_writer.add_scalar(f'Train-Loss/{tag_suffix}', train_loss[-1], epoch)
                model_writer.add_scalar(f'Test-Loss/{tag_suffix}', test_acc[-1], epoch)
                model_writer.add_scalar(f'Train-Loss/{tag_suffix} (Zero-One)', zero_one_loss[-1], epoch)
                model_writer.add_scalar(f'Test-Loss/{tag_suffix} (Zero-One)', zero_one_acc[-1], epoch)

                # Early stopping once the training set is fit: always for
                # models below samples * num_classes parameters, and for any
                # model once the adaptive search has raised post_flag
                if (zero_one_loss[-1] == 0 or train_loss[-1] < 10**-4):
                    if self.generate_parameters and self.post_flag:
                        break

                    if param_count * self.factor < self.samples * self.data.num_classes:
                        break
        finally:
            # Closed only after the last scalar so everything is flushed
            model_writer.close()

        # The values printed here are the zero-one losses of the weighted vote
        print('Train Loss: {:.4f}\nTest Loss {:.4f}\n{} Hidden Units'.format(zero_one_loss[-1], zero_one_acc[-1], self.hidden_layer_size))

        torch.cuda.empty_cache()

        return self.model, train_loss, test_acc, zero_one_loss, zero_one_acc


    def double_descent(self):
        """Trains the same architecture over a range of parameter counts
        and keeps track of the final losses of every model that is trained.

        Every entry of ``self.param_counts`` from ``self.current_count``
        onwards is trained with ``self.train``. If ``self.generate_parameters``
        is set, ``get_next_param_count`` is then used to propose further
        parameter counts until ``self.post_flag`` reaches 4.

        All files are written below directories prefixed with
        ``{num_votes}-ensemble-``: ``-model`` for the pickled object,
        ``-output`` for the loss arrays and ``-runs`` for tensorboard.
        The ``-runs`` directory is deleted at the start of the call.

        ...
        Returns
        -------
        collected_data : dict
            The accumulated final losses and the parameter counts. The keys
            are {'train_loss', 'test_loss', 'zero_one_train',
            'zero_one_test', 'parameter_counts'}
        """
        output_dir = f'{self.num_votes}-ensemble-output'
        model_dir = f'{self.num_votes}-ensemble-model'
        runs_dir = f'{self.num_votes}-ensemble-runs'

        # Both directories are written to below. Creating them is best-effort:
        # a failure is reported but does not abort the run by itself.
        for directory in (output_dir, model_dir):
            try:
                os.makedirs(directory, exist_ok=True)
            except OSError as E:
                print(f'Could not make {directory}')
                print(E)

        # Start from an empty tensorboard directory; a missing one is fine
        shutil.rmtree(runs_dir, ignore_errors=True)

        # (key in self.losses, tensorboard tag) for every tracked loss
        scalar_tags = (('train', 'Ensemble-Double-Descent/Train'),
                       ('test', 'Ensemble-Double-Descent/Test'),
                       ('zero_one_train', 'Ensemble-Double-Descent/Train (Zero-One)'),
                       ('zero_one_test', 'Ensemble-Double-Descent/Test (Zero-One)'))

        dd_writer = SummaryWriter(f'{runs_dir}/double-descent')

        def train_and_record(checkpoint_label):
            """Trains the current model, pickles it, then appends and logs its final losses"""
            _, train_loss, test_loss, zero_one_train, zero_one_test = self.train()
            save_obj(self, f'{model_dir}/{checkpoint_label}_width')

            final_losses = {'train': train_loss[-1],
                            'test': test_loss[-1],
                            'zero_one_train': zero_one_train[-1],
                            'zero_one_test': zero_one_test[-1]}

            step = self.param_counts[self.current_count]
            for key, tag in scalar_tags:
                self.losses[key] = np.append(self.losses[key], final_losses[key])
                dd_writer.add_scalar(tag, self.losses[key][-1], step)

        def save_losses():
            """Writes the loss arrays and parameter counts collected so far to disk"""
            np.save(f'{output_dir}/train_loss.npy', self.losses['train'])
            np.save(f'{output_dir}/test_loss.npy', self.losses['test'])
            np.save(f'{output_dir}/zero_one_train.npy', self.losses['zero_one_train'])
            np.save(f'{output_dir}/zero_one_test.npy', self.losses['zero_one_test'])
            np.save(f'{output_dir}/parameter_counts', self.param_counts)

        def collected_data():
            """Builds the dictionary that is returned to the caller"""
            return {'train_loss': self.losses['train'],
                    'test_loss': self.losses['test'],
                    'zero_one_train': self.losses['zero_one_train'],
                    'zero_one_test': self.losses['zero_one_test'],
                    'parameter_counts': self.param_counts}

        try:
            # Phase 1: the parameter counts that were supplied up front
            while self.current_count < len(self.param_counts):

                train_and_record(self.param_counts[self.current_count])
                self.current_count += 1

                if self.current_count < len(self.param_counts):
                    self.reinitialize_classifier()

                save_losses()

            if not self.generate_parameters:
                return collected_data()

            # Phase 2: current_count is one past the end here, so step back
            # onto the last trained model before appending new counts
            self.current_count -= 1
            flag = False
            while self.post_flag < 4:

                # The test losses are normalised to sum to one before fitting
                next_ct, flag = get_next_param_count(self.param_counts,
                                                     self.losses['test']/self.losses['test'].sum(),
                                                     flag)
                self.param_counts = np.append(self.param_counts, next_ct)
                self.current_count += 1
                self.reinitialize_classifier()

                # The checkpoint is written after training so that the file
                # named after next_ct really contains the model of that size
                train_and_record(next_ct)

                if flag and (self.param_counts[-1] - self.param_counts[-2]) != 1:
                    print('Iterating Post Flag')
                    self.post_flag += 1
                    print(f'Post Flag {self.post_flag}')

                save_losses()

            return collected_data()
        finally:
            # Flush and release the event file even if training fails
            dd_writer.close()

In [None]:
class SKLearnModels:
    """Common base for every scikit-learn model wrapper in this notebook.
    It loads the requested dataset once and exposes the four splits.

    ...
    Parameters (Not Attributes)
    ---------------------------
    dataset : str
        Name of the dataset to train on. The current list is {MNIST};
        any other name raises a KeyError
    samples : int
        Forwarded to the dataset loader as its ``samples`` argument

    Attributes
    ----------
    dataset : dict
        The loaded data under the keys {'X', 'y', 'X_val', 'y_val'}
    """

    def __init__(self, dataset, samples):

        # Maps every supported dataset name onto the loader that fetches it
        loaders = {'MNIST': SKLearnData().get_mnist}

        train_x, train_y, val_x, val_y = loaders[dataset](samples=samples)

        self.dataset = dict(X=train_x, y=train_y, X_val=val_x, y_val=val_y)


class RandomForest(SKLearnModels):
    """A Random Forest wrapper that allows for variable numbers of trees
    and maximum leaf nodes

    ...
    Parameters (Not Attributes)
    ---------------------------
    dataset : str
        A string that represents the dataset that the user wants to train
        the model on. The current list is {MNIST}

    Attributes
    ----------
    N_tree : int
        The number of trees
    N_max_leaves : int
        The maximum number of leaf nodes on a tree
    bootstrap : bool
        Passed to RandomForestClassifier as ``bootstrap``
    criterion : str
        Passed to RandomForestClassifier as ``criterion``
    samples : int
        Forwarded to the dataset loader
    leaves_limit : int
        The leaf sweep runs while N_max_leaves <= leaves_limit
    tree_limit : int
        The tree sweep runs while N_tree <= tree_limit
    leaves_iter : int
        Increment of N_max_leaves between two models of the leaf sweep
    tree_iter : int
        Increment of N_tree between two models of the tree sweep
    classifier : RandomForestClassifier
        A scikit-learn random forest model
    """

    def __init__(self, dataset='MNIST',
                 N_tree=1,
                 N_max_leaves=10,
                 bootstrap=False,
                 criterion='gini',
                 samples=4000,
                 leaves_limit=2010,
                 tree_limit=30,
                 leaves_iter=100,
                 tree_iter=1):

        self.dataset_name = dataset
        super(RandomForest, self).__init__(dataset, samples)
        self.N_tree = N_tree
        self.samples = samples
        self.N_max_leaves = N_max_leaves
        self.bootstrap = bootstrap
        self.criterion = criterion
        self.leaves_limit = leaves_limit
        self.tree_limit = tree_limit
        self.leaves_iter = leaves_iter
        self.tree_iter = tree_iter
        print('Initializing RandomForest')
        self.reinitialize_classifier()

    def reinitialize_classifier(self):
        """Builds a fresh, unfitted classifier from the current N_tree and
        N_max_leaves. Helper function for double_descent method"""

        self.classifier = RandomForestClassifier(n_estimators=self.N_tree,
                                                 bootstrap=self.bootstrap,
                                                 criterion=self.criterion,
                                                 max_leaf_nodes=self.N_max_leaves)

    def _fit_and_score(self):
        """Fits the current classifier and returns its
        (0-1 train loss, 0-1 validation loss, validation MSE).
        The 0-1 validation loss is also printed."""

        self.classifier.fit(self.dataset['X'], self.dataset['y'])

        train_loss = sk_zero_one_loss(
            self.classifier.predict(self.dataset['X']), self.dataset['y'])

        # Predict the validation set once and reuse it for both metrics
        val_predictions = self.classifier.predict(self.dataset['X_val'])

        zero_one_test_loss = sk_zero_one_loss(val_predictions, self.dataset['y_val'])

        mse_loss = sk_mean_squared_error(
            labels_to_vec(self.dataset['y_val']),
            labels_to_vec(val_predictions))

        print(zero_one_test_loss)

        return train_loss, zero_one_test_loss, mse_loss

    def double_descent(self):
        """Exhibits double descent in random forest by increasing the
        number of parameters (number of trees and leaf nodes) and training
        each model to completion.

        The leaf sweep comes first: N_max_leaves grows by leaves_iter while
        it does not exceed leaves_limit. The tree sweep follows: N_tree grows
        by tree_iter while it does not exceed tree_limit, using the last leaf
        count of the leaf sweep. The first model of the tree sweep therefore
        repeats the configuration of the last model of the leaf sweep.

        N_max_leaves and N_tree are modified on the instance while sweeping.

        ...
        Returns
        -------
        collected_data : dict
            Dictionary of different losses and model attributes collected
            throughout the training process. The keys are {'train_loss',
            'zero_one_loss', 'mse_loss', 'leaf_sizes', 'trees', 'samples',
            'dataset'}
        """

        leaf_sizes = []
        trees = []

        # Collected in lists and converted once at the end
        training_losses = []
        zero_one_test_losses = []
        mse_losses = []

        def record_current_model():
            """Fits the current classifier and stores its losses and size"""
            train_loss, zero_one_test_loss, mse_loss = self._fit_and_score()

            training_losses.append(train_loss)
            zero_one_test_losses.append(zero_one_test_loss)
            mse_losses.append(mse_loss)

            leaf_sizes.append(self.N_max_leaves)
            trees.append(self.N_tree)

        leaf_sweep_ran = False
        while self.N_max_leaves <= self.leaves_limit:

            record_current_model()
            leaf_sweep_ran = True

            self.N_max_leaves += self.leaves_iter
            self.reinitialize_classifier()

        # The loop above overshoots by one increment. Only undo it if the
        # loop actually ran; otherwise N_max_leaves would drop below the
        # requested value and could become an invalid max_leaf_nodes.
        if leaf_sweep_ran:
            self.N_max_leaves = self.N_max_leaves - self.leaves_iter

        while self.N_tree <= self.tree_limit:

            self.reinitialize_classifier()

            record_current_model()

            self.N_tree += self.tree_iter


        return {'train_loss': np.array(training_losses),
                'zero_one_loss': np.array(zero_one_test_losses),
                'mse_loss': np.array(mse_losses),
                'leaf_sizes': np.array(leaf_sizes),
                'trees': np.array(trees),
                'samples': self.samples,
                'dataset': self.dataset_name}

class DecisionTree(SKLearnModels):
    """A Decision Tree wrapper that allows for a variable maximum number of
    leaf nodes and a variable maximum depth

    ...
    Parameters (Not Attributes)
    ---------------------------
    dataset : str
        A string that represents the dataset that the user wants to train
        the model on. The current list is {MNIST}

    Attributes
    ----------
    dataset_name : str
        The dataset string that was passed to the constructor
    samples : int
        The sample count forwarded to the SKLearnModels constructor
    N_max_leaves : int
        The maximum number of leaf nodes on the tree
    max_depth : int
        The maximum depth of the tree
    criterion : str
        The split criterion handed to DecisionTreeClassifier
    max_features : object
        The max_features value handed to DecisionTreeClassifier
    leaves_iter : int
        How much N_max_leaves grows per step of the leaf sweep
    leaves_limit : int
        The largest N_max_leaves (inclusive) trained in the leaf sweep
    depth_iter : int
        How much max_depth grows per step of the depth sweep
    depth_limit : int
        The largest max_depth (inclusive) trained in the depth sweep
    classifier : DecisionTreeClassifier
        A scikit-learn decision tree model
    """

    def __init__(self, dataset='MNIST',
                 criterion='gini',
                 N_max_leaves=10,
                 max_depth=1,
                 samples=4000,
                 leaves_limit=2010,
                 max_features=None,
                 leaves_iter=10,
                 depth_iter=10,
                 depth_limit=1000):

        self.dataset_name = dataset
        # NOTE(review): self.dataset (a dict with keys 'X', 'y', 'X_val',
        # 'y_val') is read below but never assigned here; presumably the
        # SKLearnModels constructor creates it -- confirm.
        super(DecisionTree, self).__init__(dataset, samples)
        self.samples = samples
        self.N_max_leaves = N_max_leaves
        self.criterion = criterion
        self.max_features = max_features
        self.leaves_iter = leaves_iter
        self.leaves_limit = leaves_limit
        self.max_depth = max_depth
        self.depth_iter = depth_iter
        self.depth_limit = depth_limit
        print('Initializing DecisionTree')
        self.reinitialize_classifier()

    def reinitialize_classifier(self):
        """Helper function for double_descent method

        Replaces self.classifier with a fresh, unfitted tree built from the
        current N_max_leaves and max_depth.
        """

        self.classifier = DecisionTreeClassifier(criterion=self.criterion,
                                                 max_leaf_nodes=self.N_max_leaves,
                                                 max_features=self.max_features,
                                                 max_depth=self.max_depth)

    def _fit_and_score(self):
        """Fits self.classifier and returns (train 0-1 loss, validation 0-1
        loss, validation MSE)."""

        X, y = self.dataset['X'], self.dataset['y']
        X_val, y_val = self.dataset['X_val'], self.dataset['y_val']

        self.classifier.fit(X, y)
        train_loss = sk_zero_one_loss(self.classifier.predict(X), y)

        # A fitted tree predicts deterministically, so one validation pass
        # serves both validation metrics.
        val_predictions = self.classifier.predict(X_val)
        zero_one_test_loss = sk_zero_one_loss(val_predictions, y_val)
        mse_loss = sk_mean_squared_error(
            labels_to_vec(y_val),
            labels_to_vec(val_predictions))

        return train_loss, zero_one_test_loss, mse_loss

    def double_descent(self):
        """Exhibits double descent in a decision tree by increasing the
        model capacity (first the maximum number of leaf nodes, then the
        maximum depth) and training each model to completion.

        The leaf sweep trains at N_max_leaves, N_max_leaves + leaves_iter,
        ... up to leaves_limit (inclusive) with max_depth held fixed. The
        depth sweep then keeps the last trained leaf count and raises
        max_depth by depth_iter up to depth_limit (inclusive); its first
        point repeats the last configuration of the leaf sweep.

        N_max_leaves, max_depth and classifier are mutated, so a second call
        on the same instance does not repeat the sweep.

        ...
        Returns
        -------
        collected_data : dict
            Dictionary of different losses and model attributes collected
            throughout the training process. The keys are {'train_loss',
            'zero_one_loss', 'mse_loss', 'leaf_sizes', 'depths', 'samples',
            'dataset'}
        """

        leaf_sizes = []
        depths = []

        training_losses = []
        zero_one_test_losses = []
        mse_losses = []

        while self.N_max_leaves <= self.leaves_limit:
            print(self.N_max_leaves)
            train_loss, zero_one_test_loss, mse_loss = self._fit_and_score()

            training_losses.append(train_loss)
            zero_one_test_losses.append(zero_one_test_loss)
            mse_losses.append(mse_loss)
            print(zero_one_test_loss)
            leaf_sizes.append(self.N_max_leaves)
            depths.append(self.max_depth)

            self.N_max_leaves += self.leaves_iter
            self.reinitialize_classifier()

        # The loop exits one step past the last trained leaf count; undo that
        # single step so the depth sweep continues from the largest tree.
        # (Subtracting leaves_limit here, as before, collapsed the tree back
        # to a handful of leaves.)
        self.N_max_leaves -= self.leaves_iter

        while self.max_depth <= self.depth_limit:
            self.reinitialize_classifier()

            train_loss, zero_one_test_loss, mse_loss = self._fit_and_score()
            print(zero_one_test_loss)

            training_losses.append(train_loss)
            zero_one_test_losses.append(zero_one_test_loss)
            mse_losses.append(mse_loss)

            leaf_sizes.append(self.N_max_leaves)
            depths.append(self.max_depth)

            self.max_depth += self.depth_iter

        return {'train_loss': np.array(training_losses, dtype=float),
                'zero_one_loss': np.array(zero_one_test_losses, dtype=float),
                'mse_loss': np.array(mse_losses, dtype=float),
                'leaf_sizes': np.array(leaf_sizes),
                'depths': np.array(depths),
                'samples': self.samples,
                'dataset': self.dataset_name}

class AdaBoost(SKLearnModels):
    """An AdaBoost wrapper that allows for a variable number of boosting
    rounds (trees) per booster and a variable number of boosters that are
    combined by majority vote

    ...
    Parameters (Not Attributes)
    ---------------------------
    dataset : str
        A string that represents the dataset that the user wants to train
        the model on. The current list is {MNIST}

    Attributes
    ----------
    dataset_name : str
        The dataset string that was passed to the constructor
    samples : int
        The sample count forwarded to the SKLearnModels constructor
    N_tree : int
        The number of trees (n_estimators) in each AdaBoost booster
    N_forest : int
        The number of AdaBoost boosters kept in classifier
    N_max_leaves : int
        The maximum number of leaf nodes on a tree
    tree_iter, tree_limit : int
        Step size and inclusive upper bound of the N_tree sweep
    forest_iter, forest_limit : int
        Step size and inclusive upper bound of the N_forest sweep
    learning_rate : float
        The learning rate handed to every AdaBoostClassifier
    bootstrap : bool
        Stored on the instance but not read by any code in this class
    estimator : DecisionTreeClassifier
        The base tree handed to every AdaBoostClassifier; it is built once in
        the constructor
    classifier : list of AdaBoostClassifier
        The scikit-learn AdaBoost models
    """

    def __init__(self, dataset='MNIST',
                 N_tree=1,
                 N_forest=1,
                 N_max_leaves=10,
                 bootstrap=False,
                 criterion='gini',
                 samples=10000,
                 tree_limit=50,
                 forest_limit=30,
                 tree_iter=1,
                 forest_iter=1,
                 max_features='sqrt',
                 learning_rate=0.85):

        self.dataset_name = dataset
        # NOTE(review): self.dataset (a dict with keys 'X', 'y', 'X_val',
        # 'y_val') is read below but never assigned here; presumably the
        # SKLearnModels constructor creates it -- confirm.
        super(AdaBoost, self).__init__(dataset, samples)
        self.N_tree = N_tree
        self.N_forest = N_forest
        self.samples = samples
        self.N_max_leaves = N_max_leaves
        self.bootstrap = bootstrap
        self.criterion = criterion
        self.forest_limit = forest_limit
        self.tree_limit = tree_limit
        self.forest_iter = forest_iter
        self.tree_iter = tree_iter
        self.learning_rate = learning_rate
        self.max_features = max_features
        print('Initializing AdaBoost')
        self.estimator = DecisionTreeClassifier(criterion=self.criterion,
                                                max_leaf_nodes=self.N_max_leaves,
                                                max_features=self.max_features)

        self.classifier = [self._new_booster() for _ in range(self.N_forest)]

    def _new_booster(self):
        """Returns an unfitted AdaBoostClassifier with the current N_tree."""
        return AdaBoostClassifier(estimator=self.estimator,
                                  n_estimators=self.N_tree,
                                  learning_rate=self.learning_rate)

    def reinitialize_classifier(self):
        """Helper function for double_descent method

        When classifier already holds N_forest boosters, every one of them is
        replaced by a fresh unfitted booster. Otherwise the existing boosters
        are kept and only the missing ones are appended.
        """
        if self.N_forest == len(self.classifier):
            self.classifier = [self._new_booster() for _ in range(self.N_forest)]
        else:
            missing = self.N_forest - len(self.classifier)
            self.classifier.extend(self._new_booster() for _ in range(missing))

    def _majority_vote(self, X):
        """Returns the per-sample most common prediction of the first
        N_forest boosters as an array of strings."""
        # The int round-trip means the class labels must be integer-like
        # strings (the visible callers use MNIST digit labels).
        votes = np.array(
            [self.classifier[i].predict(X) for i in range(self.N_forest)]).astype(int)
        return stats.mode(votes, keepdims=True)[0].reshape(-1).astype(str)

    def double_descent(self):
        """Exhibits double descent in AdaBoost by increasing the number of
        parameters (first the number of trees per booster, then the number
        of boosters) and training each model to completion.

        During the tree sweep all boosters are refit at every N_tree and the
        reported losses are the mean over the boosters. During the forest
        sweep boosters are added forest_iter at a time and the losses are
        computed on the majority vote of all boosters; the first point of
        the forest sweep reuses the last N_tree and the current N_forest.

        N_tree, N_forest and classifier are mutated, so a second call on the
        same instance does not repeat the sweep.

        ...
        Returns
        -------
        collected_data : dict
            Dictionary of different losses and model attributes collected
            throughout the training process. The keys are {'train_loss',
            'zero_one_loss', 'mse_loss', 'forests', 'trees', 'samples',
            'dataset'}
        """
        trees = []
        forests = []

        training_losses = []
        zero_one_test_losses = []
        mse_losses = []

        X, y = self.dataset['X'], self.dataset['y']
        X_val, y_val = self.dataset['X_val'], self.dataset['y_val']

        while self.N_tree <= self.tree_limit:
            for i in range(self.N_forest):
                self.classifier[i].fit(X, y)

            train_loss = np.mean([sk_zero_one_loss(
                self.classifier[i].predict(X), y) for i in range(self.N_forest)])

            # Fitted boosters predict deterministically, so one validation
            # pass per booster serves both validation metrics.
            val_predictions = [self.classifier[i].predict(X_val)
                               for i in range(self.N_forest)]

            zero_one_test_loss = np.mean([sk_zero_one_loss(
                predictions, y_val) for predictions in val_predictions])

            mse_loss = np.mean([sk_mean_squared_error(
                labels_to_vec(y_val),
                labels_to_vec(predictions)) for predictions in val_predictions])
            print(self.N_tree, zero_one_test_loss)
            training_losses.append(train_loss)
            zero_one_test_losses.append(zero_one_test_loss)
            mse_losses.append(mse_loss)

            forests.append(self.N_forest)
            trees.append(self.N_tree)

            self.N_tree += self.tree_iter
            self.reinitialize_classifier()

        # The loop exits one step past the last trained tree count.
        self.N_tree = self.N_tree - self.tree_iter
        while self.N_forest <= self.forest_limit:
            boosters_before = len(self.classifier)
            self.reinitialize_classifier()

            # reinitialize_classifier rebuilds the whole list when it already
            # had N_forest boosters (the first pass of this loop) and appends
            # otherwise. Fit exactly the unfitted boosters: fitting only the
            # last forest_iter of them left unfitted members behind whenever
            # the initial N_forest was larger than forest_iter.
            first_unfitted = 0 if boosters_before == self.N_forest else boosters_before
            for i in range(first_unfitted, self.N_forest):
                self.classifier[i].fit(X, y)

            predictions_train = self._majority_vote(X)
            predictions_test = self._majority_vote(X_val)

            train_loss = sk_zero_one_loss(predictions_train, y)

            zero_one_test_loss = sk_zero_one_loss(predictions_test, y_val)

            mse_loss = sk_mean_squared_error(
                labels_to_vec(y_val),
                labels_to_vec(predictions_test))
            print(self.N_forest, zero_one_test_loss)
            training_losses.append(train_loss)
            zero_one_test_losses.append(zero_one_test_loss)
            mse_losses.append(mse_loss)

            forests.append(self.N_forest)
            trees.append(self.N_tree)

            self.N_forest += self.forest_iter

        return {'train_loss': np.array(training_losses, dtype=float),
                'zero_one_loss': np.array(zero_one_test_losses, dtype=float),
                'mse_loss': np.array(mse_losses, dtype=float),
                'forests': np.array(forests),
                'trees': np.array(trees),
                'samples': self.samples,
                'dataset': self.dataset_name}

class L2Boost(SKLearnModels):
    """A gradient boosting wrapper that allows for a variable number of
    boosting stages (trees) per booster and a variable number of boosters
    that are combined by majority vote

    ...
    Parameters (Not Attributes)
    ---------------------------
    dataset : str
        A string that represents the dataset that the user wants to train
        the model on. The current list is {MNIST}

    Attributes
    ----------
    dataset_name : str
        The dataset string that was passed to the constructor
    samples : int
        The sample count forwarded to the SKLearnModels constructor
    N_tree : int
        The number of trees (n_estimators) in each booster
    N_forest : int
        The number of boosters kept in classifier
    N_max_leaves : int
        The maximum number of leaf nodes on a tree
    criterion : str
        The split criterion handed to every GradientBoostingClassifier
    max_features : object
        The max_features value handed to every GradientBoostingClassifier
    tree_iter, tree_limit : int
        Step size and inclusive upper bound of the N_tree sweep
    forest_iter, forest_limit : int
        Step size and inclusive upper bound of the N_forest sweep
    learning_rate : float
        The learning rate handed to every GradientBoostingClassifier
    classifier : list of GradientBoostingClassifier
        The scikit-learn gradient boosting models
    """

    def __init__(self, dataset='MNIST',
                 N_tree=1,
                 N_forest=1,
                 N_max_leaves=10,
                 criterion='friedman_mse',
                 samples=10000,
                 tree_limit=50,
                 forest_limit=30,
                 tree_iter=1,
                 forest_iter=1,
                 max_features='sqrt',
                 learning_rate=0.85):

        self.dataset_name = dataset
        # NOTE(review): self.dataset (a dict with keys 'X', 'y', 'X_val',
        # 'y_val') is read below but never assigned here; presumably the
        # SKLearnModels constructor creates it -- confirm.
        super(L2Boost, self).__init__(dataset, samples)
        self.N_tree = N_tree
        self.N_forest = N_forest
        self.samples = samples
        self.N_max_leaves = N_max_leaves
        self.criterion = criterion
        self.forest_limit = forest_limit
        self.tree_limit = tree_limit
        self.forest_iter = forest_iter
        self.tree_iter = tree_iter
        self.learning_rate = learning_rate
        self.max_features = max_features
        print('Initializing L2-Boosting Tree')

        self.classifier = [self._new_booster() for _ in range(self.N_forest)]

    def _new_booster(self):
        """Returns an unfitted GradientBoostingClassifier with the current
        N_tree and N_max_leaves."""
        return GradientBoostingClassifier(criterion=self.criterion,
                                          learning_rate=self.learning_rate,
                                          n_estimators=self.N_tree,
                                          max_features=self.max_features,
                                          max_leaf_nodes=self.N_max_leaves)

    def reinitialize_classifier(self):
        """Helper function for double_descent method

        When classifier already holds N_forest boosters, every one of them is
        replaced by a fresh unfitted booster. Otherwise the existing boosters
        are kept and only the missing ones are appended.
        """
        if self.N_forest == len(self.classifier):
            self.classifier = [self._new_booster() for _ in range(self.N_forest)]
        else:
            missing = self.N_forest - len(self.classifier)
            self.classifier.extend(self._new_booster() for _ in range(missing))

    def _majority_vote(self, X):
        """Returns the per-sample most common prediction of the first
        N_forest boosters as an array of strings."""
        # The int round-trip means the class labels must be integer-like
        # strings (the visible callers use MNIST digit labels).
        votes = np.array(
            [self.classifier[i].predict(X) for i in range(self.N_forest)]).astype(int)
        return stats.mode(votes, keepdims=True)[0].reshape(-1).astype(str)

    def double_descent(self):
        """Exhibits double descent in gradient boosting by increasing the
        number of parameters (first the number of trees per booster, then
        the number of boosters) and training each model to completion.

        During the tree sweep all boosters are refit at every N_tree and the
        reported losses are the mean over the boosters. During the forest
        sweep boosters are added forest_iter at a time and the losses are
        computed on the majority vote of all boosters; the first point of
        the forest sweep reuses the last N_tree and the current N_forest.

        N_tree, N_forest and classifier are mutated, so a second call on the
        same instance does not repeat the sweep.

        ...
        Returns
        -------
        collected_data : dict
            Dictionary of different losses and model attributes collected
            throughout the training process. The keys are {'train_loss',
            'zero_one_loss', 'mse_loss', 'forests', 'trees', 'samples',
            'dataset'}
        """
        trees = []
        forests = []

        training_losses = []
        zero_one_test_losses = []
        mse_losses = []

        X, y = self.dataset['X'], self.dataset['y']
        X_val, y_val = self.dataset['X_val'], self.dataset['y_val']

        while self.N_tree <= self.tree_limit:
            for i in range(self.N_forest):
                self.classifier[i].fit(X, y)

            train_loss = np.mean([sk_zero_one_loss(
                self.classifier[i].predict(X), y) for i in range(self.N_forest)])

            # Fitted boosters predict deterministically, so one validation
            # pass per booster serves both validation metrics.
            val_predictions = [self.classifier[i].predict(X_val)
                               for i in range(self.N_forest)]

            zero_one_test_loss = np.mean([sk_zero_one_loss(
                predictions, y_val) for predictions in val_predictions])

            mse_loss = np.mean([sk_mean_squared_error(
                labels_to_vec(y_val),
                labels_to_vec(predictions)) for predictions in val_predictions])
            print(self.N_tree, zero_one_test_loss)
            training_losses.append(train_loss)
            zero_one_test_losses.append(zero_one_test_loss)
            mse_losses.append(mse_loss)

            forests.append(self.N_forest)
            trees.append(self.N_tree)

            self.N_tree += self.tree_iter
            self.reinitialize_classifier()

        # The loop exits one step past the last trained tree count.
        self.N_tree = self.N_tree - self.tree_iter
        while self.N_forest <= self.forest_limit:
            boosters_before = len(self.classifier)
            self.reinitialize_classifier()

            # reinitialize_classifier rebuilds the whole list when it already
            # had N_forest boosters (the first pass of this loop) and appends
            # otherwise. Fit exactly the unfitted boosters: fitting only the
            # last forest_iter of them left unfitted members behind whenever
            # the initial N_forest was larger than forest_iter.
            first_unfitted = 0 if boosters_before == self.N_forest else boosters_before
            for i in range(first_unfitted, self.N_forest):
                self.classifier[i].fit(X, y)

            predictions_train = self._majority_vote(X)
            predictions_test = self._majority_vote(X_val)

            train_loss = sk_zero_one_loss(predictions_train, y)

            zero_one_test_loss = sk_zero_one_loss(predictions_test, y_val)

            mse_loss = sk_mean_squared_error(
                labels_to_vec(y_val),
                labels_to_vec(predictions_test))
            print(self.N_forest, zero_one_test_loss)
            training_losses.append(train_loss)
            zero_one_test_losses.append(zero_one_test_loss)
            mse_losses.append(mse_loss)

            forests.append(self.N_forest)
            trees.append(self.N_tree)

            self.N_forest += self.forest_iter

        return {'train_loss': np.array(training_losses, dtype=float),
                'zero_one_loss': np.array(zero_one_test_losses, dtype=float),
                'mse_loss': np.array(mse_losses, dtype=float),
                'forests': np.array(forests),
                'trees': np.array(trees),
                'samples': self.samples,
                'dataset': self.dataset_name}

# Experiment runs: MLP, ensemble, and weak-ensemble double descent

In [None]:
# 2000-epoch MLP sweep over the listed param_counts. The dict returned by
# double_descent() is pickled to mlp-dd-experiment.pkl in the current working
# directory (save_obj appends the '.pkl' suffix).
model = MultilayerPerceptron(cuda=True,
                             loss='CrossEntropy',
                             param_counts=np.array([3, 4, 7, 10, 15, 20, 23, 27, 31, 32, 33, 34, 36, 38, 40, 41, 42, 43, 44, 60, 80, 100, 150, 300, 800]),
                             generate_parameters=False,
                             max_epochs=2000,
                             scheduler_step_size=500,
                             batch_size=128,
                             seed=0,
                             reuse_weights=True)
outs = model.double_descent()
save_obj(outs, 'mlp-dd-experiment')

In [None]:
# Load the TensorBoard notebook extension so the %tensorboard magic is available.
%load_ext tensorboard

In [None]:
# Open TensorBoard on the mlp-runs log directory (path is relative to the
# current working directory).
%tensorboard --logdir mlp-runs

In [None]:
# Back up the MLP artifacts from /content to the project folder on Google
# Drive: the data, mlp-output, mlp-runs and mlp-model directories plus the
# pickled experiment results.
from google.colab import drive
drive.mount('/content/drive')
os.chdir("/content/")
!cp -r data '/content/drive/My Drive/6.s898 Project/Project'
!cp -r mlp-output '/content/drive/My Drive/6.s898 Project/Project'
!cp -r mlp-runs '/content/drive/My Drive/6.s898 Project/Project'
!cp -r mlp-model '/content/drive/My Drive/6.s898 Project/Project'
!cp mlp-dd-experiment.pkl '/content/drive/My Drive/6.s898 Project/Project'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Directory for the 100-epoch MLP run, created relative to the current
# working directory.
!mkdir mlp-100-epochs

In [None]:
# Work inside the 100-epoch directory so this run's files stay separate.
# NOTE(review): the absolute path assumes the mkdir cell ran with /content as
# the working directory; mlp-model is presumably where MultilayerPerceptron
# stores its saved models -- confirm.
os.chdir('/content/mlp-100-epochs')
!mkdir mlp-model

In [None]:
# Same MLP sweep as the 2000-epoch run but with max_epochs=100 and
# scheduler_step_size=50. Results are pickled to mlp-dd-experiment.pkl in the
# current working directory.
small_model = MultilayerPerceptron(cuda=True,
                             loss='CrossEntropy',
                             param_counts=np.array([3, 4, 7, 10, 15, 20, 23, 27, 31, 32, 33, 34, 36, 38, 40, 41, 42, 43, 44, 60, 80, 100, 150, 300, 800]),
                             generate_parameters=False,
                             max_epochs=100,
                             scheduler_step_size=50,
                             batch_size=128,
                             seed=0,
                             reuse_weights=True)
small_outs = small_model.double_descent()
save_obj(small_outs, 'mlp-dd-experiment')

In [None]:
# Open TensorBoard on mlp-runs (relative to the current working directory),
# served on port 6898.
%tensorboard --logdir mlp-runs --port 6898

In [None]:
# Copy the whole 100-epoch run directory to the project folder on Google
# Drive. Requires Drive to be mounted at /content/drive.
os.chdir('/content')
!cp -r mlp-100-epochs '/content/drive/My Drive/6.s898 Project/Project'

In [None]:
# Delete any earlier 500-epoch run so the next cell starts from an empty
# directory. This is destructive: /content/mlp-500-epochs is removed recursively.
os.chdir('/content/')
!rm -r mlp-500-epochs

In [None]:
# 500-epoch MLP sweep (scheduler_step_size=100), run from inside a fresh
# mlp-500-epochs directory. Results are pickled to mlp-dd-experiment.pkl in
# that directory.
# NOTE(review): mkdir is relative while chdir is absolute, so this assumes the
# current working directory is /content.
!mkdir mlp-500-epochs
os.chdir('/content/mlp-500-epochs')
!mkdir mlp-model
medium_model = MultilayerPerceptron(cuda=True,
                             loss='CrossEntropy',
                             param_counts=np.array([3, 4, 7, 10, 15, 20, 23, 27, 31, 32, 33, 34, 36, 38, 40, 41, 42, 43, 44, 60, 80, 100, 150, 300, 800]),
                             generate_parameters=False,
                             max_epochs=500,
                             scheduler_step_size=100,
                             batch_size=128,
                             seed=0,
                             reuse_weights=True)

medium_outs = medium_model.double_descent()
save_obj(medium_outs, 'mlp-dd-experiment')

In [None]:
# Open TensorBoard on mlp-runs (relative to the current working directory),
# served on port 8008.
%tensorboard --logdir mlp-runs --port 8008

In [None]:
# os.chdir('/content')
# from google.colab import drive
# drive.mount('/content/drive')
# os.chdir("/content/")
# !cp -r mlp-500-epochs '/content/drive/My Drive/6.s898 Project/Project'

Mounted at /content/drive


In [None]:

# Clear the artifacts of any earlier ensemble run in the current working
# directory and recreate an empty ensemble-model directory. rm reports an
# error for directories that do not exist; that is harmless here.
!rm -r ensemble-output
!rm -r ensemble-runs
!rm -r ensemble-model
!mkdir ensemble-model

rm: cannot remove 'ensemble-output': No such file or directory
rm: cannot remove 'ensemble-runs': No such file or directory


In [None]:
# 100-epoch sweep with an ensemble of num_models=5 over the listed
# param_counts. Results are pickled to ensemble-dd-experiment.pkl in the
# current working directory.
# NOTE(review): scheduler_step_size (500) is larger than max_epochs (100); if
# the scheduler steps once per epoch it never fires -- confirm this is intended.
# This cell rebinds the global names `model` and `outs`.
model = EnsembleMLP(cuda=True,
                    loss='CrossEntropy',
                    param_counts=np.array([3, 4, 7, 10, 15, 20, 23, 27, 31, 32, 33, 34, 36, 38, 40, 41, 42, 43, 44, 60, 80, 100, 150, 300, 800]),
                    generate_parameters=False,
                    max_epochs=100,
                    scheduler_step_size=500,
                    batch_size=128,
                    seed=0,
                    reuse_weights=True,
                    num_models=5)
outs = model.double_descent()
save_obj(outs, 'ensemble-dd-experiment')

In [None]:
# Open TensorBoard on ensemble-runs, served on port 2003.
# NOTE(review): 25990 is a hard-coded process id (presumably an earlier
# TensorBoard instance); it will be different on a fresh runtime.
!kill 25990
%load_ext tensorboard
%tensorboard --logdir ensemble-runs --port 2003

In [None]:
# Mount Google Drive, create the ensemble-100-epochs folder inside the project
# folder on Drive, then return to /content.
from google.colab import drive
drive.mount('/content/drive')
os.chdir("/content/drive/My Drive/6.s898 Project/Project")
!mkdir ensemble-100-epochs
os.chdir("/content")

Mounted at /content/drive


In [None]:
# Copy the 100-epoch ensemble artifacts (data, saved models, outputs,
# TensorBoard logs and the pickled results) from /content into the
# ensemble-100-epochs folder on Google Drive.
os.chdir("/content")
!cp -r data "/content/drive/My Drive/6.s898 Project/Project/ensemble-100-epochs"
!cp -r ensemble-model "/content/drive/My Drive/6.s898 Project/Project/ensemble-100-epochs"
!cp -r ensemble-output "/content/drive/My Drive/6.s898 Project/Project/ensemble-100-epochs"
!cp -r ensemble-runs "/content/drive/My Drive/6.s898 Project/Project/ensemble-100-epochs"
!cp ensemble-dd-experiment.pkl "/content/drive/My Drive/6.s898 Project/Project/ensemble-100-epochs"

In [None]:
# Flush pending writes and unmount Drive, then create and enter the working
# directory for the 500-epoch ensemble run.
# NOTE(review): mkdir is relative while chdir is absolute, so this assumes the
# current working directory is /content.
drive.flush_and_unmount()
!mkdir ensemble-500-epochs
os.chdir("/content/ensemble-500-epochs")

In [None]:
# Remove leftover data and ensemble-runs directories from the current working
# directory and create an empty ensemble-model directory. Destructive: both
# directories are deleted recursively with sudo.
!sudo rm -r data
!sudo rm -r ensemble-runs
!mkdir ensemble-model

In [None]:
# 500-epoch sweep with an ensemble of num_models=5 over the listed
# param_counts. Results are pickled to ensemble-dd-experiment.pkl in the
# current working directory.
# NOTE(review): scheduler_step_size equals max_epochs (500); whether the
# scheduler fires at all during training depends on EnsembleMLP -- confirm.
med_model = EnsembleMLP(cuda=True,
                    loss='CrossEntropy',
                    param_counts=np.array([3, 4, 7, 10, 15, 20, 23, 27, 31, 32, 33, 34, 36, 38, 40, 41, 42, 43, 44, 60, 80, 100, 150, 300, 800]),
                    generate_parameters=False,
                    max_epochs=500,
                    scheduler_step_size=500,
                    batch_size=128,
                    seed=0,
                    reuse_weights=True,
                    num_models=5)
med_outs = med_model.double_descent()
save_obj(med_outs, 'ensemble-dd-experiment')

In [None]:
# Open TensorBoard on ensemble-runs (relative to the current working
# directory), served on port 1210.
%load_ext tensorboard
%tensorboard --logdir ensemble-runs --port 1210

In [None]:
# Remount Drive and copy the whole 500-epoch ensemble directory into the
# project folder on Google Drive.
# NOTE(review): `drive` must already be imported by an earlier cell.
drive.mount('/content/drive')
os.chdir("/content")
!cp -r ensemble-500-epochs "/content/drive/My Drive/6.s898 Project/Project/"

Mounted at /content/drive


In [None]:
# Load and display the object saved as 100_width.pkl by the 500-epoch ensemble
# run (path is relative to the current working directory). load_obj unpickles
# the file, so only load files produced by this notebook.
# This rebinds the global name `model`.
model = load_obj('ensemble-500-epochs/ensemble-model/100_width')
model

In [None]:
# Mount Google Drive and switch the working directory to /content.
from google.colab import drive
drive.mount('/content/drive')
os.chdir('/content')

In [None]:
# Restore the saved 100-epoch MLP run from Google Drive into /content.
!cp -r '/content/drive/My Drive/6.s898 Project/Project/mlp-100-epochs' '/content'

In [None]:
# Create scikit_learn_data in the current working directory.
# NOTE(review): presumably the download cache used by SKLearnData -- confirm.
!mkdir scikit_learn_data
def generate_set(samples):
    """Fetch MNIST through SKLearnData and bundle the four splits.

    `samples` is forwarded unchanged as the `samples` keyword of
    SKLearnData.get_mnist, which must return exactly four values. They are
    returned, in order, under the keys 'X', 'y', 'X_val' and 'y_val'.
    """
    loaders = {'MNIST': SKLearnData().get_mnist}
    X, y, X_val, y_val = loaders['MNIST'](samples=samples)
    return dict(zip(('X', 'y', 'X_val', 'y_val'), (X, y, X_val, y_val)))
# Fit the fixed scikit-learn classifiers on a 10000-sample MNIST set. They are
# handed to WeakEnsemble as model1..model4 in a later cell.
# model4 is a list of five independently fitted gradient boosting models.
# NOTE: the loop below rebinds the global name `model`.
dataset = generate_set(10000)
model1 = LogisticRegression(C=64, max_iter=10000)
model2 = RandomForestClassifier(n_estimators=10, max_leaf_nodes=2000, criterion='gini')
model3 = DecisionTreeClassifier(criterion='gini', max_leaf_nodes=2000, max_features=392, max_depth=100)
model4 = [GradientBoostingClassifier(criterion='friedman_mse', learning_rate=0.85, n_estimators=20, max_features='sqrt', max_leaf_nodes=10) for _ in range(5)]
model1.fit(dataset['X'], dataset['y'])
model2.fit(dataset['X'], dataset['y'])
model3.fit(dataset['X'], dataset['y'])
for model in model4:
      model.fit(dataset['X'], dataset['y'])

In [None]:
# Rebuild and refit model2, model3 and model4 on the existing `dataset`;
# model1 is left as it is. Unlike the previous cell, model3 is created without
# max_features here.
# NOTE: the loop below rebinds the global name `model`.
model2 = RandomForestClassifier(n_estimators=10, max_leaf_nodes=2000, criterion='gini')
model3 = DecisionTreeClassifier(criterion='gini', max_leaf_nodes=2000, max_depth=100)
model4 = [GradientBoostingClassifier(criterion='friedman_mse', learning_rate=0.85, n_estimators=20, max_features='sqrt', max_leaf_nodes=10) for _ in range(5)]
model2.fit(dataset['X'], dataset['y'])
model3.fit(dataset['X'], dataset['y'])
for model in model4:
      model.fit(dataset['X'], dataset['y'])

In [None]:
# Delete the earlier 2-vote run so it can be regenerated. Destructive.
# NOTE(review): no cell shown here creates /content/Strong-Classifier-100-Epochs;
# it must already exist or os.chdir raises.
os.chdir("/content/Strong-Classifier-100-Epochs")
!rm -r ensemble-100-epochs-2-votes

In [None]:
# Run the 100-epoch WeakEnsemble sweep for num_votes = 2 and 3. Each run
# happens inside its own ensemble-100-epochs-<i>-votes directory, where the
# results are pickled to ensemble-dd-experiment.pkl, and the working directory
# is reset to /content/Strong-Classifier-100-Epochs afterwards.
# model1..model4 are the scikit-learn classifiers fitted in earlier cells.
# NOTE(review): scheduler_step_size (500) is larger than max_epochs (100); if
# the scheduler steps once per epoch it never fires -- confirm this is intended.
# If double_descent() raises, the working directory is left inside the run
# directory.
for i in range(2, 4):
  !mkdir -p "ensemble-100-epochs-$i-votes"
  os.chdir(f'ensemble-100-epochs-{i}-votes')
  !mkdir -p "$i-ensemble-model"
  model = WeakEnsemble(cuda=True,
                      loss='CrossEntropy',
                      param_counts=np.array([3, 4, 7, 10, 15, 20, 23, 27, 31, 32, 33, 34, 36, 38, 40, 41, 42, 43, 44, 60, 80, 100, 150, 300, 800]),
                      generate_parameters=False,
                      max_epochs=100,
                      scheduler_step_size=500,
                      batch_size=128,
                      seed=0,
                      reuse_weights=True,
                      num_votes=i,
                      model1=model1,
                      model2=model2,
                      model3=model3,
                      model4=model4)
  outs = model.double_descent()
  save_obj(outs, f'ensemble-dd-experiment')
  os.chdir('/content/Strong-Classifier-100-Epochs')

In [None]:
# Copy the whole Strong-Classifier-100-Epochs directory to the project folder
# on Google Drive.
# NOTE(review): this path is spelled "MyDrive" while the other cells use
# "My Drive" -- confirm both resolve to the same folder.
os.chdir("/content/")
!cp -r Strong-Classifier-100-Epochs "/content/drive/MyDrive/6.s898 Project/Project"

In [None]:
# Open TensorBoard on the logs of the 3-vote run, served on port 1999.
os.chdir("/content/Strong-Classifier-100-Epochs")
%load_ext tensorboard
%tensorboard --logdir ensemble-100-epochs-3-votes/3-ensemble-runs --port 1999

In [None]:
# Delete the per-vote run directories from the current working directory.
# Destructive. The sweep cell shown in this notebook only creates the 2- and
# 3-vote directories, so rm reports an error for any that are missing.
!rm -r ensemble-100-epochs-1-votes
!rm -r ensemble-100-epochs-2-votes
!rm -r ensemble-100-epochs-3-votes
!rm -r ensemble-100-epochs-4-votes