In [13]:
!pip --quiet install wandb
import wandb
import os
os.environ["WANDB_SILENT"] = "true"
wandb.login()

True

In [7]:
# `mnist` must be imported alongside `fashion_mnist`: the MNIST block below
# calls `mnist.load_data()` and would raise NameError on a fresh kernel otherwise.
from keras.datasets import fashion_mnist, mnist
import numpy as np
from tqdm import tqdm
from utils import preprocess

# Seed numpy's global RNG so repeated runs of the notebook are comparable.
np.random.seed(10)

# Loading FMNIST
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
# `preprocess` (project helper) returns the train / validation / test splits in this order.
x_train, y_train, x_val, y_val, x_test, y_test = preprocess(x_train, x_test, y_train, y_test, preprocess_type='img_standardize')


# Loading MNIST (same preprocessing as Fashion-MNIST)
(mnist_x_train, mnist_y_train), (mnist_x_test, mnist_y_test) = mnist.load_data()
mnist_x_train, mnist_y_train, mnist_x_val, mnist_y_val, mnist_x_test, mnist_y_test = preprocess(mnist_x_train, mnist_x_test, mnist_y_train, mnist_y_test, preprocess_type='img_standardize')

In [10]:
from activations import activation_dict, Softmax
from layers import Linear
from loss_function import Categorical_CE
from optimizers import optimizer_dict
from backprop import Backprop
from hyperparams import bayes_sweep_config, random_sweep_config, default_config

In [16]:
class fNN:
    """Fully connected feedforward classifier trained with mini-batch backprop.

    The network is a stack of ``n_hidden_layers + 1`` ``Linear`` layers. Every
    layer except the last is followed by the activation selected through
    ``activation_type``; the last layer is followed by ``Softmax``. After each
    training epoch the model is evaluated on the validation data and the
    metrics are logged to the active wandb run.
    """

    def __init__(self, input_dims, n_classes, x_val, y_val, loss_fn_type, activation_type, batch_size, num_epochs, n_hidden_neurons, 
                 lamda, lr, n_hidden_layers, optimizer_type, weight_init_type):
        """Build the layers, the loss function and the backprop/optimizer pair.

        Args:
            input_dims: Input width of the first ``Linear`` layer.
            n_classes: Output width of the last ``Linear`` layer.
            x_val, y_val: Validation data evaluated at the end of every epoch.
            loss_fn_type: ``'CCE'`` or ``'MSE'``.
            activation_type: Key into ``activation_dict``; each entry unpacks
                to ``(activation, gain)``.
            batch_size: Number of samples per mini-batch.
            num_epochs: Number of passes over the training data in ``fit``.
            n_hidden_neurons: Single integer shared by all hidden layers.
            lamda, lr: Forwarded to the optimizer constructor.
            n_hidden_layers: Number of hidden layers (0 gives a single
                input-to-output layer).
            optimizer_type: Key into ``optimizer_dict``.
            weight_init_type: Forwarded to every ``Linear`` layer.

        Raises:
            ValueError: If ``loss_fn_type`` is not ``'CCE'`` or ``'MSE'``.
        """
        self.NN = []
        self.activations = []

        self.num_epochs = num_epochs
        self.n_hidden_layers = n_hidden_layers
        self.batch_size = batch_size

        self.activation_fn, gain = activation_dict[activation_type]  # Set activation for hidden layers
        self.softmax_activation = Softmax()

        # Choose loss function
        if loss_fn_type == 'CCE':
            self.loss_fn = Categorical_CE()
        elif loss_fn_type == 'MSE':
            # NOTE(review): `MSE` is not imported in any cell visible in this
            # notebook; confirm where it is defined and import it next to
            # `Categorical_CE`.
            self.loss_fn = MSE()
        else:
            # Fail here instead of with an AttributeError on self.loss_fn later.
            raise ValueError(f"Unknown loss_fn_type {loss_fn_type!r}; expected 'CCE' or 'MSE'")

        # Create feedforward NN
        self.create_model(input_dims, n_classes, n_hidden_neurons, weight_init_type, activation_type, gain)

        optimizer = optimizer_dict[optimizer_type]  # Set optimizer
        # Backprop must use the loss that was selected above; a hard-coded
        # Categorical_CE here would make 'MSE' runs train on cross-entropy.
        self.backprop = Backprop(self.loss_fn, optimizer(lr, lamda, n_hidden_layers))

        # Validation data
        self.x_val = x_val
        self.y_val = y_val

    def create_model(self, input_dims, n_classes, n_hidden_neurons, weight_init_type, activation_type, gain):
        """Append ``n_hidden_layers + 1`` layers and their activations to the model.

        The first layer reads ``input_dims`` features, the last layer emits
        ``n_classes`` values followed by softmax, and everything in between
        uses the hidden width and the hidden activation. With zero hidden
        layers this yields a single ``input_dims -> n_classes`` softmax layer.
        """
        # NOTE(review): hidden layers are built one unit wider than
        # n_hidden_neurons — presumably a bias unit handled by Linear; confirm.
        hidden_width = n_hidden_neurons + 1
        last = self.n_hidden_layers

        for l in range(self.n_hidden_layers + 1):
            in_width = input_dims if l == 0 else hidden_width
            out_width = n_classes if l == last else hidden_width

            self.NN.append(Linear(in_width, out_width, weight_init_type, activation_type, gain))
            self.activations.append(self.softmax_activation if l == last else self.activation_fn)

    def forward(self, x):
        """Run ``x`` through every layer and return all intermediate outputs.

        Returns:
            A list whose first element is the input ``x`` and whose element
            ``i + 1`` is the activated output of layer ``i``; the last element
            is the network prediction.
        """
        layer_wise_output = [x]
        for layer, activation in zip(self.NN, self.activations):
            x = activation(layer(x))
            layer_wise_output.append(x)

        return layer_wise_output

    def fit(self, x_train, y_train):
        """Train for ``num_epochs`` epochs of mini-batch updates.

        After each epoch, the mean of the per-batch training losses together
        with the validation loss and accuracy is logged via ``wandb.log``.
        """
        n_data = x_train.shape[0]
        for _ in range(self.num_epochs):
            epoch_loss = []
            # Slicing clips at the end of the array, so the final batch may be smaller.
            for start in range(0, n_data, self.batch_size):
                stop = start + self.batch_size
                x_mini_batch = x_train[start:stop]
                y_mini_batch = y_train[start:stop]

                layer_wise_output = self.forward(x_mini_batch)
                epoch_loss.append(self.loss_fn.loss(layer_wise_output[-1], y_mini_batch))
                # backward returns the updated list of layers.
                self.NN = self.backprop.backward(self.activations, layer_wise_output, self.NN, y_mini_batch)

            val_acc, val_loss, _, _ = self.evaluate(self.x_val, self.y_val)
            # NOTE(review): train_loss is a mean of raw per-batch losses while
            # val_loss is divided by the number of samples; whether the two are
            # on the same scale depends on what loss() returns — confirm.
            wandb.log({'train_loss': np.mean(epoch_loss), 'val_loss': val_loss, 'val_acc': val_acc})

    def evaluate(self, x, y):
        """Evaluate the model on ``x`` against targets ``y``.

        ``y`` is reduced with ``argmax`` along axis 1, i.e. it is expected to
        hold one score/indicator per class for each sample.

        Returns:
            ``(accuracy, eval_loss, y_pred, y_true)`` where ``accuracy`` is a
            percentage, ``eval_loss`` is the loss divided by the number of
            samples, and ``y_pred`` / ``y_true`` are class indices.
        """
        n_datapoints = x.shape[0]
        y_hat = self.forward(x)[-1]
        eval_loss = self.loss_fn.loss(y_hat, y) / n_datapoints
        y_pred = np.argmax(y_hat, axis=1)
        y_true = np.argmax(y, axis=1)
        accuracy = (np.count_nonzero(y_pred == y_true) / n_datapoints) * 100

        return accuracy, eval_loss, y_pred, y_true

In [9]:
# Train function executed by the wandb sweep agents (one call per sweep run)
def train():
    """Train and test one Fashion-MNIST model using the active wandb config.

    Starts a wandb run seeded with ``default_config``, names the run after
    its hyperparameters, trains an ``fNN`` with categorical cross-entropy on
    the notebook-level ``x_train`` / ``y_train`` (validating on ``x_val`` /
    ``y_val``), logs the accuracy on ``x_test`` / ``y_test`` as ``test_acc``
    and closes the run.
    """
    wandb.init(config=default_config)
    config = wandb.config
    n_classes = 10
    input_dims = x_train.shape[1]

    hyperparams = config._as_dict()
    wandb.run.name = (
        f'{hyperparams["activation"]}_bs_{hyperparams["batch_size"]}'
        f'_ep_{hyperparams["epochs"]}_hl_{hyperparams["n_hidden_layers"]}'
        f'_{hyperparams["optimizer"]}_{hyperparams["weight_init_type"]}'
    )

    # Look every hyperparameter up by key instead of unpacking
    # `list(values())[:-1]`, which silently depends on the dict's key order
    # and on a trailing internal entry.
    # NOTE(review): "hidden_layer_size", "lamda" and "lr" are assumed to be the
    # key names in the sweep/default config (they match `optimal_model`); confirm.
    model = fNN(
        input_dims, n_classes, x_val, y_val, 'CCE',
        activation_type=hyperparams["activation"],
        batch_size=hyperparams["batch_size"],
        num_epochs=hyperparams["epochs"],
        n_hidden_neurons=hyperparams["hidden_layer_size"],
        lamda=hyperparams["lamda"],
        lr=hyperparams["lr"],
        n_hidden_layers=hyperparams["n_hidden_layers"],
        optimizer_type=hyperparams["optimizer"],
        weight_init_type=hyperparams["weight_init_type"],
    )
    model.fit(x_train, y_train)
    test_acc, _, _, _ = model.evaluate(x_test, y_test)
    wandb.log({'test_acc': test_acc})
    wandb.finish(quiet=True)

### Bayesian Sweep

In [12]:
sweep_id = wandb.sweep(bayes_sweep_config, project="test", entity="kbdl")
# Pass the function object itself. `function=train()` would run `train` once
# immediately and hand its return value (None) to the agent.
wandb.agent(sweep_id, function=train, count=50)
wandb.finish()

### Random sweep

In [None]:
sweep_id = wandb.sweep(random_sweep_config, project="test", entity="kbdl")
# Pass the function object itself. `function=train()` would run `train` once
# immediately and hand its return value (None) to the agent.
wandb.agent(sweep_id, function=train, count=50)
wandb.finish()

### Question 7

In [13]:
# Hyperparameters of the model used for Questions 7 and 8.
# Key order is kept as-is because later cells read the values positionally.
optimal_model = dict(
    activation='sigmoid',
    batch_size=16,
    epochs=10,
    hidden_layer_size=128,
    lamda=0,
    lr=0.001,
    n_hidden_layers=3,
    optimizer="nadam",
    weight_init_type='xavier',
)

In [19]:
# Train the same configuration once per loss function, each in its own wandb run.
for lossfn in ['CCE', 'MSE']:
    wandb.init(config=optimal_model, project="test", entity="kbdl")
    config = wandb.config
    n_classes = 10
    input_dims = x_train.shape[1]

    hyperparams = config._as_dict()
    wandb.run.name = hyperparams["activation"] + "_bs_"+ str(hyperparams["batch_size"]) + "_ep_" + str(hyperparams["epochs"]) + "_hl_" + str(hyperparams["n_hidden_layers"]) + "_" + hyperparams["optimizer"] + "_" + hyperparams["weight_init_type"]
    wandb.run.name += "_" + lossfn

    # Look hyperparameters up by key rather than relying on the order of
    # `hyperparams.values()` and on dropping a trailing internal entry.
    model = fNN(
        input_dims, n_classes, x_val, y_val, lossfn,
        activation_type=hyperparams["activation"],
        batch_size=hyperparams["batch_size"],
        num_epochs=hyperparams["epochs"],
        n_hidden_neurons=hyperparams["hidden_layer_size"],
        lamda=hyperparams["lamda"],
        lr=hyperparams["lr"],
        n_hidden_layers=hyperparams["n_hidden_layers"],
        optimizer_type=hyperparams["optimizer"],
        weight_init_type=hyperparams["weight_init_type"],
    )
    model.fit(x_train, y_train)

    # Close this run before the next iteration opens a new one, so the two
    # loss functions never end up logging into the same run.
    wandb.finish(quiet=True)

### Question 8

In [21]:
# Train the optimal configuration and log its test accuracy and confusion matrix.
wandb.init(config=optimal_model, project="test", entity="kbdl")
config = wandb.config
n_classes = 10
input_dims = x_train.shape[1]

hyperparams = config._as_dict()
wandb.run.name = "optimal_model_conf_matrix"

# Look hyperparameters up by key rather than relying on the order of
# `hyperparams.values()` and on dropping a trailing internal entry.
model = fNN(
    input_dims, n_classes, x_val, y_val, 'CCE',
    activation_type=hyperparams["activation"],
    batch_size=hyperparams["batch_size"],
    num_epochs=hyperparams["epochs"],
    n_hidden_neurons=hyperparams["hidden_layer_size"],
    lamda=hyperparams["lamda"],
    lr=hyperparams["lr"],
    n_hidden_layers=hyperparams["n_hidden_layers"],
    optimizer_type=hyperparams["optimizer"],
    weight_init_type=hyperparams["weight_init_type"],
)
model.fit(x_train, y_train)
test_acc, _, y_pred, y_true = model.evaluate(x_test, y_test)

# Fashion-MNIST class names, indexed by label id.
class_names = ["T-shirt/top", "Trouser", "Pullover", "Dress", "Coat", "Sandal", "Shirt", "Sneaker", "Bag", "Ankle boot"]
# test_acc was previously computed and discarded; record it with the run.
wandb.log({'test_acc': test_acc})
wandb.log({"conf_mat" : wandb.plot.confusion_matrix(preds=y_pred, y_true=y_true, class_names=class_names)})

wandb.finish(quiet=True)

### Question 10

In [11]:
# Rank 1 model based on validation accuracy
model_a = dict(
    activation='sigmoid',
    batch_size=16,
    epochs=10,
    hidden_layer_size=128,
    lamda=0,
    lr=0.001,
    n_hidden_layers=3,
    optimizer="nadam",
    weight_init_type='xavier',
)

# Rank 2 model based on validation accuracy
# (rank 1 with a different activation, batch size and epoch count;
# overriding existing keys keeps the original key order)
model_b = {**model_a, "activation": 'tanh', "batch_size": 64, "epochs": 5}

# Rank 3 model based on validation accuracy
# (rank 1 with one more hidden layer)
model_c = {**model_a, "n_hidden_layers": 4}

In [17]:
# Train the three best Fashion-MNIST configurations on MNIST, one wandb run each.
mnist_runs = {'model_a': model_a, 'model_b': model_b, 'model_c': model_c}
for model_name, model_config in mnist_runs.items():
    wandb.init(config=model_config, project="test", entity="kbdl")
    config = wandb.config
    n_classes = 10
    input_dims = mnist_x_train.shape[1]

    hyperparams = config._as_dict()
    wandb.run.name = "mnist" + model_name

    # Look hyperparameters up by key rather than relying on the order of
    # `hyperparams.values()` and on dropping a trailing internal entry.
    model = fNN(
        input_dims, n_classes, mnist_x_val, mnist_y_val, 'CCE',
        activation_type=hyperparams["activation"],
        batch_size=hyperparams["batch_size"],
        num_epochs=hyperparams["epochs"],
        n_hidden_neurons=hyperparams["hidden_layer_size"],
        lamda=hyperparams["lamda"],
        lr=hyperparams["lr"],
        n_hidden_layers=hyperparams["n_hidden_layers"],
        optimizer_type=hyperparams["optimizer"],
        weight_init_type=hyperparams["weight_init_type"],
    )
    model.fit(mnist_x_train, mnist_y_train)
    wandb.finish(quiet=True)