In [1]:
"""
!pip --quiet install wandb
import wandb
import os
#os.environ["WANDB_SILENT"] = "false"
wandb.login()
"""

'\n!pip --quiet install wandb\nimport wandb\nimport os\n#os.environ["WANDB_SILENT"] = "false"\nwandb.login()\n'

In [2]:
from keras.datasets import fashion_mnist
import numpy as np
from tqdm import tqdm
from activations import activation_dict, Softmax
from utils import preprocess
from layers import Linear
from loss_function import Categorical_CE
from backprop import Backprop

# Seed NumPy's global RNG so repeated runs of the notebook draw the same random
# numbers (presumably this is what fixes the weight initialisation -- confirm
# against layers.Linear).
np.random.seed(0)

# Fetch the Fashion-MNIST train/test splits through Keras.
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
# Run the project-local preprocessing in 'normalize' mode; the exact
# transformation (scaling, flattening, label encoding) lives in utils.preprocess
# and is not visible in this notebook.
x_train, x_test, y_train, y_test = preprocess(x_train, x_test, y_train, y_test, preprocess_type='normalize')

In [23]:
class SGD:
    """Vanilla gradient descent with a single fixed learning rate."""

    def __init__(self, lr):
        """Store the learning rate ``lr`` used to scale every gradient."""
        self.lr = lr

    def step(self, network, grads):
        """Apply one update to every layer and hand the network back.

        Args:
            network: indexable collection of layer objects exposing a mutable
                ``weights`` attribute.
            grads: indexable collection where ``grads[k]`` is the gradient for
                ``network[k].weights``.

        Returns:
            The same ``network`` object, with its layers' weights updated in
            place by ``weights -= lr * grad``.
        """
        for position, layer in enumerate(network):
            layer.weights -= self.lr * grads[position]

        return network

class SGDMomentum:
    """Gradient descent with a classical momentum (velocity) term."""

    def __init__(self, lr, mu):
        """Keep the learning rate ``lr`` and the momentum coefficient ``mu``.

        ``history`` holds one velocity array per layer and is filled lazily on
        the first call to :meth:`step`.
        """
        self.lr = lr
        self.mu = mu
        self.history = []

    def step(self, network, grads):
        """Advance every layer by its velocity and return the network.

        For each layer the velocity becomes ``mu * velocity + lr * grad``; on
        the very first call there is no previous velocity, so it is simply
        ``lr * grad`` (equivalent to starting from zero).  The velocity is then
        subtracted from the layer's ``weights`` in place.

        Args:
            network: indexable collection of layers with a ``weights`` attribute.
            grads: per-layer gradients, aligned with ``network`` by index.

        Returns:
            The same ``network`` object after the in-place update.
        """
        layer_count = len(network)
        for position, layer in enumerate(network):
            scaled_grad = self.lr * grads[position]
            if len(self.history) == layer_count:
                # Velocities already exist for every layer: decay and accumulate.
                self.history[position] = self.mu * self.history[position] + scaled_grad
            else:
                # First pass over the layers: seed the velocity list.
                self.history.append(scaled_grad)

            layer.weights -= self.history[position]

        return network

class RMSProp:
    """RMSProp: scale each gradient by a running average of its squared values."""

    def __init__(self, lr, beta):
        """Keep the learning rate ``lr`` and the averaging coefficient ``beta``.

        ``epsilon`` (1e-6) is added under the square root for numerical safety;
        ``history`` holds one running average per layer and is filled lazily on
        the first call to :meth:`step`.
        """
        self.lr = lr
        self.beta = beta
        self.epsilon = 1e-6
        self.history = []

    def step(self, network, grads):
        """Update every layer in place and return the network.

        For each layer the running average becomes
        ``beta * average + (1 - beta) * grad**2`` (just ``(1 - beta) * grad**2``
        on the first call, i.e. as if the average started at zero), and the
        weights move by ``lr * grad / sqrt(average + epsilon)``.

        Args:
            network: indexable collection of layers with a ``weights`` attribute.
            grads: per-layer gradients, aligned with ``network`` by index.

        Returns:
            The same ``network`` object after the in-place update.
        """
        layer_count = len(network)
        for position, layer in enumerate(network):
            grad = grads[position]
            fresh_term = (1 - self.beta) * np.power(grad, 2)
            if len(self.history) == layer_count:
                # Running averages already exist: decay the old one and blend in.
                self.history[position] = self.beta * self.history[position] + fresh_term
            else:
                # First pass over the layers: seed the running-average list.
                self.history.append(fresh_term)

            scale = np.sqrt(self.history[position] + self.epsilon)
            layer.weights -= self.lr * np.divide(grad, scale)

        return network

In [24]:
class fNN:
    """Fully connected feed-forward classifier trained with mini-batch updates.

    The model is a stack of ``Linear`` layers.  Every layer except the last is
    followed by the activation looked up by name in ``activation_dict``; the
    last layer is followed by ``Softmax``.  Training uses ``Categorical_CE`` as
    the loss, ``Backprop`` for gradients and ``RMSProp`` (beta = 0.9) as the
    optimizer.
    """

    def __init__(self, input_dims, n_classes, activation, batch_size, num_epochs, n_hidden_neurons,
                 lr, n_hidden_layers, weight_init_type):
        """Configure the network and build its layers.

        Args:
            input_dims: number of input features fed to the first layer.
            n_classes: number of output units of the final (softmax) layer.
            activation: key into ``activation_dict`` selecting the hidden
                activation and its initialisation gain.
            batch_size: number of samples per mini-batch in :meth:`fit`.
            num_epochs: number of passes over the training data in :meth:`fit`.
            n_hidden_neurons: width of each hidden layer, either a sequence with
                at least ``n_hidden_layers`` entries or a single integer that is
                applied to every hidden layer.
            lr: learning rate handed to the optimizer.
            n_hidden_layers: number of hidden layers.
            weight_init_type: initialisation scheme name forwarded to ``Linear``.

        Raises:
            ValueError: if ``n_hidden_neurons`` is a sequence with fewer than
                ``n_hidden_layers`` entries.
        """
        self.NN = []
        self.activations = []

        # Keep the architecture on the instance.  Previously create_model read
        # these names from module globals, so the constructor arguments were
        # ignored and the class only worked when same-named globals existed.
        self.input_dims = input_dims
        self.n_classes = n_classes

        self.num_epochs = num_epochs
        self.n_hidden_layers = n_hidden_layers
        self.lr = lr
        self.batch_size = batch_size
        self.activation_dict = activation_dict

        # A single integer means "use this width for every hidden layer".
        if isinstance(n_hidden_neurons, (int, np.integer)):
            n_hidden_neurons = [n_hidden_neurons] * n_hidden_layers
        if len(n_hidden_neurons) < n_hidden_layers:
            raise ValueError(
                "n_hidden_neurons must provide a width for each of the "
                "{} hidden layers, got {}".format(n_hidden_layers, len(n_hidden_neurons))
            )
        self.n_hidden_neurons = list(n_hidden_neurons)

        self.activation_fn, gain = self.activation_dict[activation]
        self.softmax_activation = Softmax()
        self.loss_fn = Categorical_CE()
        self.backprop = Backprop()

        self.create_model(weight_init_type, gain)

        # Alternative optimizers that can be swapped in here:
        #   SGD(lr)
        #   SGDMomentum(lr, 0.9)
        self.optimizer = RMSProp(lr, 0.9)

    def create_model(self, weight_init_type, gain):
        """Append the ``Linear`` layers and their activations to the model.

        Layer sizes follow the chain
        ``input_dims -> h[0]+1 -> ... -> h[n_hidden_layers-1]+1 -> n_classes``.
        Each hidden width is incremented by one exactly as the layers were
        built before; the reason is not visible in this notebook (presumably a
        bias unit -- confirm against ``layers.Linear``).

        With ``n_hidden_layers == 0`` a single ``input_dims -> n_classes``
        softmax layer is created.

        Args:
            weight_init_type: initialisation scheme name forwarded to ``Linear``.
            gain: initialisation gain forwarded to ``Linear``.
        """
        hidden_widths = [width + 1 for width in self.n_hidden_neurons[:self.n_hidden_layers]]
        widths = [self.input_dims] + hidden_widths + [self.n_classes]
        output_index = len(widths) - 2

        for l, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:])):
            fc = Linear(fan_in, fan_out, weight_init_type, gain)
            if l == output_index:
                self.activations.append(self.softmax_activation)
            else:
                self.activations.append(self.activation_fn)
            self.NN.append(fc)

    def forward(self, x):
        """Run ``x`` through every layer and collect the intermediate results.

        Returns:
            A list whose first element is the input ``x`` and whose following
            elements are the post-activation outputs of each layer, in order;
            the last element is the softmax output.
        """
        layer_wise_output = [x]
        for layer, activation in zip(self.NN, self.activations):
            x = activation(layer(x))
            layer_wise_output.append(x)

        return layer_wise_output

    def fit(self, x_train, y_train):
        """Train for ``num_epochs`` passes over the data in mini-batches.

        Batches are taken in order (no shuffling); the final batch of an epoch
        may be smaller than ``batch_size``.

        Args:
            x_train: training inputs; the first axis indexes samples.
            y_train: training targets aligned with ``x_train`` along the first
                axis, in whatever encoding the loss and backprop objects expect.
        """
        n_data = x_train.shape[0]

        for _ in tqdm(range(self.num_epochs)):
            for start in range(0, n_data, self.batch_size):
                stop = start + self.batch_size  # slicing clips at n_data
                x_mini_batch = x_train[start:stop]
                y_mini_batch = y_train[start:stop]

                layer_wise_output = self.forward(x_mini_batch)
                # The loss value itself is not used for the update.  The call is
                # kept in case the loss object caches state that backward relies
                # on -- TODO confirm against loss_function.Categorical_CE.
                self.loss_fn.loss(layer_wise_output[-1], y_mini_batch)
                layer_gradients = self.backprop.backward(
                    self.loss_fn, self.activations, layer_wise_output, self.NN, y_mini_batch
                )

                self.NN = self.optimizer.step(self.NN, layer_gradients)

    def evaluate(self, x_test, y_test):
        """Return the classification accuracy on ``(x_test, y_test)`` in percent.

        Predictions are the arg-max over axis 1 of the final layer output and
        are compared element-wise with ``y_test``, so ``y_test`` is expected to
        hold integer class labels.
        """
        num_test_datapoints = x_test.shape[0]
        y_hat = self.forward(x_test)[-1]
        y_pred = np.argmax(y_hat, axis=1)
        n_correct = np.count_nonzero(y_pred == y_test)
        accuracy = (n_correct / num_test_datapoints) * 100

        return accuracy

In [27]:
# --- Architecture -----------------------------------------------------------
input_dims = x_train.shape[1]
n_classes = 10
n_hidden_neurons = [32, 64, 128, 64, 32]  # one width per hidden layer
n_hidden_layers = len(n_hidden_neurons)

# --- Optimisation -----------------------------------------------------------
lr = 0.0001
batch_size = 64
num_epochs = 10
weight_init_type = 'random'

# Build the ReLU network, train it on the training split and report the
# accuracy (in percent) on the test split.
model = fNN(
    input_dims=input_dims,
    n_classes=n_classes,
    activation="relu",
    batch_size=batch_size,
    num_epochs=num_epochs,
    n_hidden_neurons=n_hidden_neurons,
    lr=lr,
    n_hidden_layers=n_hidden_layers,
    weight_init_type=weight_init_type,
)
model.fit(x_train, y_train)
print("Accuracy: ", model.evaluate(x_test, y_test))

100%|██████████| 10/10 [00:42<00:00,  4.23s/it]

Accuracy:  63.71





In [None]:
"""
sweep_config = {
    "method" : "grid",
    "metric" : {
        "name" : "acc",
        "goal" : "maximize",
    },
    "parameters" : {
        "epochs" : {
            "values" : [5, 10]
        },
        "n_hidden_layers" : {
            "values" : [3, 4, 5]
        },
        "hidden_layer_size": {
            "values" : [32, 64, 128]
        },
        "learning_rate" : {
            "values" : [0.0001, 0.001]
        },
        "batch_size" : {
            "values" : [16, 32, 64]
        },
        "weight_init_type" : {
            "values" : ['random', 'xavier']
        },
        "activation" : {
            "values" : ['sigmoid']#, 'tanh', 'relu']
        },
    }
}

default_config = {
    "activation" : 'sigmoid',
    "batch_size" : 16,
    "epochs" : 5,
    "hidden_layer_size": 32,
    "learning_rate" : 0.0001,
    "n_hidden_layers" : 3,
    "weight_init_type" : 'random',
}

def train():
    wandb.init(config=default_config)
    config = wandb.config
    n_classes = 10
    input_dims = x_train.shape[1]
    model = fNN(input_dims, n_classes, *list(config._as_dict().values())[:-1])
    model.fit(x_train, y_train)  # your model training code here
    accuracy = model.evaluate(x_test, y_test)
    wandb.log({'acc': accuracy})
    wandb.finish(quiet=True)

sweep_id = wandb.sweep(sweep_config, project="test", entity="kbdl")
wandb.agent(sweep_id, function=train)
#wandb.finish()
"""

In [None]:
"""
trial_y_true = np.array([0, 1, 0, 0, 0, 0, 0, 0, 0, 0])
trial_y_true = trial_y_true.reshape(1, len(trial_y_true))
trial_y_pred = np.array([0, 0.03, 0.01, 0.6, 0, 0.1, 0.04, 0, 0.22, 0])
trial_y_pred = trial_y_pred.reshape(1, len(trial_y_pred))
lay = [0, trial_y_pred]
nn = [1]
model = fNN(input_dims, n_classes, n_hidden_layers, n_hidden_neurons, weight_init)
a, b, c = model.backprop.backward(model.loss_fn, [model.softmax_activation], lay, nn, trial_y_true)
"""