# Classification with MLPs

In [21]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

In [22]:
!pip install wandb



In [3]:
import wandb

In [4]:
!wandb login --relogin

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [24]:
# load wine quality dataset
wine = pd.read_csv('/content/WineQT.csv', index_col = -1)
wine.head()

Unnamed: 0_level_0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
Id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [25]:
# split into train, test, validation
# onehot encoding of quality
wine = pd.get_dummies(wine, columns=['quality'])

# standardize data apart from the last column
# standardize each column of the data
data = wine.iloc[:, :-6].values
data = (data - data.mean(axis=0)) / data.std(axis=0)
wine.iloc[:, :-6] = data

train, test = train_test_split(wine, test_size=0.2, random_state=42)
train, validation = train_test_split(train, test_size=0.2, random_state=42)
train_x = train.iloc[:, :-6]
train_y = train.iloc[:, -6:]
test_x = test.iloc[:, :-6]
test_y = test.iloc[:, -6:]
validation_x = validation.iloc[:, :-6]
validation_y = validation.iloc[:, -6:]
# convert to numpy array
train_x = np.array(train_x)
train_y = np.array(train_y)
test_x = np.array(test_x)
test_y = np.array(test_y)
validation_x = np.array(validation_x)
validation_y = np.array(validation_y)
train.shape, test.shape, validation.shape

((731, 17), (229, 17), (183, 17))

In [26]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoidprime(x):
    return np.exp(-x)/((1 + np.exp(-x))**2)

def tanh(x):
    return np.tanh(x)

def tanhprime(x):
    return 1 - np.tanh(x) ** 2

def relu(x):
    return np.maximum(0, x)

def reluprime(x):
    return np.where(x > 0, 1, 0)

In [27]:
class MLP():
    def __init__(self, learning_rate, activation_function, optimizers, hidden_layers, neurons):
        self.learning_rate = learning_rate
        self.optimizers = optimizers
        self.hidden_layers = hidden_layers
        self.neurons = neurons
        self.inputlayersize = 0
        self.outputlayersize = 0
        self.activationfunction = activation_function
        self.X = None

        self.weights = []
        self.biases = []
        if activation_function == "sigmoid":
            self.activation_function = sigmoid
            self.backprop_function = sigmoidprime
        if activation_function == "tanh":
            self.activation_function = tanh
            self.backprop_function = tanhprime
        if activation_function == "relu":
            self.activation_function = relu
            self.backprop_function = reluprime

    def weightsbiases(self):
        # initialize weights, biases
        self.inputlayersize = self.X.shape[1]
        self.outputlayersize = self.y.shape[1]
        self.weights.append(np.random.randn(self.inputlayersize, self.neurons[0]))
        self.biases.append(np.random.randn(self.neurons[0]))
        for i in range(0, self.hidden_layers - 1):
            self.weights.append(np.random.randn(self.neurons[i], self.neurons[i+1]))
            self.biases.append(np.random.randn(self.neurons[i + 1]))
        self.weights.append(np.random.randn(self.neurons[-1], self.outputlayersize))
        self.biases.append(np.random.randn(self.outputlayersize))
        # weights correctly initialized
        # biases correctly initialized

        self.z = [None]*len(self.weights)
        self.a = [None]*len(self.weights)

    def softmax(self, x):
        s = np.max(x, axis=1)
        s = s[:, np.newaxis]
        e_x = np.exp(x - s)
        div = np.sum(e_x, axis=1)
        div = div[:, np.newaxis]
        return e_x / div

    def forward(self, X):
        z = np.dot(X, self.weights[0]) + self.biases[0]
        a = self.activation_function(z)
        self.z[0] = z
        self.a[0] = a
        for i in range(1, len(self.weights) - 1):
            z = np.dot(a, self.weights[i]) + self.biases[i]
            a = self.activation_function(z)
            self.z[i] = z
            self.a[i] = a
        z = np.dot(a, self.weights[-1]) + self.biases[-1]
        a = self.softmax(z)
        self.z[-1] = z
        self.a[-1] = a

    def backward(self, X, y):
        # initialize weight gradients
        weight_gradients = [np.zeros_like(w) for w in self.weights]
        bias_gradients = [np.zeros_like(b) for b in self.biases]
        # calculate gradients
        error = self.a[-1] - y
        weight_gradients[-1] = np.dot(self.a[-2].T, error)
        # print(error)
        bias_gradients[-1] = np.sum(error, axis=0)
        # print(bias_gradients[-1])

        for i in range(len(self.weights)-2, 0, -1):
            error = np.dot(error, self.weights[i+1].T) * self.backprop_function(self.z[i])
            weight_gradients[i] = np.dot(self.a[i-1].T, error)
            bias_gradients[i] = np.sum(error,axis=0)

        error = np.dot(error, self.weights[1].T) * self.backprop_function(self.z[0])
        weight_gradients[0] = np.dot(X.T, error)
        bias_gradients[0] = np.sum(error,axis=0)

        # update weights and biases
        for i in range(len(self.weights)):
            self.weights[i] -= self.learning_rate * weight_gradients[i] / X.shape[0]
            self.biases[i] -= self.learning_rate * bias_gradients[i] / X.shape[0]

    def lossandaccuracy(self, X, y):
        self.forward(X)
        loss = -np.sum(y * np.log(self.a[-1])) / X.shape[0]
        prediction = np.argmax(self.a[-1], axis=1)
        accuracy = np.sum(prediction == np.argmax(y, axis=1)) / X.shape[0]
        return loss, accuracy

    def train(self, X, y, val_X, val_y, epochs):
        self.X = X
        self.y = y
        self.weightsbiases()

        # pick optimizer
        if self.optimizers == "sgd":
            batch_size = 1
        if self.optimizers == "mini-batch":
            batch_size = 32
        if self.optimizers == "batch":
            batch_size = self.X.shape[0]



        # initialize loss
        loss_val = [None]*epochs
        loss_train = [None]*epochs
        accuracy_val = [None]*epochs
        accuracy_train = [None]*epochs
        for epoch in range(epochs):
            for i in range(0, self.X.shape[0], batch_size):
                batch_x = self.X[i:i+batch_size]
                batch_y = self.y[i:i+batch_size]
                self.forward(batch_x)
                self.backward(batch_x, batch_y)
            # calculate loss, accuracy
            loss_val[epoch], accuracy_val[epoch] = self.lossandaccuracy(val_X, val_y)
            loss_train[epoch], accuracy_train[epoch] = self.lossandaccuracy(self.X, self.y)
            print("Epoch: ", epoch, " Loss: ", loss_val[epoch], " Accuracy: ", accuracy_val[epoch])
            # wandb.log({"loss": loss_val[epoch], "accuracy": accuracy_val[epoch]})

    def predict(self, X):
        self.forward(X)
        prediction = np.argmax(self.a[-1], axis=1)
        return prediction




## Hyperparameter tuning Using WandB

In [5]:
sweep_config = {
    'method': 'grid',
    'metric': {
        'name': 'accuracy',
        'goal': 'maximize'
    },
    'parameters': {
        'learning_rate': {
            'values': [0.001, 0.01]
        },
        'optimizers': {
            'values': ['sgd', 'batch', 'mini-batch']
        },
        'neurons': {
            'values': [[2, [10, 10]], [3, [20, 10, 7]]]
        },
        'activation_functions': {
            'values': ['sigmoid', 'tanh', 'relu']
        },
        'epochs': {
            'values': [500, 1000]
        },
    }
}


In [10]:
sweep_id = wandb.sweep(sweep_config, project="assignment-3 part-2 classification (singlelabel)")

Create sweep with ID: re36z05f
Sweep URL: https://wandb.ai/sanika-damle/assignment-3%20part-2%20classification%20%28singlelabel%29/sweeps/re36z05f


In [None]:
def slc():
    with wandb.init() as run:
        lr = wandb.config.learning_rate
        optimizers = wandb.config.optimizers
        hidden_layers = wandb.config.neurons[0]
        neurons = wandb.config.neurons[1]
        activation_function = wandb.config.activation_functions
        epochs = wandb.config.epochs
        mlp = MLP(lr, activation_function, optimizers, hidden_layers, neurons)
        mlp.train(train_x, train_y, validation_x, validation_y, epochs)
        ypred = mlp.predict(test_x)
        f1 = f1_score(np.argmax(test_y, axis=1), ypred, average='macro')
        precision = precision_score(np.argmax(test_y, axis=1), ypred, average='macro')
        recall = recall_score(np.argmax(test_y, axis=1), ypred, average='macro')
        accuracy = np.sum(ypred == np.argmax(test_y, axis=1)) / test_x.shape[0]
        wandb.log({"accuracy": accuracy, "f1": f1, "precision": precision, "recall": recall})

wandb.agent(sweep_id, function = slc)


In [28]:
model = MLP(0.01, "relu", "mini-batch", 2, [10, 10])
model.train(train_x, train_y, validation_x, validation_y, 1000)

Epoch:  0  Loss:  6.029999762014073  Accuracy:  0.2896174863387978
Epoch:  1  Loss:  4.3099344310438035  Accuracy:  0.3879781420765027
Epoch:  2  Loss:  3.53994012350425  Accuracy:  0.4207650273224044
Epoch:  3  Loss:  3.1040496701777056  Accuracy:  0.46994535519125685
Epoch:  4  Loss:  2.825814893681584  Accuracy:  0.48633879781420764
Epoch:  5  Loss:  2.6171133344851607  Accuracy:  0.4918032786885246
Epoch:  6  Loss:  2.4591451505631214  Accuracy:  0.4918032786885246
Epoch:  7  Loss:  2.3296257641972535  Accuracy:  0.4972677595628415
Epoch:  8  Loss:  2.217090473696862  Accuracy:  0.4918032786885246
Epoch:  9  Loss:  2.1225301501807134  Accuracy:  0.4972677595628415
Epoch:  10  Loss:  2.0483409809021706  Accuracy:  0.48633879781420764
Epoch:  11  Loss:  1.9838049513919964  Accuracy:  0.5027322404371585
Epoch:  12  Loss:  1.9295666195622638  Accuracy:  0.4972677595628415
Epoch:  13  Loss:  1.8799353730466857  Accuracy:  0.4972677595628415
Epoch:  14  Loss:  1.8349621633625561  Accurac

In [29]:
# make predictions on test set
ypred = model.predict(test_x)
# calculate accuracy
accuracy = np.sum(ypred == np.argmax(test_y, axis=1)) / test_x.shape[0]
print("Accuracy on test set: ", accuracy)

Accuracy on test set:  0.6244541484716157


In [30]:
test_y = np.argmax(test_y, axis=1)

In [31]:
# printing classification report
print(classification_report(test_y, ypred, zero_division=0))

              precision    recall  f1-score   support

           1       0.00      0.00      0.00         6
           2       0.74      0.73      0.74        96
           3       0.59      0.67      0.63        99
           4       0.37      0.27      0.31        26
           5       0.00      0.00      0.00         2

    accuracy                           0.62       229
   macro avg       0.34      0.33      0.33       229
weighted avg       0.61      0.62      0.61       229



# MLPs for multilabel classification

In [32]:
from sklearn.preprocessing import MultiLabelBinarizer

In [33]:
# load advertisement.csv
advertisement = pd.read_csv('/content/advertisement.csv')
advertisement.head()

Unnamed: 0,age,gender,income,education,married,children,city,occupation,purchase_amount,most bought item,labels
0,45,Male,61271.953359,Master,False,3,Lake Sheila,Doctor,87.697118,monitor,electronics clothing sports
1,24,Female,53229.101074,High School,False,1,Crystalburgh,Businessman,115.135586,lipstick,furniture beauty
2,45,Female,30066.046684,Bachelor,True,3,Margaretburgh,Engineer,101.694559,biscuits,clothing electronics food sports
3,19,Male,48950.246384,PhD,False,0,Williamshaven,Lawyer,97.964887,maggi,food
4,29,Female,44792.627094,Master,False,0,New Paul,Businessman,86.847281,carpet,home


In [34]:
advertisement['labels'] = advertisement['labels'].str.split()
advertisement.head()

Unnamed: 0,age,gender,income,education,married,children,city,occupation,purchase_amount,most bought item,labels
0,45,Male,61271.953359,Master,False,3,Lake Sheila,Doctor,87.697118,monitor,"[electronics, clothing, sports]"
1,24,Female,53229.101074,High School,False,1,Crystalburgh,Businessman,115.135586,lipstick,"[furniture, beauty]"
2,45,Female,30066.046684,Bachelor,True,3,Margaretburgh,Engineer,101.694559,biscuits,"[clothing, electronics, food, sports]"
3,19,Male,48950.246384,PhD,False,0,Williamshaven,Lawyer,97.964887,maggi,[food]
4,29,Female,44792.627094,Master,False,0,New Paul,Businessman,86.847281,carpet,[home]


In [35]:
# print the number of unique labels
advertisement['labels'].explode().unique()

array(['electronics', 'clothing', 'sports', 'furniture', 'beauty', 'food',
       'home', 'books'], dtype=object)

In [36]:
# onehot gender
gender = pd.get_dummies(advertisement['gender'], prefix='gender')
advertisement = pd.concat([advertisement, gender], axis=1)
occupation = pd.get_dummies(advertisement['occupation'], prefix='occupation')
advertisement = pd.concat([advertisement, occupation], axis=1)
advertisement.head()

Unnamed: 0,age,gender,income,education,married,children,city,occupation,purchase_amount,most bought item,...,occupation_Businessman,occupation_Doctor,occupation_Engineer,occupation_HR,occupation_Housewife,occupation_Lawyer,occupation_Retired,occupation_Salesman,occupation_Scientist,occupation_Unemployed
0,45,Male,61271.953359,Master,False,3,Lake Sheila,Doctor,87.697118,monitor,...,0,1,0,0,0,0,0,0,0,0
1,24,Female,53229.101074,High School,False,1,Crystalburgh,Businessman,115.135586,lipstick,...,1,0,0,0,0,0,0,0,0,0
2,45,Female,30066.046684,Bachelor,True,3,Margaretburgh,Engineer,101.694559,biscuits,...,0,0,1,0,0,0,0,0,0,0
3,19,Male,48950.246384,PhD,False,0,Williamshaven,Lawyer,97.964887,maggi,...,0,0,0,0,0,1,0,0,0,0
4,29,Female,44792.627094,Master,False,0,New Paul,Businessman,86.847281,carpet,...,1,0,0,0,0,0,0,0,0,0


In [37]:
# binarize the labels column
binarizer = MultiLabelBinarizer()
vecs = binarizer.fit_transform(advertisement['labels'])
binarized_df = pd.DataFrame(vecs, columns=binarizer.classes_)
advertisement = pd.concat([advertisement, binarized_df], axis=1)
advertisement.head()

Unnamed: 0,age,gender,income,education,married,children,city,occupation,purchase_amount,most bought item,...,occupation_Scientist,occupation_Unemployed,beauty,books,clothing,electronics,food,furniture,home,sports
0,45,Male,61271.953359,Master,False,3,Lake Sheila,Doctor,87.697118,monitor,...,0,0,0,0,1,1,0,0,0,1
1,24,Female,53229.101074,High School,False,1,Crystalburgh,Businessman,115.135586,lipstick,...,0,0,1,0,0,0,0,1,0,0
2,45,Female,30066.046684,Bachelor,True,3,Margaretburgh,Engineer,101.694559,biscuits,...,0,0,0,0,1,1,1,0,0,1
3,19,Male,48950.246384,PhD,False,0,Williamshaven,Lawyer,97.964887,maggi,...,0,0,0,0,0,0,1,0,0,0
4,29,Female,44792.627094,Master,False,0,New Paul,Businessman,86.847281,carpet,...,0,0,0,0,0,0,0,0,1,0


In [38]:
# drop labels
advertisement.drop(['labels', 'most bought item', 'city', 'gender', 'occupation'], axis=1, inplace=True)
advertisement['married'] = advertisement['married'].apply(lambda x: 1 if x == True else 0)
advertisement['education'] = advertisement['education'].apply(lambda x: 1 if x == 'High School' else 2 if x == 'Bachelor' else 3 if x == 'Master' else 4)
# onehot education and gender
advertisement.head(10)

Unnamed: 0,age,income,education,married,children,purchase_amount,gender_Female,gender_Male,occupation_Artist,occupation_Businessman,...,occupation_Scientist,occupation_Unemployed,beauty,books,clothing,electronics,food,furniture,home,sports
0,45,61271.953359,3,0,3,87.697118,0,1,0,0,...,0,0,0,0,1,1,0,0,0,1
1,24,53229.101074,1,0,1,115.135586,1,0,0,1,...,0,0,1,0,0,0,0,1,0,0
2,45,30066.046684,2,1,3,101.694559,1,0,0,0,...,0,0,0,0,1,1,1,0,0,1
3,19,48950.246384,4,0,0,97.964887,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0
4,29,44792.627094,3,0,0,86.847281,1,0,0,1,...,0,0,0,0,0,0,0,0,1,0
5,20,51266.767047,3,1,3,95.145103,1,0,0,0,...,1,0,0,1,0,1,0,0,0,1
6,27,29578.136416,2,1,3,69.022842,0,1,0,0,...,0,0,1,0,1,0,0,1,0,0
7,20,35325.309005,4,0,3,110.564517,1,0,0,0,...,0,0,1,1,0,0,0,0,0,0
8,51,40232.564356,4,1,0,107.83549,0,1,0,0,...,0,0,0,0,0,1,1,0,1,1
9,19,48053.583882,4,1,0,108.854898,0,1,1,0,...,0,0,0,0,0,0,1,1,0,0


In [39]:
data = advertisement.iloc[:, :-8].values
data = (data - data.mean(axis=0)) / data.std(axis=0)
advertisement.iloc[:, :-8] = data

train, test = train_test_split(advertisement, test_size=0.2, random_state=42)
train, validation = train_test_split(train, test_size=0.2, random_state=42)
train_x = train.iloc[:, :-8]
train_y = train.iloc[:, -8:]
test_x = test.iloc[:, :-8]
test_y = test.iloc[:, -8:]
validation_x = validation.iloc[:, :-8]
validation_y = validation.iloc[:, -8:]
# convert to numpy array
train_x = np.array(train_x)
train_y = np.array(train_y)
test_x = np.array(test_x)
test_y = np.array(test_y)
validation_x = np.array(validation_x)
validation_y = np.array(validation_y)
train.shape, test.shape, validation.shape

((640, 27), (200, 27), (160, 27))

In [40]:
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, log_loss

In [41]:
class MLP_classification():
    def __init__(self, learning_rate, activation_function, optimizers, hidden_layers, neurons):
        self.learning_rate = learning_rate
        self.optimizers = optimizers
        self.hidden_layers = hidden_layers
        self.neurons = neurons
        self.inputlayersize = 0
        self.outputlayersize = 0
        self.X = None
        self.activationfunction = activation_function

        self.weights = []
        self.biases = []
        if activation_function == "sigmoid":
            self.activation_function = sigmoid
            self.backprop_function = sigmoidprime
        if activation_function == "tanh":
            self.activation_function = tanh
            self.backprop_function = tanhprime
        if activation_function == "relu":
            self.activation_function = relu
            self.backprop_function = reluprime

    def weightsbiases(self):
        # initialize weights, biases
        self.inputlayersize = self.X.shape[1]
        self.outputlayersize = self.y.shape[1]
        self.weights.append(np.random.randn(self.inputlayersize, self.neurons[0]))
        self.biases.append(np.random.randn(self.neurons[0]))
        for i in range(0, self.hidden_layers - 1):
            self.weights.append(np.random.randn(self.neurons[i], self.neurons[i+1]))
            self.biases.append(np.random.randn(self.neurons[i + 1]))
        self.weights.append(np.random.randn(self.neurons[-1], self.outputlayersize))
        self.biases.append(np.random.randn(self.outputlayersize))
        # weights correctly initialized
        # biases correctly initialized

        self.z = [None]*len(self.weights)
        self.a = [None]*len(self.weights)

    def forward(self, X):
        z = np.dot(X, self.weights[0]) + self.biases[0]
        a = self.activation_function(z)
        self.z[0] = z
        self.a[0] = a
        for i in range(1, len(self.weights) - 1):
            z = np.dot(a, self.weights[i]) + self.biases[i]
            a = self.activation_function(z)
            self.z[i] = z
            self.a[i] = a
        z = np.dot(a, self.weights[-1]) + self.biases[-1]
        a = sigmoid(z)
        self.z[-1] = z
        self.a[-1] = a

    def backward(self, X, y):
        # initialize weight gradients
        weight_gradients = [np.zeros_like(w) for w in self.weights]
        bias_gradients = [np.zeros_like(b) for b in self.biases]
        # calculate gradients
        error = (self.a[-1] - y)*sigmoidprime(self.z[-1])
        weight_gradients[-1] = np.dot(self.a[-2].T, error)
        # print(error)
        bias_gradients[-1] = np.sum(error, axis=0)
        # print(bias_gradients[-1])

        for i in range(len(self.weights)-2, 0, -1):
            error = np.dot(error, self.weights[i+1].T) * self.backprop_function(self.z[i])
            weight_gradients[i] = np.dot(self.a[i-1].T, error)
            bias_gradients[i] = np.sum(error,axis=0)

        error = np.dot(error, self.weights[1].T) * self.backprop_function(self.z[0])
        weight_gradients[0] = np.dot(X.T, error)
        bias_gradients[0] = np.sum(error,axis=0)

        # update weights and biases
        for i in range(len(self.weights)):
            self.weights[i] -= self.learning_rate * weight_gradients[i] / X.shape[0]
            self.biases[i] -= self.learning_rate * bias_gradients[i] / X.shape[0]

    def lossandaccuracy(self, X, y):
        self.forward(X)

        num_samples = y.shape[0]
        num_classes = self.outputlayersize

        # Initialize arrays for per-prediction loss and Hamming distance
        loss_per_sample = np.zeros(num_samples)
        hamming_distance_per_sample = np.zeros(num_samples)

        for sample_idx in range(num_samples):
            sample_y = y[sample_idx]
            sample_prediction = self.a[-1][sample_idx]

            # Binary cross-entropy loss for the current prediction
            loss_per_sample[sample_idx] = -np.sum(sample_y * np.log(sample_prediction) + (1 - sample_y) * np.log(1 - sample_prediction))

            # Hamming distance for the current prediction
            binary_predictions = (sample_prediction > 0.5).astype(int)
            hamming_distance_per_sample[sample_idx] = np.sum(binary_predictions != sample_y)

        # Calculate overall Hamming distance accuracy
        overall_hamming_accuracy = 1 - (np.sum(hamming_distance_per_sample) / (num_samples * num_classes))
        overall_loss = np.mean(loss_per_sample)

        return overall_loss, overall_hamming_accuracy



    def train(self, X, y, val_X, val_y, epochs):
        self.X = X
        self.y = y
        self.weightsbiases()


        # pick optimizer
        if self.optimizers == "sgd":
            batch_size = 1
        if self.optimizers == "mini-batch":
            batch_size = 32
        if self.optimizers == "batch":
            batch_size = self.X.shape[0]

        # initialize loss
        loss_val = [None]*epochs
        loss_train = [None]*epochs
        accuracy_val = [None]*epochs
        accuracy_train = [None]*epochs
        for epoch in range(epochs):
            for i in range(0, self.X.shape[0], batch_size):
                batch_x = self.X[i:i+batch_size]
                batch_y = self.y[i:i+batch_size]
                self.forward(batch_x)
                self.backward(batch_x, batch_y)
            # calculate loss, accuracy
            loss_val[epoch], accuracy_val[epoch] = self.lossandaccuracy(val_X, val_y)
            loss_train[epoch], accuracy_train[epoch] = self.lossandaccuracy(self.X, self.y)
            print("Epoch: ", epoch, " Loss: ", loss_val[epoch], " Accuracy: ", accuracy_val[epoch])
            # wandb.log({"loss": loss_val[epoch], "accuracy": accuracy_val[epoch]})

    def predict(self, X):
        print(X.shape)
        self.forward(X)
        # print(prediction)
        binary_predictions = (self.a[-1] > 0.5).astype(int)
        return binary_predictions




### Hyperparameter Tuning

In [42]:
sweep_id = wandb.sweep(sweep_config, project="assignment-3 part-2 multilabelclassification")

Create sweep with ID: x803ziei
Sweep URL: https://wandb.ai/sanika-damle/assignment-3%20part-2%20multilabelclassification/sweeps/x803ziei


In [None]:
def mcc():
    with wandb.init() as run:
        lr = wandb.config.learning_rate
        optimizers = wandb.config.optimizers
        hidden_layers = wandb.config.neurons[0]
        neurons = wandb.config.neurons[1]
        activation_function = wandb.config.activation_functions
        epochs = wandb.config.epochs
        modelclass = MLP_classification(lr, activation_function, optimizers, hidden_layers, neurons)
        modelclass.train(train_x, train_y, validation_x, validation_y, epochs)
        ypred = modelclass.predict(test_x)


        f1 = f1_score(test_y, ypred, average='macro')
        precision = precision_score(test_y, ypred, average='macro')
        recall = recall_score(test_y, ypred, average='macro')
        accuracy = accuracy_score(test_y, ypred)
        wandb.log({"accuracy": accuracy, "f1": f1, "precision": precision, "recall": recall})

wandb.agent(sweep_id, function = mcc)


In [44]:
model = MLP_classification(0.1, "tanh", "batch", 2, [10, 10])
model.train(train_x, train_y, validation_x, validation_y, 1000)

Epoch:  0  Loss:  9.00679207687081  Accuracy:  0.53359375
Epoch:  1  Loss:  8.993232108516759  Accuracy:  0.53203125
Epoch:  2  Loss:  8.979706944732172  Accuracy:  0.53359375
Epoch:  3  Loss:  8.966217041314286  Accuracy:  0.5328125
Epoch:  4  Loss:  8.952762777904548  Accuracy:  0.53359375
Epoch:  5  Loss:  8.939344467705109  Accuracy:  0.53359375
Epoch:  6  Loss:  8.925962365744827  Accuracy:  0.53359375
Epoch:  7  Loss:  8.912616675847763  Accuracy:  0.53515625
Epoch:  8  Loss:  8.899307556537646  Accuracy:  0.53671875
Epoch:  9  Loss:  8.886035126133715  Accuracy:  0.53671875
Epoch:  10  Loss:  8.872799467275227  Accuracy:  0.5375
Epoch:  11  Loss:  8.85960063106944  Accuracy:  0.53671875
Epoch:  12  Loss:  8.8464386410034  Accuracy:  0.53671875
Epoch:  13  Loss:  8.833313496703187  Accuracy:  0.53828125
Epoch:  14  Loss:  8.82022517757128  Accuracy:  0.53671875
Epoch:  15  Loss:  8.807173646287788  Accuracy:  0.53671875
Epoch:  16  Loss:  8.7941588521265  Accuracy:  0.53671875
Ep

In [45]:
# fit model on test data
binary_predictions = model.predict(test_x)
# print(binary_predictions.shape)
# predictions.shape


(200, 19)


In [46]:
def hamming_distance_accuracy(true_labels, predicted_labels):
    if true_labels.shape != predicted_labels.shape:
        raise ValueError("Input vectors must have the same shape.")

    num_samples, num_classes = true_labels.shape

    hamming_distances = np.sum(true_labels != predicted_labels, axis=1)

    per_sample_accuracies = 1 - (hamming_distances / num_classes)

    accuracy = np.mean(per_sample_accuracies)

    return accuracy

In [47]:
# find accuracy as the hamming distance accuracy
print("Hamming distance accuracy: ", hamming_distance_accuracy(test_y, binary_predictions))

Hamming distance accuracy:  0.62375


In [48]:
# printing sklearn classification report
print(classification_report(test_y, binary_predictions, zero_division=0))

              precision    recall  f1-score   support

           0       0.35      0.18      0.24        68
           1       0.24      0.09      0.13        66
           2       0.15      0.03      0.05        63
           3       0.41      0.21      0.27        63
           4       0.30      0.15      0.20        59
           5       0.31      0.18      0.23        66
           6       0.38      0.07      0.12        71
           7       0.41      0.23      0.29        71

   micro avg       0.33      0.14      0.20       527
   macro avg       0.32      0.14      0.19       527
weighted avg       0.32      0.14      0.19       527
 samples avg       0.22      0.12      0.15       527

