In [1]:
import numpy as np
import pandas as pd
from copy import copy
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.preprocessing import OneHotEncoder, MultiLabelBinarizer
from sklearn.impute import SimpleImputer 
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, hamming_loss
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
import wandb

# 2.1

In [2]:
data = pd.read_csv('WineQT.csv', header=0)

In [3]:
train_data, temp_data = train_test_split(data, test_size=0.4,random_state = 42)
validation_data, test_data = train_test_split(temp_data, test_size=0.5, random_state = 42)

In [5]:
scaler = StandardScaler()
minmax_scaler = MinMaxScaler()

imputer = SimpleImputer(strategy='mean')  

train_data_scaled = scaler.fit_transform(imputer.fit_transform(train_data.drop(columns=['quality'])))
train_labels = train_data['quality']

validation_data_scaled = scaler.transform(imputer.transform(validation_data.drop(columns=['quality'])))
test_data_scaled = scaler.transform(imputer.transform(test_data.drop(columns=['quality'])))

train_data_scaled_normalized = minmax_scaler.fit_transform(train_data_scaled)
validation_data_scaled_normalized = minmax_scaler.transform(validation_data_scaled)
test_data_scaled_normalized = minmax_scaler.transform(test_data_scaled)

# print(train_labels)
# print(train_data_scaled_normalized)
# print(test_data_scaled_normalized.shape)
# print(validation_data_scaled_normalized.shape)
X_train = train_data_scaled
y_train = train_labels
X_val = validation_data_scaled
y_val = validation_data['quality']
X_test = test_data_scaled
y_test = validation_data['quality']
y_train = np.array(y_train)
# y_val = np.array(y_val)
y_val = pd.get_dummies(y_val).astype(int)
y_val = np.array(y_val)
y_train = pd.get_dummies(y_train).astype(int)
y_train = np.array(y_train)
y_test = pd.get_dummies(y_test).astype(int)
y_test = np.array(y_test)
print(X_train)
print(y_train)


[[-1.67058247  1.20622441 -0.91067852 ... -0.59135112  1.0674252
   1.64361099]
 [-0.548498    0.69262724 -1.37982978 ... -1.11462937 -0.42042679
  -1.73858966]
 [-0.0169843   1.14915806 -0.28514351 ... -0.82391923 -0.88538054
  -0.31652802]
 ...
 [-0.48944092 -1.01936335  0.3403915  ... -0.358783    1.34639745
   1.62012349]
 [-0.19415553  0.23609641  0.23613566 ...  0.16449526 -0.88538054
   0.84290061]
 [-0.43038384 -0.04923535  0.70528692 ... -0.12621489  1.2534067
   1.60944735]]
[[0 0 0 1 0 0]
 [0 0 0 0 1 0]
 [0 0 1 0 0 0]
 ...
 [0 0 0 1 0 0]
 [0 0 0 1 0 0]
 [0 0 0 1 0 0]]


# 2.2 & 2.3

In [5]:
class Neural_Network(object):
    def __init__(self, learning_rate=0.01,epochs = 5000, inputLayerSize: int = 2, hiddenLayerSize = [3,3,3] , outputLayerSize: int = 1):
        # Define Hyperparameters
        self.inputLayerSize = inputLayerSize
        self.outputLayerSize = outputLayerSize
        self.hiddenLayerSize = hiddenLayerSize
        self.activation = self.relu
        self.activationPrime = self.reluPrime
        self.learning_rate = learning_rate
        # Weights (parameters)
        self.num_layers = len(hiddenLayerSize)+1
        self.weights = []
        self.weights.append(np.random.randn(self.inputLayerSize, self.hiddenLayerSize[0]))
        for i in range(1,self.num_layers-1):
            self.weights.append(np.random.randn(self.hiddenLayerSize[i-1], self.hiddenLayerSize[i]))
        self.weights.append(np.random.randn(self.hiddenLayerSize[-1], self.outputLayerSize))
        self.optimizer = 'SGD'
        self.max_iterations = epochs

    def forward(self, X):
        self.z = [None] * (len(self.weights))
        self.a = [None] * (len(self.weights)-1)
        
        self.z[0] = np.dot(X, self.weights[0])
        for i in range(0,len(self.a)):
            self.a[i] = self.activation(self.z[i])
            self.z[i+1] = np.dot(self.a[i],self.weights[i+1])
        yHat = self.softmax(self.z[-1])
        return yHat

    def relu(self, z):
        return np.maximum(0, z)  # ReLU activation

    def tanh(self, z):
        return np.tanh(z)  # Tanh activation

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def sigmoidPrime(self, z):
        return np.exp(-z) / ((1 + np.exp(-z))**2)

    def softmax(self, z):
        exp_z = np.exp(z - np.max(z))  # Subtracting max(z) for numerical stability
        return exp_z / exp_z.sum(axis=1, keepdims=True)

    def costFunction(self, X, y):
        self.yHat = self.forward(X)
        J = -np.sum(y * np.log(self.yHat)) / len(X)
        return J

    def costFunctionPrime(self, X, y):
        self.yHat = self.forward(X)
        deltas = [None] * (len(self.weights))
        deltas[-1] = self.yHat - y
        dJdWs = [None] * (len(self.weights))
        for i in range(len(deltas)-2,-1,-1):
            dJdWs[i+1] = np.dot(self.a[i].T, deltas[i+1])
            deltas[i] = np.dot(deltas[i+1],self.weights[i+1].T)*self.activationPrime(self.z[i])
        dJdWs[0] = np.dot(X.T, deltas[0])
        
        # delta3 = self.yHat - y
        # dJdW2 = np.dot(self.a2.T, delta3)

        # delta2 = np.dot(delta3, self.W2.T) * self.activationPrime(self.z2)  # Use ReLU derivative
        # dJdW1 = np.dot(X.T, delta2)

        return dJdWs

    def reluPrime(self, z):
        return np.where(z > 0, 1, 0)  # Derivative of ReLU

    def tanhPrime(self, z):
        return 1 - np.tanh(z)**2  # Derivative of Tanh

    def backward(self, X, y, learning_rate):
        dJdWs = self.costFunctionPrime(X, y)
        for i in range(len(self.weights)):
            self.weights[i] -= learning_rate * dJdWs[i]
        # self.W1 -= learning_rate * dJdW1
        # self.W2 -= learning_rate * dJdW2
    def train(self, X, y, batch_size = 32, learning_rate=0.1, max_iterations=10000,wand = 0):
        if(self.optimizer == 'SGD'):
            return self.train_sgd(X=X, y=y,wand=wand)
        elif(self.optimizer == 'Batch'):
            return self.train_batch(X=X, y=y,wand=wand)
        elif(self.optimizer == 'MiniBatch'):
            return self.train_mini_batch(X=X, y=y,wand=wand)
    def train_sgd(self, X, y, learning_rate=0.1, max_iterations=10000, wand = 0):
        if(wand == 1):
            wandb.init(project="Reporting loss on MLP Classifier for WineQT Dataset")
        for i in range(self.max_iterations):
            idx = np.random.randint(len(X))
            self.backward(X[idx:idx+1], y[idx:idx+1], self.learning_rate)
            if i % 1 == 0:
                loss = self.costFunction(X, y)
                # print("Iteration %d: loss = %f" % (i, loss))
                if(wand == 1):
                    wandb.log({"Loss": loss, "Epoch": i})
        loss = self.costFunction(X, y)
        return loss
        print("Training completed.")
    def train_batch(self, X, y, learning_rate=0.0001, max_iterations=10000, wand = 0):
        if(wand == 1):
            wandb.init(project="Reporting loss on MLP Classifier for WineQT Dataset")
        for i in range(self.max_iterations):
            self.backward(X, y, self.learning_rate)
            if i % 1 == 0:
                loss = self.costFunction(X, y)
                # print("Iteration %d: loss = %f" % (i, loss))
                if(wand == 1):
                    wandb.log({"Loss": loss, "Epoch": i})
        loss = self.costFunction(X, y)
        return loss
        print("Training completed.")
    def train_mini_batch(self, X, y, batch_size=32, learning_rate=0.01, max_iterations=5000, wand = 0):
        if(wand == 1):
            wandb.init(project="Reporting loss on MLP Classifier for WineQT Dataset")
        for i in range(self.max_iterations):
            indices = np.arange(len(X))
            np.random.shuffle(indices)
            for j in range(0, len(X), batch_size):
                batch_indices = indices[j:j+batch_size]
                self.backward(X[batch_indices], y[batch_indices], self.learning_rate)
            if i % 1 == 0:
                loss = self.costFunction(X, y)
                # print("Iteration %d: loss = %f" % (i, loss))
                if(wand == 1):
                    wandb.log({"Loss": loss, "Epoch": i})
        loss = self.costFunction(X, y)
        return loss
        print("Training completed.")

    def predict(self, X):
        yHat = self.forward(X)
        # return yHat
        binary_predictions = np.zeros_like(yHat)
        binary_predictions[np.arange(len(yHat)), yHat.argmax(axis=1)] = 1
        return binary_predictions
        # return np.argmax(yHat, axis=1)
    def set_learning_rate(self, learning_rate):
        self.learning_rate = learning_rate

    def set_activation_function(self, activation_function):
        if(activation_function == 'sigmoid'):
            self.activation = self.sigmoid
            self.activationPrime = self.sigmoidPrime
        if(activation_function == 'relu'):
            self.activation = self.relu
            self.activationPrime = self.reluPrime
        if(activation_function == 'tanh'):
            self.activation = self.tanh
            self.activationPrime = self.tanhPrime

    def set_optimizer(self, optimizer):
        self.optimizer = optimizer

    def set_hidden_layers(self, hidden_layer_sizes):
        self.hiddenLayerSize = hidden_layer_sizes
        self.num_layers = len(self.hiddenLayerSize)+1
        self.weights = []
        self.weights.append(np.random.randn(self.inputLayerSize, self.hiddenLayerSize[0]))
        for i in range(1,self.num_layers-1):
            self.weights.append(np.random.randn(self.hiddenLayerSize[i-1], self.hiddenLayerSize[i]))
        self.weights.append(np.random.randn(self.hiddenLayerSize[-1], self.outputLayerSize))
        # self.W1 = np.random.randn(self.inputLayerSize, self.hiddenLayerSize)
        # self.W2 = np.random.randn(self.hiddenLayerSize, self.outputLayerSize)
        # self.weights, self.biases = self.initialize_weights()
    def set_epochs(self, epcohs):
        self.max_iterations = epochs
        print(self.max_iterations)


In [6]:
wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mlsamourya07[0m ([33mmourya[0m). Use [1m`wandb login --relogin`[0m to force relogin


True

In [7]:
np.random.seed(27)
mlp_sgd = Neural_Network(learning_rate=0.01,epochs=5000, inputLayerSize=12, hiddenLayerSize=[18], outputLayerSize=6)
mlp_sgd.set_activation_function('sigmoid')
mlp_sgd.set_optimizer('SGD')
loss = mlp_sgd.train(X_train, y_train,wand=1)
wandb.init(project="Reporting Accuracy for MLP Classifier for WineQT Dataset")
y_pred = mlp_sgd.predict(X_val)
acc = accuracy_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred, average='macro') 
precision = precision_score(y_val, y_pred, average='macro')
recall = recall_score(y_val, y_pred, average='macro')
print("Loss:", loss)
print("Accuracy:", acc)
print("F1 Score:", f1)
print("Precision:", precision)
print("Recall:", recall)
y_train_pred = mlp_sgd.predict(X_train)
train_accuracy = accuracy_score(y_train, y_train_pred)
train_f1 = f1_score(y_train, y_train_pred, average='macro')  
train_precision = precision_score(y_train, y_train_pred, average='macro')
train_recall = recall_score(y_train, y_train_pred, average='macro')
print("Loss:", loss)
print("Accuracy:", train_accuracy)
print("F1 Score:", train_f1)
print("Precision:", train_precision)
print("Recall:", train_recall)
print(acc)
print(train_accuracy)
wandb.log({"Loss": loss, "Accuracy": acc})
wandb.log({"Loss": loss, "Accuracy": train_accuracy})



VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
Loss,█▅▃▂▂▂▂▂▂▂▂▂▁▂▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁

0,1
Epoch,4999.0
Loss,0.93207


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113667622238508, max=1.0…

Loss: 0.9320701134595549
Accuracy: 0.62882096069869
F1 Score: 0.28086370197375893
Precision: 0.31639826991828396
Recall: 0.28161110665152955
Loss: 0.9320701134595549
Accuracy: 0.6116788321167883
F1 Score: 0.2959359197423386
Precision: 0.3792398709252854
Recall: 0.2880217153732829
0.62882096069869
0.6116788321167883


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [8]:

# Define the sweep configuration
sweep_config = {
    "name": "example_sweep",
    "method": "grid",  # Other options: "grid", "bayes"
    "metric": {"goal": "maximize", "name": "accuracy"},
    "parameters": {
        "activation": {"values": ["sigmoid", "tanh", "relu"]},
        "optimizer": {"values": ["SGD", "Batch", "MiniBatch"]},
    },
    "early_terminate": {
        "type": "hyperband",
        "s": 2,
        "eta": 3,
        "max_iter": 9,
    },
}

# Initialize the sweep
sweep_id = wandb.sweep(sweep_config, project="MLP Classification on different activation functions and optimizers for WineQT Dataset")

# Define your training function
def train():
    # Access sweep configuration
    wandb.init(project="MLP Classification on different activation functions and optimizers for WineQT Dataset")
    
    config = wandb.config

    # Your training code here, using hyperparameters from config
    # Example:
    activation = config.activation
    optimizer = config.optimizer

    # Log metrics
    np.random.seed(27)
    mlp_sgd = Neural_Network(learning_rate=0.01,epochs=5000, inputLayerSize=12, hiddenLayerSize=[18], outputLayerSize=6)
    mlp_sgd.set_activation_function(activation)
    mlp_sgd.set_optimizer(optimizer)
    loss = mlp_sgd.train(X_train, y_train)
    y_pred = mlp_sgd.predict(X_val)
    acc = accuracy_score(y_val, y_pred)
    f1 = f1_score(y_val, y_pred, average='macro') 
    precision = precision_score(y_val, y_pred, average='macro')
    recall = recall_score(y_val, y_pred, average='macro')
    wandb.log({"activation": activation, "optimizer": optimizer, "accuracy": acc})

# Start the sweep
wandb.agent(sweep_id, function=train)




Create sweep with ID: 4x470ihy
Sweep URL: https://wandb.ai/mourya/MLP%20Classification%20on%20different%20activation%20functions%20and%20optimizers%20for%20WineQT%20Dataset/sweeps/4x470ihy
<IPython.core.display.HTML object>




<IPython.core.display.HTML object>
<IPython.core.display.HTML object>
<IPython.core.display.HTML object>


[34m[1mwandb[0m: Agent Starting Run: nu34h0cd with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	optimizer: SGD


  _warn_prf(average, modifier, msg_start, len(result))


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁

0,1
accuracy,0.62882
activation,sigmoid
optimizer,SGD


[34m[1mwandb[0m: Agent Starting Run: fk24l4wi with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	optimizer: Batch


VBox(children=(Label(value='0.006 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.896696…

0,1
accuracy,▁

0,1
accuracy,0.53275
activation,sigmoid
optimizer,Batch


[34m[1mwandb[0m: Agent Starting Run: ja0bq06o with config:
[34m[1mwandb[0m: 	activation: sigmoid
[34m[1mwandb[0m: 	optimizer: MiniBatch


VBox(children=(Label(value='0.002 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.362416…

0,1
accuracy,▁

0,1
accuracy,0.55459
activation,sigmoid
optimizer,MiniBatch


[34m[1mwandb[0m: Agent Starting Run: 6u3k0p7g with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	optimizer: SGD


VBox(children=(Label(value='0.002 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.286782…

0,1
accuracy,▁

0,1
accuracy,0.55895
activation,tanh
optimizer,SGD


[34m[1mwandb[0m: Agent Starting Run: ea8sudmm with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	optimizer: Batch


VBox(children=(Label(value='0.002 MB of 0.006 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.285307…

0,1
accuracy,▁

0,1
accuracy,0.51965
activation,tanh
optimizer,Batch


[34m[1mwandb[0m: Agent Starting Run: x73hn1tb with config:
[34m[1mwandb[0m: 	activation: tanh
[34m[1mwandb[0m: 	optimizer: MiniBatch


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112454911118322, max=1.0…

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁

0,1
accuracy,0.51528
activation,tanh
optimizer,MiniBatch


[34m[1mwandb[0m: Agent Starting Run: jux1czz8 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	optimizer: SGD


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01111240250001073, max=1.0)…

  _warn_prf(average, modifier, msg_start, len(result))


0,1
accuracy,▁

0,1
accuracy,0.56769
activation,relu
optimizer,SGD


[34m[1mwandb[0m: Agent Starting Run: m94etqux with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	optimizer: Batch


  J = -np.sum(y * np.log(self.yHat)) / len(X)
  J = -np.sum(y * np.log(self.yHat)) / len(X)
  return exp_z / exp_z.sum(axis=1, keepdims=True)
  _warn_prf(average, modifier, msg_start, len(result))


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁

0,1
accuracy,0.0131
activation,relu
optimizer,Batch


[34m[1mwandb[0m: Agent Starting Run: zh2t09l3 with config:
[34m[1mwandb[0m: 	activation: relu
[34m[1mwandb[0m: 	optimizer: MiniBatch


  _warn_prf(average, modifier, msg_start, len(result))


VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁

0,1
accuracy,0.59389
activation,relu
optimizer,MiniBatch


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.


In [9]:
# Initialize a wandb sweep.
sweep_config = {
    'method': 'grid',
    "metric": {"goal": "maximize", "name": "Accuracy"},
    'name': 'grid_search_example',
    'parameters': {
        'learning_rate': {
            'values': [0.01, 0.001]
        },
        'epochs': {
            'values': [100, 50]
        },
        'hiddenlayersize': {
            'values': [
                [14],
                [18],
                [18, 18]
            ]
        }
    }
}

# Initialize your sweep.
sweep_id = wandb.sweep(sweep_config, project="MLP Hyperparameter tuning on WineQT Dataset")
# best_acc = 0
# best_hyperparameters = None
results = []
# This function will be called for each combination of hyperparameters.
def train_neural_network():
    wandb.init(project="MLP Hyperparameter tuning on WineQT Dataset")
    # Inside this function, you should set your model and train it using the current hyperparameters.
    # Replace this part with your model training logic.
    mlp_sgd = Neural_Network(learning_rate=wandb.config.learning_rate, epochs=wandb.config.epochs, inputLayerSize=12, hiddenLayerSize=wandb.config.hiddenlayersize, outputLayerSize=6)
    mlp_sgd.set_activation_function("sigmoid")
    mlp_sgd.set_optimizer("SGD")
    loss = mlp_sgd.train(X_train, y_train)
    y_pred = mlp_sgd.predict(X_val)
    acc = accuracy_score(y_val, y_pred)
    f1 = f1_score(y_val, y_pred, average='macro') 
    precision = precision_score(y_val, y_pred, average='macro')
    recall = recall_score(y_val, y_pred, average='macro')
    results.append({
        "Learning_rate": wandb.config.learning_rate,
        "Epochs": wandb.config.epochs,
        "Hiddenlayers": wandb.config.hiddenlayersize,
        "Accuracy": acc,
        "F1 Score": f1,
        "Precision": precision,
        "Recall": recall
    })
    # nonlocal best_acc, best_hyperparameters
    # if(acc > best_acc):
    #     best_acc = acc
    #     best_hyperparameters = {
    #         "Learning_rate": wandb.config.learning_rate,
    #         "Epochs": wandb.config.epochs,
    #         "Hiddenlayers": wandb.config.hiddenlayersize,
    #     }
    wandb.log({"Accuracy": acc, "F1 Score": f1, "Precision": precision, "Recall": recall})

# Run the sweep.
wandb.agent(sweep_id, function=train_neural_network)
for i in range(len(results)):
    print(results[i])
print("Best Hyperparameters: ", best_hyperparameters)
df = pd.DataFrame(results)
print("\nTable:\n")
print(df)
# You can access the results using the wandb API after the sweep is complete.


Create sweep with ID: iuygacr0
Sweep URL: https://wandb.ai/mourya/MLP%20Hyperparameter%20tuning%20on%20WineQT%20Dataset/sweeps/iuygacr0


[34m[1mwandb[0m: Agent Starting Run: 113d6870 with config:
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	hiddenlayersize: [14]
[34m[1mwandb[0m: 	learning_rate: 0.01
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


  _warn_prf(average, modifier, msg_start, len(result))


0,1
Accuracy,▁
F1 Score,▁
Precision,▁
Recall,▁

0,1
Accuracy,0.21397
F1 Score,0.11154
Precision,0.15268
Recall,0.11839


[34m[1mwandb[0m: Agent Starting Run: 9s69hlu5 with config:
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	hiddenlayersize: [14]
[34m[1mwandb[0m: 	learning_rate: 0.001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


  _warn_prf(average, modifier, msg_start, len(result))


0,1
Accuracy,▁
F1 Score,▁
Precision,▁
Recall,▁

0,1
Accuracy,0.06987
F1 Score,0.03326
Precision,0.0357
Recall,0.1362


[34m[1mwandb[0m: Agent Starting Run: 0129znsh with config:
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	hiddenlayersize: [18]
[34m[1mwandb[0m: 	learning_rate: 0.01
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


  _warn_prf(average, modifier, msg_start, len(result))


0,1
Accuracy,▁
F1 Score,▁
Precision,▁
Recall,▁

0,1
Accuracy,0.21397
F1 Score,0.10502
Precision,0.11876
Recall,0.11721


[34m[1mwandb[0m: Agent Starting Run: 6oxnp4zf with config:
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	hiddenlayersize: [18]
[34m[1mwandb[0m: 	learning_rate: 0.001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


  _warn_prf(average, modifier, msg_start, len(result))


0,1
Accuracy,▁
F1 Score,▁
Precision,▁
Recall,▁

0,1
Accuracy,0.38428
F1 Score,0.10166
Precision,0.08674
Recall,0.16166


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: y54v2vpb with config:
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	hiddenlayersize: [18, 18]
[34m[1mwandb[0m: 	learning_rate: 0.01
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112505555542562, max=1.0…

VBox(children=(Label(value='0.002 MB of 0.002 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.775666…

0,1
Accuracy,▁
F1 Score,▁
Precision,▁
Recall,▁

0,1
Accuracy,0.07424
F1 Score,0.05419
Precision,0.11894
Recall,0.129


[34m[1mwandb[0m: Agent Starting Run: cdfgkx2w with config:
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	hiddenlayersize: [18, 18]
[34m[1mwandb[0m: 	learning_rate: 0.001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112713666665917, max=1.0…

  _warn_prf(average, modifier, msg_start, len(result))


0,1
Accuracy,▁
F1 Score,▁
Precision,▁
Recall,▁

0,1
Accuracy,0.40611
F1 Score,0.09627
Precision,0.06769
Recall,0.16667


[34m[1mwandb[0m: Agent Starting Run: m2nnu8tb with config:
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	hiddenlayersize: [14]
[34m[1mwandb[0m: 	learning_rate: 0.01
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


  _warn_prf(average, modifier, msg_start, len(result))


0,1
Accuracy,▁
F1 Score,▁
Precision,▁
Recall,▁

0,1
Accuracy,0.27948
F1 Score,0.15545
Precision,0.12087
Recall,0.29746


[34m[1mwandb[0m: Agent Starting Run: c3d00yrl with config:
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	hiddenlayersize: [14]
[34m[1mwandb[0m: 	learning_rate: 0.001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


  _warn_prf(average, modifier, msg_start, len(result))


0,1
Accuracy,▁
F1 Score,▁
Precision,▁
Recall,▁

0,1
Accuracy,0.04803
F1 Score,0.03188
Precision,0.27934
Recall,0.18272


[34m[1mwandb[0m: Agent Starting Run: jjem0k9n with config:
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	hiddenlayersize: [18]
[34m[1mwandb[0m: 	learning_rate: 0.01
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


  _warn_prf(average, modifier, msg_start, len(result))


0,1
Accuracy,▁
F1 Score,▁
Precision,▁
Recall,▁

0,1
Accuracy,0.18777
F1 Score,0.07377
Precision,0.07972
Recall,0.07396


[34m[1mwandb[0m: Agent Starting Run: zjfdxrtb with config:
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	hiddenlayersize: [18]
[34m[1mwandb[0m: 	learning_rate: 0.001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


  _warn_prf(average, modifier, msg_start, len(result))


0,1
Accuracy,▁
F1 Score,▁
Precision,▁
Recall,▁

0,1
Accuracy,0.0786
F1 Score,0.04295
Precision,0.18475
Recall,0.08748


[34m[1mwandb[0m: Agent Starting Run: vrabl4gj with config:
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	hiddenlayersize: [18, 18]
[34m[1mwandb[0m: 	learning_rate: 0.01
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


  _warn_prf(average, modifier, msg_start, len(result))


0,1
Accuracy,▁
F1 Score,▁
Precision,▁
Recall,▁

0,1
Accuracy,0.08297
F1 Score,0.06243
Precision,0.08637
Recall,0.23042


[34m[1mwandb[0m: Agent Starting Run: jqst1rao with config:
[34m[1mwandb[0m: 	epochs: 50
[34m[1mwandb[0m: 	hiddenlayersize: [18, 18]
[34m[1mwandb[0m: 	learning_rate: 0.001
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


  _warn_prf(average, modifier, msg_start, len(result))


0,1
Accuracy,▁
F1 Score,▁
Precision,▁
Recall,▁

0,1
Accuracy,0.02183
F1 Score,0.01589
Precision,0.00845
Recall,0.18889


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.


{'Learning_rate': 0.01, 'Epochs': 100, 'Hiddenlayers': [14], 'Accuracy': 0.21397379912663755, 'F1 Score': 0.11154235292166326, 'Precision': 0.15268148553589167, 'Recall': 0.11839411742970107}
{'Learning_rate': 0.001, 'Epochs': 100, 'Hiddenlayers': [14], 'Accuracy': 0.06986899563318777, 'F1 Score': 0.03325948648034537, 'Precision': 0.035697399527186764, 'Recall': 0.13620071684587812}
{'Learning_rate': 0.01, 'Epochs': 100, 'Hiddenlayers': [18], 'Accuracy': 0.21397379912663755, 'F1 Score': 0.10502198507978855, 'Precision': 0.11876300227693416, 'Recall': 0.11721169123896095}
{'Learning_rate': 0.001, 'Epochs': 100, 'Hiddenlayers': [18], 'Accuracy': 0.38427947598253276, 'F1 Score': 0.10166076619750741, 'Precision': 0.08674242424242425, 'Recall': 0.16166110493140526}
{'Learning_rate': 0.01, 'Epochs': 100, 'Hiddenlayers': [18, 18], 'Accuracy': 0.07423580786026202, 'F1 Score': 0.05419075667157508, 'Precision': 0.11894021117322089, 'Recall': 0.12899568949590165}
{'Learning_rate': 0.001, 'Epochs'

In [13]:
learning_rates = [0.01, 0.001]
epochs = [100, 50]
hiddenlayersizes = [
    [14],
    [18],
    [18, 18]
]
best_acc = 0.0
best_hyperparameters = None
for i in learning_rates:
    for j in epochs:
        for k in hiddenlayersizes:
            mlp_sgd = Neural_Network(learning_rate=i, epochs=j, inputLayerSize=12, hiddenLayerSize=k, outputLayerSize=6)
            mlp_sgd.set_activation_function("sigmoid")
            mlp_sgd.set_optimizer("SGD")
            loss = mlp_sgd.train(X_train, y_train)
            y_pred = mlp_sgd.predict(X_val)
            acc = accuracy_score(y_val, y_pred)
            f1 = f1_score(y_val, y_pred, average='macro') 
            precision = precision_score(y_val, y_pred, average='macro')
            recall = recall_score(y_val, y_pred, average='macro')
            results.append({
                "Learning_rate": i,
                "Epochs": j,
                "Hiddenlayers": k,
                "Accuracy": acc,
                "F1 Score": f1,
                "Precision": precision,
                "Recall": recall
            })
            if(acc > best_acc):
                best_acc = acc
                best_hyperparameters = {
                    "Learning_rate": i,
                    "Epochs": j,
                    "Hiddenlayers": k,
                }
            # print("Learning Rate: ", i, "Epochs: ", j, "Hidden Layer Sizes: ", k, "Accuracy: ", acc)
            # print("F1 Score: ", f1, "Precision: ", precision, "Recall: ", recall)
            # print("Loss: ", loss)
            # print("\n")
df = pd.DataFrame(results)
print("\nTable:\n")
print(df)
print("Best Hyperparameters: ", best_hyperparameters)
print("Best Accuracy: ", best_acc)


Table:

    Learning_rate  Epochs Hiddenlayers  Accuracy  F1 Score  Precision  \
0           0.010     100         [14]  0.213974  0.111542   0.152681   
1           0.001     100         [14]  0.069869  0.033259   0.035697   
2           0.010     100         [18]  0.213974  0.105022   0.118763   
3           0.001     100         [18]  0.384279  0.101661   0.086742   
4           0.010     100     [18, 18]  0.074236  0.054191   0.118940   
5           0.001     100     [18, 18]  0.406114  0.096273   0.067686   
6           0.010      50         [14]  0.279476  0.155447   0.120874   
7           0.001      50         [14]  0.048035  0.031885   0.279335   
8           0.010      50         [18]  0.187773  0.073765   0.079724   
9           0.001      50         [18]  0.078603  0.042953   0.184755   
10          0.010      50     [18, 18]  0.082969  0.062433   0.086374   
11          0.001      50     [18, 18]  0.021834  0.015893   0.008452   
12          0.010     100         [14]  0.

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [19]:
np.random.seed(41)
mlp_sgd = Neural_Network(learning_rate=0.01,epochs=5000, inputLayerSize=12, hiddenLayerSize=[18], outputLayerSize=6)
mlp_sgd.set_activation_function('sigmoid')
mlp_sgd.set_optimizer('SGD')
mlp_sgd.set_learning_rate(0.01)
# print(l)
mlp_sgd.max_iterations = 500;
mlp_sgd.set_hidden_layers([18])
loss = mlp_sgd.train(X_train, y_train)
y_pred = mlp_sgd.predict(X_test)
acc = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred, average='macro') 
precision = precision_score(y_test, y_pred, average='macro')
recall = recall_score(y_test, y_pred, average='macro')
print(acc, loss)

0.35807860262008734 1.4511747617428121


  _warn_prf(average, modifier, msg_start, len(result))


# 2.4

###### 

In [6]:
class MultilabelMLP(object):
    def __init__(self, learning_rate=0.01,epochs = 5000, inputLayerSize: int = 2, hiddenLayerSize = [3,3,3] , outputLayerSize: int = 1):
        self.inputLayerSize = inputLayerSize
        self.outputLayerSize = outputLayerSize
        self.hiddenLayerSize = hiddenLayerSize
        self.activation = self.relu
        self.activationPrime = self.reluPrime
        self.learning_rate = learning_rate
        # Weights (parameters)
        self.num_layers = len(hiddenLayerSize)+1
        self.weights = []
        self.weights.append(np.random.randn(self.inputLayerSize, self.hiddenLayerSize[0]))
        for i in range(1,self.num_layers-1):
            self.weights.append(np.random.randn(self.hiddenLayerSize[i-1], self.hiddenLayerSize[i]))
        self.weights.append(np.random.randn(self.hiddenLayerSize[-1], self.outputLayerSize))
        self.optimizer = 'SGD'
        self.max_iterations = epochs

    def forward(self, X):
        self.z = [None] * (len(self.weights))
        self.a = [None] * (len(self.weights)-1)
        
        self.z[0] = np.dot(X, self.weights[0])
        for i in range(0,len(self.a)):
            self.a[i] = self.activation(self.z[i])
            self.z[i+1] = np.dot(self.a[i],self.weights[i+1])
        yHat = self.softmax(self.z[-1])
        return yHat

    def relu(self, z):
        return np.maximum(0, z)  # ReLU activation

    def tanh(self, z):
        return np.tanh(z)  # Tanh activation

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def sigmoidPrime(self, z):
        return np.exp(-z) / ((1 + np.exp(-z))**2)

    def softmax(self, z):
        exp_z = np.exp(z - np.max(z))  # Subtracting max(z) for numerical stability
        return exp_z / exp_z.sum(axis=1, keepdims=True)

    def costFunction(self, X, y):
        self.yHat = self.forward(X)
        J = -np.sum(y * np.log(self.yHat)) / len(X)
        return J

    def costFunctionPrime(self, X, y):
        self.yHat = self.forward(X)
        deltas = [None] * (len(self.weights))
        deltas[-1] = self.yHat - y
        dJdWs = [None] * (len(self.weights))
        for i in range(len(deltas)-2,-1,-1):
            dJdWs[i+1] = np.dot(self.a[i].T, deltas[i+1])
            deltas[i] = np.dot(deltas[i+1],self.weights[i+1].T)*self.activationPrime(self.z[i])
        dJdWs[0] = np.dot(X.T, deltas[0])
        
        # delta3 = self.yHat - y
        # dJdW2 = np.dot(self.a2.T, delta3)

        # delta2 = np.dot(delta3, self.W2.T) * self.activationPrime(self.z2)  # Use ReLU derivative
        # dJdW1 = np.dot(X.T, delta2)

        return dJdWs

    def reluPrime(self, z):
        return np.where(z > 0, 1, 0)  # Derivative of ReLU

    def tanhPrime(self, z):
        return 1 - np.tanh(z)**2  # Derivative of Tanh

    def backward(self, X, y, learning_rate):
        dJdWs = self.costFunctionPrime(X, y)
        for i in range(len(self.weights)):
            self.weights[i] -= learning_rate * dJdWs[i]
        # self.W1 -= learning_rate * dJdW1
        # self.W2 -= learning_rate * dJdW2
    def train(self, X, y, batch_size = 32, learning_rate=0.1, max_iterations=10000):
        if(self.optimizer == 'SGD'):
            return self.train_sgd(X=X, y=y)
        elif(self.optimizer == 'Batch'):
            return self.train_batch(X=X, y=y)
        elif(self.optimizer == 'MiniBatch'):
            return self.train_mini_batch(X=X, y=y)
    def train_sgd(self, X, y, learning_rate=0.1, max_iterations=1000, wand = 0):
        if(wand == 1):
            wandb.init(project="Reporting loss on MLP Classifier for advertisement Dataset")
        for i in range(self.max_iterations):
            idx = np.random.randint(len(X))
            self.backward(X[idx:idx+1], y[idx:idx+1], learning_rate)
            if i % 1 == 0:
                loss = self.costFunction(X, y)
                wandb.log({"Loss": loss, "Epoch": i+1})
                # print("Iteration %d: loss = %f" % (i, loss))
        loss = self.costFunction(X, y)
        return loss
        print("Training completed.")
    def train_batch(self, X, y, learning_rate=0.0001, max_iterations=1000, wand = 0):
        if(wand == 1):
            wandb.init(project="Reporting loss on MLP Classifier for advertisement Dataset")
        for i in range(self.max_iterations):
            self.backward(X, y, learning_rate)
            if i % 1 == 0:
                loss = self.costFunction(X, y)
                wandb.log({"Loss": loss, "Epoch": i+1})
                # print("Iteration %d: loss = %f" % (i, loss))
        loss = self.costFunction(X, y)
        return loss
        print("Training completed.")
    def train_mini_batch(self, X, y, batch_size=32, learning_rate=0.01, max_iterations=1000, wand = 0):
        if(wand == 1):
            wandb.init(project="Reporting loss on MLP Classifier for advertisement Dataset")
        for i in range(self.max_iterations):
            indices = np.arange(len(X))
            np.random.shuffle(indices)
            for j in range(0, len(X), batch_size):
                batch_indices = indices[j:j+batch_size]
                self.backward(X[batch_indices], y[batch_indices], learning_rate)
            if i % 1 == 0:
                loss = self.costFunction(X, y)
                wandb.log({"Loss": loss, "Epoch": i+1})
                # print("Iteration %d: loss = %f" % (i, loss))
        loss = self.costFunction(X, y)
        return loss
        print("Training completed.")

    def predict(self, X, y):
        yHat = self.forward(X)
        binary_predictions = (yHat > 0.2).astype(int)
        return np.array(binary_predictions)
        # return np.argmax(yHat, axis=1)
    def set_learning_rate(self, learning_rate):
        self.learning_rate = learning_rate

    def set_activation_function(self, activation_function):
        if(activation_function == 'sigmoid'):
            self.activation = self.sigmoid
            self.activationPrime = self.sigmoidPrime
        if(activation_function == 'relu'):
            self.activation = self.relu
            self.activationPrime = self.reluPrime
        if(activation_function == 'tanh'):
            self.activation = self.tanh
            self.activationPrime = self.tanhPrime

    def set_optimizer(self, optimizer):
        self.optimizer = optimizer

    def set_hidden_layers(self, hidden_layer_sizes):
        self.hiddenLayerSize = hidden_layer_sizes
        self.num_layers = len(self.hiddenLayerSize)+1
        self.weights = []
        self.weights.append(np.random.randn(self.inputLayerSize, self.hiddenLayerSize[0]))
        for i in range(1,self.num_layers-1):
            self.weights.append(np.random.randn(self.hiddenLayerSize[i-1], self.hiddenLayerSize[i]))
        self.weights.append(np.random.randn(self.hiddenLayerSize[-1], self.outputLayerSize))
        # self.W1 = np.random.randn(self.inputLayerSize, self.hiddenLayerSize)
        # self.W2 = np.random.randn(self.hiddenLayerSize, self.outputLayerSize)
        # self.weights, self.biases = self.initialize_weights()
    def set_epochs(self, epcohs):
        self.max_iterations = epochs
        print(self.max_iterations)


In [7]:

scaler = StandardScaler()
minmax_scaler = MinMaxScaler()
data = pd.read_csv('advertisement.csv')
data.fillna(method='ffill', inplace=True) 
categorical_cols = ['gender', 'education', 'married', 'city', 'occupation', 'most bought item']
data_encoded = pd.get_dummies(data, columns=categorical_cols, drop_first=True)
X = data_encoded.drop('labels', axis=1)  
y = data_encoded['labels']  
X = scaler.fit_transform(X)
mlb = MultiLabelBinarizer()
y = mlb.fit_transform(y.str.split(' '))
# print(y)
y_fin = []
for i in range(len(y)):
    # print(y[i])
    # print(np.sum(y[i]))
    x = y[i]/np.sum(y[i])
    y_fin.append(x)
y = y_fin
y = np.array(y)
print(y)
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X, y, test_size=0.5, random_state=42)
print(X_train.shape)
print(y_train.shape)

[[0.         0.         0.33333333 ... 0.         0.         0.33333333]
 [0.5        0.         0.         ... 0.5        0.         0.        ]
 [0.         0.         0.25       ... 0.         0.         0.25      ]
 ...
 [0.         0.         0.5        ... 0.         0.         0.5       ]
 [1.         0.         0.         ... 0.         0.         0.        ]
 [0.         0.5        0.5        ... 0.         0.         0.        ]]
(600, 1010)
(600, 8)


In [8]:
mask = (y_val > 0)
y_val[mask] = 1
print(y_val)

[[0. 0. 1. ... 0. 0. 0.]
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 1. 0. 0.]
 ...
 [1. 0. 0. ... 0. 1. 1.]
 [0. 0. 0. ... 1. 0. 0.]
 [1. 1. 0. ... 0. 0. 0.]]


In [9]:
wandb.login()
np.random.seed(0)
mlp_sgd = MultilabelMLP(inputLayerSize=1010, hiddenLayerSize=[1000], outputLayerSize=8)
mlp_sgd.set_activation_function('sigmoid')
loss = mlp_sgd.train_sgd(X_train, y_train, wand=1)
y_pred = mlp_sgd.predict(X_val, y_train)
cnt = 0
for i in range(len(y_pred)):
    for j in range(len(y_pred[0])):
        if((y_pred[i][j] > 0 and y_val[i][j] == 0) or (y_pred[i][j] == 0 and y_val[i][j] > 0)):
            cnt += 1
print("Loss:", loss)
print("Accuracy:", 1-hamming_loss(y_val,y_pred))
print("F1 Score", f1_score(y_val,y_pred, average='macro'))
print("Precision", precision_score(y_val,y_pred, average='macro'))
print("Recall", recall_score(y_val,y_pred, average='macro'))
print(cnt)
print(len(y_pred)*len(y_pred[0]))

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mlsamourya07[0m ([33mmourya[0m). Use [1m`wandb login --relogin`[0m to force relogin


VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011113617199993719, max=1.0…

Loss: 13.914022483562807
Accuracy: 0.75075
F1 Score 0.4526455953468639
Precision 0.9545214404846567
Recall 0.33399342062024456
997
4000


wandb: Network error (ConnectionError), entering retry loop.


In [None]:
print(y_train)
y_temp = copy(y_train)
mask = (y_temp > 0)
y_temp[mask] = 1
# print(y_train)
y_train_pred = mlp_sgd.predict(X_train, y_temp)
cnt = 0
for i in range(len(y_train_pred)):
    for j in range(len(y_train_pred[0])):
        if((y_train_pred[i][j] > 0 and y_temp[i][j] == 0) or (y_train_pred[i][j] == 0 and y_temp[i][j] > 0)):
            cnt += 1
print("Loss:", loss)
# print(y_train)
# print(y_train_pred)
print("Accuracy:", 1-hamming_loss(y_temp,y_train_pred))
print("F1 Score", f1_score(y_temp,y_train_pred, average='macro'))
print("Precision", precision_score(y_temp,y_train_pred, average='macro'))
print("Recall", recall_score(y_temp,y_train_pred, average='macro'))
print(cnt)
print(len(y_train_pred)*len(y_train_pred[0]))
print(y_train)

In [None]:
activations = ["sigmoid", "tanh", "relu"]
optimizer = ["SGD","Batch","MiniBatch"]
for i in activations:
    for j in optimizer:
        np.random.seed(27)
        mlp_sgd = MultilabelMLP(inputLayerSize=1010, hiddenLayerSize=[1000], outputLayerSize=8)
        mlp_sgd.set_activation_function(i)
        mlp_sgd.set_optimizer(j)
        mlp_sgd.max_iterations = 100
        loss = mlp_sgd.train(X_train, y_train)
        y_pred = mlp_sgd.predict(X_val, y_train)
        print("Parameters: ", i, j)
        acc = 1-hamming_loss(y_val, y_pred)
        f1 = f1_score(y_val, y_pred, average='macro') 
        precision = precision_score(y_val, y_pred, average='macro')
        recall = recall_score(y_val, y_pred, average='macro')
        print("Accuracy: ", acc, "F1 Score:", f1, "Precision:", precision, "Recall:", recall)
        

In [None]:
# activation = ["sigmoid", "tanh", "relu"]
# optimizer = ["SGD", "Batch", "MiniBatch"]
activation = ["sigmoid"]
optimizer = ["MiniBatch"]
learning_rates = [0.1,0.01,0.001,0.0001]
epochs = [10,50,100]
hiddenlayersize = [[100,100], [100], [500], [1000], [1000,1000]]
results = []
hyperparameters = []
for i in activation:
    for j in optimizer:
        for k in learning_rates:
            for l in epochs:
                for m in hiddenlayersize:
                    mlp_sgd = MultilabelMLP(inputLayerSize=1010, hiddenLayerSize=[100], outputLayerSize=8)
                    mlp_sgd.set_activation_function(i)
                    mlp_sgd.set_optimizer(j)
                    mlp_sgd.set_learning_rate(k)
                    # print(l)
                    mlp_sgd.max_iterations = l;
                    # print(mlp_sgd.max_iterations)
                    # mlp_sgd.set_epochs(l)
                    # print(mlp_sgd.max_iterations)
                    mlp_sgd.set_hidden_layers(m)
                    # print(mlp_sgd.max_iterations)
                    loss = mlp_sgd.train(X_train, y_train)
                    y_pred = mlp_sgd.predict(X_val, y_train)
                    acc = 1-hamming_loss(y_val, y_pred)
                    f1 = f1_score(y_val, y_pred, average='macro') 
                    precision = precision_score(y_val, y_pred, average='macro')
                    recall = recall_score(y_val, y_pred, average='macro')
                    print(acc, [i,j,k,l,m])
                    results.append([acc, f1, precision, recall])
                    hyperparameters.append([i, j, k, l, m])
result_df = pd.DataFrame(results, columns=['Accuracy', 'F1 Score', 'Precision', 'Recall'])
result_df['Activation'] = [param[0] for param in hyperparameters]
result_df['Optimizer'] = [param[1] for param in hyperparameters]
result_df['Learning Rate'] = [param[2] for param in hyperparameters]
result_df['Epochs'] = [param[3] for param in hyperparameters]
result_df['Hidden Layer Size'] = [param[4] for param in hyperparameters]

best_hyperparameters = result_df.loc[result_df['Accuracy'].idxmax()]

print("Best Hyperparameters:")
print(best_hyperparameters)

print("\nResults Table:")
print(result_df)            
                    


In [None]:
np.random.seed(0)
mlp_sgd = MultilabelMLP(inputLayerSize=1010, hiddenLayerSize=[1000], outputLayerSize=8)
mlp_sgd.set_activation_function('sigmoid')
loss = mlp_sgd.train_sgd(X_train, y_train)
y_pred = mlp_sgd.predict(X_test, y_train)
mask = (y_test > 0)
y_test[mask] = 1
print(y_test)
cnt = 0
for i in range(len(y_pred)):
    for j in range(len(y_pred[0])):
        if((y_pred[i][j] > 0 and y_test[i][j] == 0) or (y_pred[i][j] == 0 and y_test[i][j] > 0)):
            cnt += 1
print("Loss:", loss)
print("Accuracy:", 1-hamming_loss(y_test,y_pred))
print("F1 Score", f1_score(y_test,y_pred, average='macro'))
print("Precision", precision_score(y_test,y_pred, average='macro'))
print("Recall", recall_score(y_test,y_pred, average='macro'))
print(cnt)
print(len(y_pred)*len(y_pred[0]))

Logistic regression is a simple and interpretable model suitable for linear problems, while MLP is a more complex model capable of handling nonlinear data relationships. The choice between them depends on the dataset's complexity and the specific task at hand. For linear problems or when simplicity is preferred, logistic regression may suffice, but for complex, nonlinear problems, an MLP can provide better results if properly trained and regularized.
For the WineQT dataset, both the models are giving more or less the same accuracy