# ***0. Data Loading***

In [71]:
#import necessary libraries
import numpy as np
import pandas as pd

np.random.seed(68)

#load the training and test1 data
traindata = pd.read_csv('UNSWNB15_training_coursework.csv')
testdata = pd.read_csv('UNSWNB15_testing1_coursework.csv')

#display the rows and columns
traindata.head()
testdata.head()

#check the shape of data
traindata.shape
testdata.shape



(4000, 44)

# ***1. Data Pre-Processing (Task 1)***

In [72]:
"""encode categorical variables, align columns, and standardize numerical features using z-score normalization.
to ensure numerical stability and model convergence."""

#identify categorical data and perform one hot encoding
categorical_data= traindata.select_dtypes(include=['object']).columns
print(categorical_data)
traindata = pd.get_dummies(traindata, columns=categorical_data)
testdata = pd.get_dummies(testdata, columns=categorical_data)


#align both the dataset to ensure both datasets have same feature columns
traindata, testdata = traindata.align(testdata, join='left', axis=1,fill_value=0)

# seperate the features and labels
trainx= traindata.drop(columns=['label']).values #features
trainy= traindata['label'].values.reshape(-1,1)  #label
testx1= testdata.drop(columns=['label']).values
testy1 = testdata['label'].values.reshape(-1,1)

#convert the data to numeric float
trainx = np.asarray(trainx, dtype=np.float64)
testx1 = np.asarray(testx1, dtype=np.float64)

#standardization
mean_avg= np.mean(trainx,axis=0)
st_dev= np.std(trainx,axis=0)
trainx= (trainx-mean_avg)/st_dev
testx1= (testx1-mean_avg)/st_dev

Index(['proto', 'service', 'state'], dtype='object')


# ***2. Model Implementation and Training (Task 2)***

In [73]:
"""implementing a Multi-Layer Perceptron (MLP) from scratch using NumPy,
including forward & backward propagation with binary class entropy using batch gradient descent.
after network is trained on the processed data."""

#activation functions:(relu for the hidden layer and sigmoid for  the output layer)
#relu function
def relu(x):
    return np.maximum(0, x)

#relu derivative
def relu_deri(x):
    return np.where(x > 0, 1, 0)

#sigmoid function
def sigmoid(x):
    return np.where(x >= 0,
                    1/(1 + np.exp(-x)),
                    np.exp(x)/(1 + np.exp(x)))
#sigmoid derivative
def sigmoid_deri(x):
    sig = sigmoid(x)
    return sig * (1 - sig)

#loss function ( binary cross-entropy is used to measure the difference between predicted and actual labels for binary classification)
def binarycross_entropy_(y_true, y_pred):
     y_pred = np.clip(y_pred, 1e-7, 1 - 1e-7)
     return -(y_true * np.log(y_pred)+(1 - y_true) * np.log(1 - y_pred)).mean()

#mlp model
class MLP:
    def __init__(self, input, hiddenlayer1, hiddenlayer2, output):
        #initialization weights and bias for layer1 using He initialization which helps maintain a stable variance of activations and gradients throughout the network, preventing exploding or vanishing gradients.
        self.w_hidden1 = np.random.randn(input,hiddenlayer1) * np.sqrt(2/input)
        self.b_h1= np.zeros((1,hiddenlayer1))

        #initialization weights and bias for layer2
        self.w_hidden2 = np.random.randn(hiddenlayer1,hiddenlayer2) * np.sqrt(2/hiddenlayer1)
        self.b_h2 = np.zeros((1,hiddenlayer2))

        #initialization weights and bias for output
        self.w_output = np.random.randn(hiddenlayer2,output) * np.sqrt(2/hiddenlayer2)
        self.b_output =np.zeros((1,output))

    #forward pass(predict output)
    def forward(self, x):
        self.h_input1 = np.dot(x,self.w_hidden1) + self.b_h1
        self.h_output1 = relu(self.h_input1)

        self.h_input2 = np.dot(self.h_output1,self.w_hidden2) + self.b_h2
        self.h_output2 = relu(self.h_input2)

        # from inputs, weights and biases calculate the output values
        self.finalinput = np.dot(self.h_output2,self.w_output) + self.b_output
        self.finaloutput = sigmoid(self.finalinput)

        return self.finaloutput

    #backward pass (calculate how much each neuron contributed to the total error.)
    def backward(self, x, y_true, learningrate=0.001):
        #compute output layer error and delta
        error = self.finaloutput - y_true
        delta = error *  sigmoid_deri(self.finalinput)

        #backpropagate through hidden layer 2
        error2 =  delta.dot(self.w_output.T)
        delta2 =  error2 * relu_deri(self.h_input2)

        #backpropagate through hidden layer 1
        error1 = delta2.dot(self.w_hidden2.T)
        delta1 = error1 * relu_deri(self.h_input1)

        #-----update weights and biases using gradient descent----

        #update weight and bias for output layer
        self.w_output -= learningrate * self.h_output2.T.dot(delta)
        self.b_output -=  learningrate * np.sum(delta, axis=0, keepdims=True)

        #update weight and bias for hidden layer 2
        self.w_hidden2 -=  learningrate * self.h_output1.T.dot(delta2)
        self.b_h2 -= learningrate * np.sum(delta2, axis=0, keepdims=True)

        #update weight and bias for hidden layer 1
        self.w_hidden1 -= learningrate * x.T.dot(delta1)
        self.b_h1 -= learningrate * np.sum(delta1, axis=0, keepdims=True)

    #train the MLP model
    def train(self,x,y,epochs=300,learningrate=0.001):
        for epoch in range(epochs):
            ypred = self.forward(x)                    # Forward pass (y_pred = predicted output)
            loss = binarycross_entropy_(y, ypred)      # Compute loss (y = label data)
            self.backward(x, y, learningrate)          # Backward pass (update weights)

            # for every 100 epochs print loss and accuracy
            if (epoch + 1) % 100 == 0:
                predictions = (ypred > 0.5).astype(int)
                accuracy = (predictions == y).mean()
                print(f'epoch:{epoch+1} loss:{loss:.4f} accuracy:{accuracy:.4f}')


#--------model implementation and training------------

#initialize MLP architecture: input -> 64 -> 32 -> 1
mlp = MLP(input=trainx.shape[1],hiddenlayer1=64,hiddenlayer2=32,output=1)

#training the mlp model on trianing data
mlp.train(trainx,trainy,epochs=1000,learningrate=0.001)

epoch:100 loss:0.8284 accuracy:0.8865
epoch:200 loss:0.8219 accuracy:0.8921
epoch:300 loss:0.1906 accuracy:0.9127
epoch:400 loss:0.1935 accuracy:0.9166
epoch:500 loss:0.2079 accuracy:0.9150
epoch:600 loss:0.1549 accuracy:0.9395
epoch:700 loss:0.1451 accuracy:0.9482
epoch:800 loss:0.1592 accuracy:0.9353
epoch:900 loss:0.3029 accuracy:0.8908
epoch:1000 loss:0.1528 accuracy:0.9402


# ***3. Model Performance Evaluation (Task 3)***

In [74]:
""" model performance evaluation on test set1
using confusion matrix,accuracy, balanced accuracy,recall,precision and F1-score
and also predicting labels for test set2."""

test1_predictions =(mlp.forward(testx1) > 0.5).astype(int)
test1_accuracy =(test1_predictions == testy1).mean()
print(f"Accuracy on Testing Set 1: {test1_accuracy:.4f}\n")

def evaluation(ytrue_, ypred_):
    TrueP =np.sum((ytrue_==1) & (ypred_==1))
    TrueN =np.sum((ytrue_==0) & (ypred_==0))
    FalseP =np.sum((ytrue_==0) & (ypred_==1))
    FalseN =np.sum((ytrue_==1) & (ypred_==0))

    #accuracy calculation
    accuracy =(TrueP + TrueN) / (TrueP + TrueN + FalseP + FalseN)
    #precision calculation
    precision =TrueP / (TrueP + FalseP) if (TrueP + FalseP) else 0
    #recall calculation
    recall =TrueP / (TrueP + FalseN) if (TrueP + FalseN) else 0
    #f1-score calculation
    f1score =(2* precision* recall) / (precision + recall) if (precision + recall) else 0
    #balanced accuracy calculation
    TruePR = TrueP / (TrueP + FalseN)
    TrueNR = TrueN / (TrueN + FalseP)
    balanced_accuracy = 0.5 * (TruePR + TrueNR)

    print(f" Confusion matrix:\n TP: {TrueP}, FP: {FalseP}\n FN: {FalseN}, TN: {TrueN}\n")
    print(f" Accuracy:{accuracy:.4f}")
    print(f" Balanced Accuracy:{balanced_accuracy:.4f}\n")
    print(f" Recall:{recall:.4f}")
    print(f" Precision:{precision:.4f}")
    print(f" f1 Score:{f1score:.4f}\n")



#performance measure
evaluation(testy1,test1_predictions)

#------------------predicting the labels for test set 2-------------
#load the testset 2 data
test_data2 =pd.read_csv('UNSWNB15_testing2_coursework_no_label.csv')

#perform one hot encoding on test set 2
test_data2 =pd.get_dummies(test_data2, columns=categorical_data)

#align the columns to match the training data
test_data2 =test_data2.reindex(columns=traindata.drop(columns=['label']).columns, fill_value=0)

#standardization
test2_x =test_data2.values.astype(np.float64)
test2_x =(test2_x - mean_avg) / st_dev

#predicting the labels for test set 2
test2_predictions =(mlp.forward(test2_x) > 0.5).astype(int)

print("Predictions for Test Set 2:")
print(test2_predictions.flatten())

Accuracy on Testing Set 1: 0.9050

 Confusion matrix:
 TP: 2373, FP: 61
 FN: 319, TN: 1247

 Accuracy:0.9050
 Balanced Accuracy:0.9174

 Recall:0.8815
 Precision:0.9749
 f1 Score:0.9259

Predictions for Test Set 2:
[0 0 1 1 0 1 0 0 1 0 0 1 1 1 1 0 1 1 0 1 1 1 0 1 1]


# ***4. Performance  Evaluation***

Accuracy on Testing Set 1:  0.9050


Predicted class labels for the data samples in the testing set 2 below:(0 or 1)

| **Sample ID** |**Predicted Label** |
| --- | --- |
| 1 | 0  |
| 2 | 0  |
| 3 | 1  |
| 4 | 1  |
| 5 | 0  |
| 6 | 1  |
| 7 | 0  |
| 8 | 0  |
| 9 |  1 |
| 10 | 0  |
| 11 |  0 |
| 12 | 1 |
| 13 |  1 |
| 14 |  1|
| 15 |  1 |
| 16 | 0|
| 17 |  1|
| 18 |  1|
| 19 |  0|
| 20 |  1|
| 21 |  1|
| 22 |  1|
| 23 | 0 |
| 24 |  1 |
| 25 | 1 |