Import needed libraries

In [1]:
import numpy as np
from sklearn.datasets import make_gaussian_quantiles
from sklearn.model_selection import train_test_split

Define a class for data preparation and class calculations

In [4]:
class Network:
    # define class attributes and objects and initialize them
    def __init__(self, hls=1000, ols=2, ne=1000, lr=0.01):
        self.input_layer_size = None
        self.hidden_layer_size = hls
        self.output_layer_size = ols
        self.learning_rate = lr
        self.W1 = None
        self.W2 = None
        self.b1 = None
        self.b2 = None
        self.Z1 = None
        self.Z2 = None
        self.A1 = None
        self.A2 = None
        self.dW1 = None
        self.dW2 = None
        self.db1 = None
        self.db2 = None
        self.num_epochs = ne

    # spilt the data to train and test sets
    def data_split(self):
        np.random.seed(42)
        X, y = make_gaussian_quantiles(n_samples=300, n_features=8, n_classes=2, random_state=42)
        y = np.eye(2)[y]
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        self.input_layer_size = self.X_train.shape[1]

    # initialize weights randomely
    def weight_initialization(self):
        self.W1 = np.random.randn(self.input_layer_size, self.hidden_layer_size) * 0.01
        self.b1 = np.zeros((1, self.hidden_layer_size))
        self.W2 = np.random.randn(self.hidden_layer_size, self.output_layer_size) * 0.01
        self.b2 = np.zeros((1, self.output_layer_size))

    # train the weights on model
    def train(self):

        for e in range(self.num_epochs):
            self.forward_propagation()
            mse = self.MSE()
            self.backward_propagation()
            self.weight_update()

            if e % 300 == 0:  # report the results in each 50 iterations
                print(f"epoch {e}, train loss: {mse}")

    # forward pass
    def forward_propagation(self):
        self.Z1 = np.dot( self.X_train, self.W1) + self.b1
        self.A1 = self.relu(self.Z1)
        self.Z2 = np.dot(self.A1, self.W2) + self.b2
        self.A2 = self.sigmoid(self.Z2)


    # backward pass and propagating error
    def backward_propagation(self):

        m = self.X_train.shape[0]
        dZ2 = self.A2 - self.y_train
        self.dW2 = np.dot(self.A1.T, dZ2) / m
        self.db2 = np.sum(dZ2, axis=0, keepdims=True) / m
        dA1 = np.dot(dZ2, self.W2.T)
        dZ1 = dA1 * np.where(self.Z1 > 0, 1, 0)
        self.dW1 = np.dot(self.X_train.T, dZ1) / m
        self.db1 = np.sum(dZ1, axis=0, keepdims=True) / m
    

    # calculate error
    def MSE(self):
        return np.mean((self.y_train - self.A2)**2)

    # update weights after each iteration
    def weight_update(self):
        self.W1 -= self.learning_rate * self.dW1
        self.b1 -= self.learning_rate * self.db1
        self.W2 -= self.learning_rate * self.dW2
        self.b2 -= self.learning_rate * self.db2

    # activation function for hidden layer
    def relu(self, z):
        return np.maximum(0, z)

    # activation function for output
    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))
    
    # test the model with test data
    def test(self):
        Z1 = np.dot( self.X_test, self.W1) + self.b1
        A1 = self.relu(Z1)
        Z2 = np.dot(A1, self.W2) + self.b2
        A2 = self.sigmoid(Z2)
        pred = (A2 > 0.5).astype(float)

        print(f"Test accuracy: {np.mean(pred == self.y_test) * 100:.2f}%", )

### main program

learning rate=0.1 and epochs=1000: accuracy=95%

In [61]:
net = Network(lr=0.1)
net.data_split()
net.weight_initialization()
net.train()
net.test()

epoch 0, train loss: 0.24984946785413803
epoch 50, train loss: 0.2444588119572477
epoch 100, train loss: 0.23494592810874734
epoch 150, train loss: 0.21947901010232287
epoch 200, train loss: 0.20070435267680425
epoch 250, train loss: 0.1820776557003048
epoch 300, train loss: 0.16498963899456662
epoch 350, train loss: 0.1493627035603874
epoch 400, train loss: 0.13480501924377206
epoch 450, train loss: 0.12112839094784768
epoch 500, train loss: 0.10831263991924865
epoch 550, train loss: 0.09631048600038668
epoch 600, train loss: 0.08511344996972135
epoch 650, train loss: 0.07467271967484534
epoch 700, train loss: 0.06509366523361806
epoch 750, train loss: 0.05639367604844618
epoch 800, train loss: 0.04872261099777397
epoch 850, train loss: 0.041931370322057106
epoch 900, train loss: 0.03600881049560297
epoch 950, train loss: 0.03084239494663581
Test accuracy: 95.00%


learning rate=0.1 and epochs=5000: accuracy=96.67%

In [5]:
net = Network(lr=0.1, ne=5000)
net.data_split()
net.weight_initialization()
net.train()
net.test()

epoch 0, train loss: 0.24984946785413803
epoch 300, train loss: 0.16498963899456662
epoch 600, train loss: 0.08511344996972135
epoch 900, train loss: 0.03600881049560297
epoch 1200, train loss: 0.014246905814059218
epoch 1500, train loss: 0.006222533764400692
epoch 1800, train loss: 0.0030897444155854005
epoch 2100, train loss: 0.0017192748429944366
epoch 2400, train loss: 0.001047538607213062
epoch 2700, train loss: 0.0006855021381624295
epoch 3000, train loss: 0.00047344263078874044
epoch 3300, train loss: 0.00034123801809129805
epoch 3600, train loss: 0.00025439994045856155
epoch 3900, train loss: 0.00019513346881393473
epoch 4200, train loss: 0.00015353350127888157
epoch 4500, train loss: 0.00012334168868007516
epoch 4800, train loss: 0.00010078860043547762
Test accuracy: 96.67%


learning rate=0.01 and epochs=1000: accuracy=61.67%

In [6]:
net = Network(lr=0.01)
net.data_split()
net.weight_initialization()
net.train()
net.test()

epoch 0, train loss: 0.24984946785413803
epoch 300, train loss: 0.24684850530211744
epoch 600, train loss: 0.24298118358770093
epoch 900, train loss: 0.23726172057292036
Test accuracy: 61.67%


learning rate=0.01 and epochs=5000: accuracy=83.33%

In [7]:
net = Network(lr=0.01, ne=5000)
net.data_split()
net.weight_initialization()
net.train()
net.test()

epoch 0, train loss: 0.24984946785413803
epoch 300, train loss: 0.24684850530211744
epoch 600, train loss: 0.24298118358770093
epoch 900, train loss: 0.23726172057292036
epoch 1200, train loss: 0.229256755790356
epoch 1500, train loss: 0.21928969864880835
epoch 1800, train loss: 0.20812320240516832
epoch 2100, train loss: 0.196662253868069
epoch 2400, train loss: 0.18546085596035428
epoch 2700, train loss: 0.17481308478415858
epoch 3000, train loss: 0.16476366528509717
epoch 3300, train loss: 0.15522562152590702
epoch 3600, train loss: 0.14613959641836233
epoch 3900, train loss: 0.13740311086840243
epoch 4200, train loss: 0.12900299666169807
epoch 4500, train loss: 0.12090419394187611
epoch 4800, train loss: 0.1131156737507235
Test accuracy: 83.33%


learning rate=0.9 and epochs=1000: accuracy=96.67%

In [8]:
net = Network(lr=0.9)
net.data_split()
net.weight_initialization()
net.train()
net.test()

epoch 0, train loss: 0.24984946785413803
epoch 300, train loss: 0.0006962770862129016
epoch 600, train loss: 6.979939969569984e-05
epoch 900, train loss: 2.1291029765191156e-05
Test accuracy: 96.67%


learning rate=0.9 and epochs=5000: accuracy=96.67%  (no improvement with increamenting number of epochs)

In [9]:
net = Network(lr=0.9, ne=5000)
net.data_split()
net.weight_initialization()
net.train()
net.test()

epoch 0, train loss: 0.24984946785413803
epoch 300, train loss: 0.0006962770862129016
epoch 600, train loss: 6.979939969569984e-05
epoch 900, train loss: 2.1291029765191156e-05
epoch 1200, train loss: 9.628621458520082e-06
epoch 1500, train loss: 5.313112831365957e-06
epoch 1800, train loss: 3.3096475400973767e-06
epoch 2100, train loss: 2.233532440936013e-06
epoch 2400, train loss: 1.5956205410268718e-06
epoch 2700, train loss: 1.1890534165993882e-06
epoch 3000, train loss: 9.159109621221551e-07
epoch 3300, train loss: 7.245059170458667e-07
epoch 3600, train loss: 5.857036296312873e-07
epoch 3900, train loss: 4.821356853608939e-07
epoch 4200, train loss: 4.0304618139444854e-07
epoch 4500, train loss: 3.413494556510384e-07
epoch 4800, train loss: 2.9235729506400524e-07
Test accuracy: 96.67%
