# Libraries

In [78]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Import Data

In [79]:
features = np.load('clean_data/features.npy')
labels = np.load('clean_data/labels.npy')

## Spilt Train, Cross Validation and Test Data

In [80]:
x_train, x_test_full, y_train, y_test_full = train_test_split(features, labels, train_size=0.7, random_state=1)
x_test, x_cv, y_test, y_cv = train_test_split(x_test_full, y_test_full, train_size=0.5, random_state=1)
print(x_train.shape)
print(x_cv.shape)
print(x_test.shape)
print(y_train.shape)

(38727, 33)
(8299, 33)
(8299, 33)
(38727,)


# Back Propogation, R  Back Propogation

# Activation Function

In [81]:
def sigmoid(Z):
    return 1/(1+np.exp(-Z))
def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return np.where(x > 0, 1, 0)

# Function to initialize weights as Gaussian distributions with specific mu and sigma

In [82]:
def initialize_weights(nodes_in, nodes_out, mu=0, sigma=0.1):
    return np.random.normal(mu, sigma, size=(nodes_in, nodes_out))

## Neural Network

In [115]:

class NeuralNetwork:
    def __init__(self, x, y,vocabulary_size=21361, nodes_in_layer1=32, nodes_in_layer2=16, nodes_in_layer3=1, l_rate=0.001):
        embedding_dim =50
        self.x=x
        vocab_size = vocabulary_size +1 # Add 1 for the out-of-vocabulary token
        self.embedding_weights = np.random.randn(vocab_size, embedding_dim)
        # define x, y
        self.inputs_in_layer0 =self.embedding(self.x)
        self.y = y.reshape(-1, 1)  # reshape y to be a column vector

        self.l_rate = l_rate  # learning rate

        # define and set the number of neurons in each layer
        self.nodes_in_layer1 = nodes_in_layer1
        self.nodes_in_layer2 = nodes_in_layer2
        self.nodes_in_layer3 = nodes_in_layer3

        # initialize the weights (theta) matrices
        # self.thetas_layer0 = np.random.rand(self.inputs_in_layer0.shape[1] + 1, self.nodes_in_layer1)
        # self.thetas_layer1 = np.random.rand(self.nodes_in_layer1 + 1, self.nodes_in_layer2)
        # self.thetas_layer2 = np.random.rand(self.nodes_in_layer2 + 1, self.nodes_in_layer3)
        self.thetas_layer0 = np.random.randn(self.inputs_in_layer0.shape[1] + 1, self.nodes_in_layer1) * np.sqrt(2 / (self.inputs_in_layer0.shape[1] + 1))
        self.thetas_layer1 = np.random.randn(self.nodes_in_layer1 + 1, self.nodes_in_layer2) * np.sqrt(2 / (self.nodes_in_layer1 + 1))
        self.thetas_layer2 = np.random.randn(self.nodes_in_layer2 + 1, self.nodes_in_layer3) * np.sqrt(2 / (self.nodes_in_layer2 + 1))
        self.epsilon = 1e-5
        self.momentum = 0.9
        self.gamma1 = np.ones(nodes_in_layer1)
        self.beta1 = np.zeros(nodes_in_layer1)
        self.gamma2 = np.ones(nodes_in_layer2)
        self.beta2 = np.zeros(nodes_in_layer2)
    def batch_normalize(self, input_data, gamma, beta):
            # Calculate mean and variance
            batch_mean = np.mean(input_data, axis=0)
            batch_var = np.var(input_data, axis=0)

            # Normalize
            normalized_data = (input_data - batch_mean) / np.sqrt(batch_var + self.epsilon)

            # Scale and shift
            scaled_and_shifted_data = gamma * normalized_data + beta

            return scaled_and_shifted_data

    def feedforward(self):
        #compute all the nodes (a1, a2, a3, a4) in layer1
        n = self.inputs_in_layer0.shape[0]

        self.Z1 = self.thetas_layer0[0] + np.dot(self.inputs_in_layer0, self.thetas_layer0[1:])
        self.layer1 = relu(self.Z1)  #values of a1, a2, a3, a4 in layer 1
        self.normalized_data_layer1 = self.batch_normalize(self.layer1, self.gamma1, self.beta1)
        #compute all the nodes (a1, a2, a3) in layer2
        self.Z2 = self.thetas_layer1[0] + np.dot(self.normalized_data_layer1, self.thetas_layer1[1:])
        self.layer2 = relu(self.Z2)  #values of a1, a2, a3 in layer 2
        self.normalized_data_layer2 = self.batch_normalize(self.layer2, self.gamma2, self.beta2)
        #compute all the nodes (a1) in layer3
        self.Z3 = self.thetas_layer2[0] + np.dot(self.normalized_data_layer2, self.thetas_layer2[1:])
        self.layer3 = sigmoid(self.Z3)  #output layer

        return self.layer3

    def cost_func(self):
        epsilon = 1e-15  # Small constant to avoid log(0)
        self.n = self.inputs_in_layer0.shape[0]  # number of training examples
        self.cost = (1/self.n) * np.sum(-self.y * np.log(self.layer3 + epsilon) - (1 - self.y) * np.log(1 - self.layer3 + epsilon))
        return self.cost
    def embedding(self, x):
            self.embedded_input = self.embedding_weights[x]
            pooled_embeddings = np.mean(self.embedded_input, axis=1)
            return pooled_embeddings
    def calculate_accuracy(self ):
        actual_output=self.y
        predicted_output=self.layer3
        # Convert predicted probabilities to binary predictions (0 or 1)
        predicted_classes = (predicted_output >= 0.5 ).astype(int)

        # Compare predicted classes with actual classes
        correct_predictions = (predicted_classes == actual_output).sum()

        # Calculate accuracy
        accuracy = correct_predictions / len(actual_output)

        return accuracy

    def Rbackprop(self):
        # Define RProp parameters
        delta0 = self.l_rate  # Initial update value
        delta_max = 60  # Maximum update value
        delta_min = 1e-6  # Minimum update value
        eta_plus = 1.5  # Increase factor
        eta_minus = 0.5  # Decrease factor
        epsilon = 1e-15
         #dervative of E with respect to theta and bias in layer2
        self.dE_dlayer3 = (1/self.n) * (self.layer3-self.y)/(self.layer3*(1-self.layer3)+epsilon)
        self.dE_dZ3 = np.multiply(self.dE_dlayer3, (sigmoid(self.Z3)* (1-sigmoid(self.Z3))))
        self.dE_dtheta2 = np.dot(self.layer2.T, self.dE_dZ3)
        self.dE_dbias2 = np.dot(np.ones(self.n), self.dE_dZ3)

        #dervative of E with respect to theta and bias in layer1
        self.dE_dlayer2 = np.dot(self.dE_dZ3, self.thetas_layer2[1:].T)
        self.dE_dZ2 = np.multiply(self.dE_dlayer2, relu_derivative(self.Z2))
        self.dE_dtheta1 = np.dot(self.layer1.T, self.dE_dZ2)
        self.dE_dbias1 = np.dot(np.ones(self.n), self.dE_dZ2)


        #dervative of E with respect to theta and bias in layer0
        self.dE_dlayer1 = np.dot(self.dE_dZ2, self.thetas_layer1[1:].T)
        self.dE_dZ1 = np.multiply(self.dE_dlayer1, relu_derivative(self.Z1))
        self.dE_dtheta0 = np.dot(self.inputs_in_layer0.T, self.dE_dZ1)
        self.dE_dbias0 = np.dot(np.ones(self.n), self.dE_dZ1)

        # Initialize RProp update values
        if not hasattr(self, 'prev_dE_dtheta2'):
            self.prev_dE_dtheta2 = np.zeros_like(self.dE_dtheta2)
            self.delta_theta2 = np.full_like(self.dE_dtheta2, delta0)
        else:
            self.delta_theta2 = np.where(self.dE_dtheta2 * self.prev_dE_dtheta2 > 0,
                                        np.minimum(self.delta_theta2 * eta_plus, delta_max),
                                        np.maximum(self.delta_theta2 * eta_minus, delta_min))
        self.prev_dE_dtheta2 = self.dE_dtheta2

        if not hasattr(self, 'prev_dE_dtheta1'):
            self.prev_dE_dtheta1 = np.zeros_like(self.dE_dtheta1)
            self.delta_theta1 = np.full_like(self.dE_dtheta1, delta0)
        else:
            self.delta_theta1 = np.where(self.dE_dtheta1 * self.prev_dE_dtheta1 > 0,
                                        np.minimum(self.delta_theta1 * eta_plus, delta_max),
                                        np.maximum(self.delta_theta1 * eta_minus, delta_min))
        self.prev_dE_dtheta1 = self.dE_dtheta1

        if not hasattr(self, 'prev_dE_dtheta0'):
            self.prev_dE_dtheta0 = np.zeros_like(self.dE_dtheta0)
            self.delta_theta0 = np.full_like(self.dE_dtheta0, delta0)
        else:
            self.delta_theta0 = np.where(self.dE_dtheta0 * self.prev_dE_dtheta0 > 0,
                                        np.minimum(self.delta_theta0 * eta_plus, delta_max),
                                        np.maximum(self.delta_theta0 * eta_minus, delta_min))
        self.prev_dE_dtheta0 = self.dE_dtheta0

        # Updating theta using RProp in layers 2, 1, and 0
        self.thetas_layer2[1:] -= np.sign(self.dE_dtheta2) * self.delta_theta2
        self.thetas_layer1[1:] -= np.sign(self.dE_dtheta1) * self.delta_theta1
        self.thetas_layer0[1:] -= np.sign(self.dE_dtheta0) * self.delta_theta0

        # Updating bias using RProp in layers 2, 1, and 0
        self.thetas_layer2[0] -= np.sign(self.dE_dbias2) * self.delta_theta2[0]
        self.thetas_layer1[0] -= np.sign(self.dE_dbias1) * self.delta_theta1[0]
        self.thetas_layer0[0] -= np.sign(self.dE_dbias0) * self.delta_theta0[0]
        return self

    def backprop(self):
        epsilon=1e-15
        #dervative of E with respect to theta and bias in layer2
        self.dE_dlayer3 = (1/self.n) * (self.layer3-self.y)/(self.layer3*(1-self.layer3)+epsilon)
        self.dE_dZ3 = np.multiply(self.dE_dlayer3, (sigmoid(self.Z3)* (1-sigmoid(self.Z3))))
        self.dE_dtheta2 = np.dot(self.layer2.T, self.dE_dZ3)
        self.dE_dbias2 = np.dot(np.ones(self.n), self.dE_dZ3)

        #dervative of E with respect to theta and bias in layer1
        self.dE_dlayer2 = np.dot(self.dE_dZ3, self.thetas_layer2[1:].T)
        self.dE_dZ2 = np.multiply(self.dE_dlayer2, relu_derivative(self.Z2))
        self.dE_dtheta1 = np.dot(self.layer1.T, self.dE_dZ2)
        self.dE_dbias1 = np.dot(np.ones(self.n), self.dE_dZ2)
        # Gradient for batch normalization parameters in layer2
        dL_dgamma2 = np.sum(self.dE_dZ2 * self.normalized_data_layer2, axis=0)
        dL_dbeta2 = np.sum(self.dE_dZ2, axis=0)

        #dervative of E with respect to theta and bias in layer0
        self.dE_dlayer1 = np.dot(self.dE_dZ2, self.thetas_layer1[1:].T)
        self.dE_dZ1 = np.multiply(self.dE_dlayer1, relu_derivative(self.Z1))
        self.dE_dtheta0 = np.dot(self.inputs_in_layer0.T, self.dE_dZ1)
        self.dE_dbias0 = np.dot(np.ones(self.n), self.dE_dZ1)
        # Gradient for batch normalization parameters in layer1
        dL_dgamma1 = np.sum(self.dE_dZ1 * self.normalized_data_layer1, axis=0)
        dL_dbeta1 = np.sum(self.dE_dZ1, axis=0)
        #updating theta using gradient descent in layers 2, 1, and 0
        self.thetas_layer2[1:] = self.thetas_layer2[1:] - self.l_rate * self.dE_dtheta2
        self.thetas_layer1[1:] = self.thetas_layer1[1:] - self.l_rate * self.dE_dtheta1
        self.thetas_layer0[1:] = self.thetas_layer0[1:] - self.l_rate * self.dE_dtheta0
        # self.de_wegihts = np.dot(self.embedded_input.T, self.dE_dZ1)
        # self.embedding_weights = -self.embedding_weights - self.l_rate * self.de_wegihts
        # Update batch normalization parameters using gradient descent
        self.gamma1 -= self.l_rate * dL_dgamma1
        self.beta1 -= self.l_rate * dL_dbeta1
        self.gamma2 -= self.l_rate * dL_dgamma2
        self.beta2 -= self.l_rate * dL_dbeta2

        #updating bias using gradient descent in layers 2, 1, and 0
        self.thetas_layer2[0] = self.thetas_layer2[0] - self.l_rate * self.dE_dbias2
        self.thetas_layer1[0] = self.thetas_layer1[0] - self.l_rate * self.dE_dbias1
        self.thetas_layer0[0] = self.thetas_layer0[0] - self.l_rate * self.dE_dbias0
        return self
    def fit(self,epochs,Backpropagate):
        losses=[]
        for i in range(epochs):
            self.feedforward()
            error=self.cost_func()
            losses.append(error)
            if Backpropagate==True:
                self.backprop()
            else:
                self.Rbackprop()
            print("iteration #",i+1)
            print('accuracy: ',self.calculate_accuracy())
            print("Cost: \n",error,"\n")
    def evaluate(self, x,y):
        epsilon=1e-15
        inputs_layer0 = self.embedding(x)
        Z1 = self.thetas_layer0[0] + np.dot(inputs_layer0, self.thetas_layer0[1:])
        layer1 = relu(Z1)
        layer1 = self.batch_normalize(layer1, self.gamma1, self.beta1)
        Z2 = self.thetas_layer1[0] + np.dot(layer1, self.thetas_layer1[1:])
        layer2 = relu(Z2)
        layer2=self.batch_normalize(layer2,self.gamma2,self.beta2)
        Z3 = self.thetas_layer2[0] + np.dot(layer2, self.thetas_layer2[1:])
        layer3 = sigmoid(Z3)
        loss= (1/inputs_layer0.shape[0]) * np.sum(-y * np.log(layer3) - (1 - y) * np.log(1 - layer3+epsilon)) #cross entropy
        actual_output=y
        predicted_output=layer3
        # Convert predicted probabilities to binary predictions (0 or 1)
        predicted_classes = (predicted_output >= 0.5).astype(int)

        # Compare predicted classes with actual classes
        correct_predictions = (predicted_classes == actual_output).sum()

        # Calculate accuracy
        accuracy = correct_predictions / len(actual_output)
        return loss,accuracy,layer3

# Train

In [116]:
nn=NeuralNetwork(x_train,y_train,l_rate=0.1)
nn.fit(epochs=100,Backpropagate=True)

iteration # 1
accuracy:  0.4997805148862551
Cost: 
 0.8436458509745449 

iteration # 2
accuracy:  0.5000903762233068
Cost: 
 0.8334022452853972 

iteration # 3
accuracy:  0.5022077620264932
Cost: 
 0.8242117977603617 

iteration # 4
accuracy:  0.5033439202623493
Cost: 
 0.8158612440626661 

iteration # 5
accuracy:  0.5058744545149378
Cost: 
 0.8080388539729118 

iteration # 6
accuracy:  0.5087664936607534
Cost: 
 0.8008780613186712 

iteration # 7
accuracy:  0.509231285666331
Cost: 
 0.7941488030486146 

iteration # 8
accuracy:  0.5090247114416299
Cost: 
 0.787621621660901 

iteration # 9
accuracy:  0.5097735430061714
Cost: 
 0.7810559687349881 

iteration # 10
accuracy:  0.5139308492782813
Cost: 
 0.7750449445235388 

iteration # 11
accuracy:  0.5165646706432205
Cost: 
 0.769209908281527 

iteration # 12
accuracy:  0.5194308880109484
Cost: 
 0.7637631796413099 

iteration # 13
accuracy:  0.522322927156764
Cost: 
 0.7588062078298793 

iteration # 14
accuracy:  0.5271774214372402
Cost: 

In [117]:
test=nn.evaluate(x_cv,y_cv.reshape(-1,1))
print('Loss :',test[0])
print('Acc :',test[1])

Loss : 0.6896738007455112
Acc : 0.5709121580913363


In [118]:
Test=nn.evaluate(x_test,y_test.reshape(-1,1))
print('Loss :',Test[0])
print('Acc :',Test[1])

Loss : 0.687756925960197
Acc : 0.5709121580913363


In [119]:
nn1=NeuralNetwork(x_train,y_train)
nn1.fit(epochs=100,Backpropagate=False)

iteration # 1
accuracy:  0.5525085857412141
Cost: 
 0.8247799918214779 

iteration # 2
accuracy:  0.5519663284013737
Cost: 
 0.8200794113007376 

iteration # 3
accuracy:  0.5534381697523691
Cost: 
 0.8114387664558357 

iteration # 4
accuracy:  0.557388901799778
Cost: 
 0.7996853443386978 

iteration # 5
accuracy:  0.5579311591396183
Cost: 
 0.7867099948875134 

iteration # 6
accuracy:  0.5597903271619283
Cost: 
 0.7742313561118422 

iteration # 7
accuracy:  0.5596870400495778
Cost: 
 0.7586914321114848 

iteration # 8
accuracy:  0.5697833552818448
Cost: 
 0.737325378857916 

iteration # 9
accuracy:  0.5688795930487773
Cost: 
 0.7149082550210897 

iteration # 10
accuracy:  0.549151754589821
Cost: 
 0.7063938003082624 

iteration # 11
accuracy:  0.5682340485965863
Cost: 
 0.6858900696375574 

iteration # 12
accuracy:  0.5905698866423942
Cost: 
 0.6698825328988443 

iteration # 13
accuracy:  0.5956051333694838
Cost: 
 0.6676947806909164 

iteration # 14
accuracy:  0.599452578304542
Cost: 

In [120]:
Test_1=nn1.evaluate(x_cv,y_cv.reshape(-1,1))
print('Loss :',Test_1[0])
print('Acc :',Test_1[1])

Loss : 0.6711490782867995
Acc : 0.6020002409928907


In [121]:
Test_2=nn1.evaluate(x_test,y_test.reshape(-1,1))
print('Loss :',Test_2[0])
print('Acc :',Test_2[1])

Loss : 0.6719181589588227
Acc : 0.6041691770092782


# Bayiesn Neural Network

In [122]:
class BayesianNeuralNetwork:
    def __init__(self, x, y,vocabulary_size=21361,nodes_in_layer1=32, nodes_in_layer2=16, nodes_in_layer3=1, l_rate=0.1):
        embedding_dim= 50
        vocab_size = vocabulary_size +1 # Add 1 for the out-of-vocabulary token
        self.embedding_weights = np.random.randn(vocab_size, embedding_dim)
        # Define x, y
        self.inputs_in_layer0 = self.embedding(x) # Layer 0
        self.y = y.reshape(-1,1)

        self.l_rate = l_rate  # Learning rate

        # Define and set the number of neurons in each layer
        self.nodes_in_layer1 = nodes_in_layer1
        self.nodes_in_layer2 = nodes_in_layer2
        self.nodes_in_layer3 = nodes_in_layer3

        # Initialize weights and biases with smaller values using Gaussian distributions
        self.thetas_layer0 = initialize_weights(self.inputs_in_layer0.shape[1] + 1, self.nodes_in_layer1, mu=0.001, sigma=0.01)
        self.thetas_layer1 = initialize_weights(self.nodes_in_layer1 + 1, self.nodes_in_layer2, mu=0.001, sigma=0.01)
        self.thetas_layer2 = initialize_weights(self.nodes_in_layer2 + 1, self.nodes_in_layer3, mu=0.001, sigma=0.01)

        # Initialize prior distributions for weights
        self.prior_mean_theta0 = np.zeros_like(self.thetas_layer0)
        self.prior_mean_theta1 = np.zeros_like(self.thetas_layer1)
        self.prior_mean_theta2 = np.zeros_like(self.thetas_layer2)

        self.prior_variance_theta0 = np.ones_like(self.thetas_layer0)
        self.prior_variance_theta1 = np.ones_like(self.thetas_layer1)
        self.prior_variance_theta2 = np.ones_like(self.thetas_layer2)
    def embedding(self, x):
        self.embedded_input = self.embedding_weights[x]
        pooled_embeddings = np.mean(self.embedded_input, axis=1)
        return pooled_embeddings

    def feedforward(self):
        # Sample weights from their respective Gaussian distributions for each forward pass
        self.Z1 = self.thetas_layer0[0] + np.dot(self.inputs_in_layer0, self.thetas_layer0[1:])
        self.layer1 = relu(self.Z1)

        self.Z2 = self.thetas_layer1[0] + np.dot(self.layer1, self.thetas_layer1[1:])
        self.layer2 = relu(self.Z2)

        self.Z3 = self.thetas_layer2[0] + np.dot(self.layer2, self.thetas_layer2[1:])
        self.layer3 = sigmoid(self.Z3)

        return self.layer3

    def calculate_accuracy(self):
        actual_output = self.y
        predicted_output = self.layer3
        # Convert predicted probabilities to binary predictions (0 or 1)
        predicted_classes = (predicted_output >= 0.5).astype(int)

        # Compare predicted classes with actual classes
        correct_predictions = (predicted_classes == actual_output).sum()

        # Calculate accuracy
        accuracy = correct_predictions / len(actual_output)

        return accuracy

    def log_prior(self):
        # Calculate log priors for weights using Gaussian distributions
        log_prior_theta0 = -0.5 * (np.log(2 * np.pi * self.prior_variance_theta0) +
                                   ((self.thetas_layer0 - self.prior_mean_theta0) ** 2) /
                                   self.prior_variance_theta0).sum()

        log_prior_theta1 = -0.5 * (np.log(2 * np.pi * self.prior_variance_theta1) +
                                   ((self.thetas_layer1 - self.prior_mean_theta1) ** 2) /
                                   self.prior_variance_theta1).sum()

        log_prior_theta2 = -0.5 * (np.log(2 * np.pi * self.prior_variance_theta2) +
                                   ((self.thetas_layer2 - self.prior_mean_theta2) ** 2) /
                                   self.prior_variance_theta2).sum()

        return log_prior_theta0 + log_prior_theta1 + log_prior_theta2

    def log_likelihood(self):
        # Compute log likelihood for Bernoulli distribution
        self.n = self.inputs_in_layer0.shape[0]  # Number of training examples

        # Calculate log-likelihood for Bernoulli likelihood
        epsilon = 1e-10  # Small value to prevent log(0)
        log_likelihood = np.sum(self.y * np.log(self.layer3 + epsilon) + (1 - self.y) * np.log(1 - self.layer3 + epsilon))

        # Normalize log-likelihood by the number of training examples
        log_likelihood /= -self.n

        return log_likelihood

    def log_posterior(self):
        # Compute log posterior using log prior and log likelihood
        log_prior = self.log_prior()
        log_likelihood = self.log_likelihood()
        log_posterior = log_prior + log_likelihood
        return log_posterior
    def perform_MCMC(self,proposal_variance=0.01):
         # Make a copy of the current weights for proposal
        proposed_thetas_layer0 = np.copy(self.thetas_layer0)
        proposed_thetas_layer1 = np.copy(self.thetas_layer1)
        proposed_thetas_layer2 = np.copy(self.thetas_layer2)

            # Perturb the weights for proposal (using a Gaussian random walk as an example)
        proposed_thetas_layer0 += np.random.normal(0, proposal_variance, size=self.thetas_layer0.shape)
        proposed_thetas_layer1 += np.random.normal(0, proposal_variance, size=self.thetas_layer1.shape)
        proposed_thetas_layer2 += np.random.normal(0, proposal_variance, size=self.thetas_layer2.shape)

            # Compute log-likelihoods for current and proposed weights
        current_log_likelihood = self.log_likelihood()

            # Compute log-posterior for the proposed weights
        self.thetas_layer0 = proposed_thetas_layer0
        self.thetas_layer1 = proposed_thetas_layer1
        self.thetas_layer2 = proposed_thetas_layer2

        proposed_log_likelihood = self.log_likelihood()
        proposed_log_posterior = self.log_prior() + proposed_log_likelihood

            # Accept or reject the proposal based on Metropolis-Hastings acceptance criterion
        acceptance_ratio = np.exp(proposed_log_posterior - current_log_likelihood)
        if np.random.uniform(0, 1) < acceptance_ratio:
            # Accept the proposal
            pass
        else:
            # Reject the proposal, revert weights to the previous state
            self.thetas_layer0 = np.copy(proposed_thetas_layer0)
            self.thetas_layer1 = np.copy(proposed_thetas_layer1)
            self.thetas_layer2 = np.copy(proposed_thetas_layer2)
    def fit(self,epochs):
        losses=[]
        for i in range(epochs):
            self.feedforward()
            error = self.log_likelihood()  # Compute log-likelihood as the error
            losses.append(error)
            self.perform_MCMC()
            print("iteration #",i+1)
            print('accuracy: ',self.calculate_accuracy())
            print("Cost: \n",error,"\n")
    def evaluate(self, x,y):
        inputs_layer0 = self.embedding(x)
        Z1 = self.thetas_layer0[0] + np.dot(inputs_layer0, self.thetas_layer0[1:])
        layer1 = sigmoid(Z1)

        Z2 = self.thetas_layer1[0] + np.dot(layer1, self.thetas_layer1[1:])
        layer2 = sigmoid(Z2)

        Z3 = self.thetas_layer2[0] + np.dot(layer2, self.thetas_layer2[1:])
        layer3 = sigmoid(Z3)
        loss= (1/inputs_layer0.shape[0]) * np.sum(-y * np.log(layer3) - (1 - y) * np.log(1 - layer3)) #cross entropy
        actual_output=y
        predicted_output=layer3
        # Convert predicted probabilities to binary predictions (0 or 1)
        predicted_classes = (predicted_output >= 0.5).astype(int)

        # Compare predicted classes with actual classes
        correct_predictions = (predicted_classes == actual_output).sum()

        # Calculate accuracy
        accuracy = correct_predictions / len(actual_output)
        return loss,accuracy,layer3



# Train

In [127]:
bnn=BayesianNeuralNetwork(x_train,y_train)
bnn.fit(100)

iteration # 1
accuracy:  0.5426963100679113
Cost: 
 0.6926860173836177 

iteration # 2
accuracy:  0.4573036899320887
Cost: 
 0.693164582387466 

iteration # 3
accuracy:  0.4573036899320887
Cost: 
 0.693239430976369 

iteration # 4
accuracy:  0.4573036899320887
Cost: 
 0.6936303307315869 

iteration # 5
accuracy:  0.4573036899320887
Cost: 
 0.6932849795577652 

iteration # 6
accuracy:  0.4573036899320887
Cost: 
 0.6936284416142072 

iteration # 7
accuracy:  0.4571487592635629
Cost: 
 0.6931842445264546 

iteration # 8
accuracy:  0.5426963100679113
Cost: 
 0.6930512167431293 

iteration # 9
accuracy:  0.5426963100679113
Cost: 
 0.6931141593851355 

iteration # 10
accuracy:  0.4573036899320887
Cost: 
 0.6937797322526289 

iteration # 11
accuracy:  0.4573036899320887
Cost: 
 0.6937116949814723 

iteration # 12
accuracy:  0.4573036899320887
Cost: 
 0.6936405964429638 

iteration # 13
accuracy:  0.4573036899320887
Cost: 
 0.6938338939504345 

iteration # 14
accuracy:  0.45541870013169106
Cos

In [128]:
bnn_test=bnn.evaluate(x_cv,y_cv.reshape(-1,1))
print('Loss :',bnn_test[0])
print('Acc :',bnn_test[1])

Loss : 0.7002329939377921
Acc : 0.4624653572719605


In [129]:
bnn_test_1=bnn.evaluate(x_test,y_test.reshape(-1,1))
print('Loss :',bnn_test_1[0])
print('Acc :',bnn_test_1[1])

Loss : 0.7006930228990389
Acc : 0.45897096035666946
