In [1]:
#Import required libraries
import numpy as np
import pandas as pd

In [2]:
# Load the palindrome dataset from CSV into a DataFrame.
# NOTE(review): "/content/..." is an absolute path (looks like a Colab mount) —
# confirm the file location before running this notebook elsewhere.
df = pd.read_csv("/content/palindrome_data.csv")
# Show the number of columns as the cell output.
df.shape[1]

11

In [3]:
# Preview the first rows of the loaded DataFrame.
df.head()

Unnamed: 0,x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,y
0,0,0,0,0,0,0,0,0,0,0,1
1,1,0,0,0,0,0,0,0,0,0,0
2,0,1,0,0,0,0,0,0,0,0,0
3,1,1,0,0,0,0,0,0,0,0,0
4,0,0,1,0,0,0,0,0,0,0,0


In [25]:
# Split the DataFrame into NumPy arrays: every column except the last one
# forms the feature matrix `x`; the last column is the label vector `y`.
x = df.iloc[:, :-1].to_numpy()
y = df.iloc[:, -1].to_numpy()

In [26]:
# Confirm the shapes of the feature matrix and the label vector.
x.shape, y.shape

((1024, 10), (1024,))

## Define the neural network architecture

In [27]:
# Initialize parameters
def initialize_parameters(input_size, output_size, hidden_units):
  """Create the initial weights and biases of a one-hidden-layer network.

  Args:
    input_size: Number of input features.
    output_size: Number of output units.
    hidden_units: Number of units in the hidden layer.

  Returns:
    Dict with, in this order:
      'w0': (hidden_units, input_size) standard-normal hidden-layer weights,
      'b0': (hidden_units,) zero hidden-layer biases,
      'w1': (output_size, hidden_units) standard-normal output-layer weights,
      'b1': (output_size,) zero output-layer biases.

  Side effects:
    Re-seeds NumPy's global random generator with 0 on every call, so the
    weights are identical from run to run and any later np.random draw
    continues from that seeded state.
  """
  np.random.seed(0)

  # The draw order is part of the reproducible result: hidden-layer weights
  # are sampled first, output-layer weights second.
  hidden_weights = np.random.randn(hidden_units, input_size)
  output_weights = np.random.randn(output_size, hidden_units)

  return {
      'w0': hidden_weights,
      'b0': np.zeros(hidden_units),
      'w1': output_weights,
      'b1': np.zeros(output_size),
  }

In [28]:
# Network dimensions used for this first initialization:
# 10 inputs, 1 output and 5 hidden units.
input_size = 10
output_size = 1
hidden_units = 5
# Build the initial parameter dictionary and display it as the cell output.
parameters = initialize_parameters(input_size, output_size, hidden_units)
parameters

{'w0': array([[ 1.76405235,  0.40015721,  0.97873798,  2.2408932 ,  1.86755799,
         -0.97727788,  0.95008842, -0.15135721, -0.10321885,  0.4105985 ],
        [ 0.14404357,  1.45427351,  0.76103773,  0.12167502,  0.44386323,
          0.33367433,  1.49407907, -0.20515826,  0.3130677 , -0.85409574],
        [-2.55298982,  0.6536186 ,  0.8644362 , -0.74216502,  2.26975462,
         -1.45436567,  0.04575852, -0.18718385,  1.53277921,  1.46935877],
        [ 0.15494743,  0.37816252, -0.88778575, -1.98079647, -0.34791215,
          0.15634897,  1.23029068,  1.20237985, -0.38732682, -0.30230275],
        [-1.04855297, -1.42001794, -1.70627019,  1.9507754 , -0.50965218,
         -0.4380743 , -1.25279536,  0.77749036, -1.61389785, -0.21274028]]),
 'b0': array([0., 0., 0., 0., 0.]),
 'w1': array([[-0.89546656,  0.3869025 , -0.51080514, -1.18063218, -0.02818223]]),
 'b1': array([0.])}

## Activation functions

In [29]:
# Sigmoid activation function
def sigmoid(x):
  """Logistic sigmoid 1 / (1 + e^(-x)), evaluated with NumPy.

  Args:
    x: Scalar or NumPy array.

  Returns:
    The sigmoid of x (element-wise for arrays).
  """
  exp_neg = np.exp(-x)
  return 1/(1 + exp_neg)

# Relu activation function
def relu(x):
  """Rectified linear unit: max(x, 0), element-wise.

  Returns a new array and leaves the argument untouched. The previous
  implementation zeroed the negative entries of the caller's array in place,
  which silently corrupted any value the caller still needed (for example a
  cached pre-activation). Scalars are accepted as well.

  Args:
    x: Scalar or array-like of numbers.

  Returns:
    NumPy value of the same shape as x with negative entries replaced by 0.
  """
  return np.maximum(x, 0)

In [30]:
# Hold out 20% of the rows for testing; the other 80% are used for training.
n_rows = len(x)
train_split_size = int(0.8 * n_rows)
test_split_size = n_rows - train_split_size

# Draw the training row indices at random, without replacement.
# This uses NumPy's global random state, so the split depends on that state
# at the moment this cell runs.
train_indices = np.random.choice(n_rows, size=train_split_size, replace=False)

# The test rows are the complement: every index in [0, n_rows) that was not
# drawn for training (np.setdiff1d returns them sorted).
test_indices = np.setdiff1d(np.arange(n_rows), train_indices)

# Select the feature rows and labels for each index set.
X_train, y_train = x[train_indices], y[train_indices]
X_test, y_test = x[test_indices], y[test_indices]
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((819, 10), (819,), (205, 10), (205,))

In [41]:
# Inspect the shape of each weight matrix and bias vector in the parameter dict.
parameters['w0'].shape, parameters['b0'].shape, parameters['w1'].shape, parameters['b1'].shape

((5, 10), (5,), (1, 5), (1,))

In [70]:
# Forward propagation
def forward_propagation(X, parameters):
  """Run one forward pass through the two-layer sigmoid network.

  Args:
    X: Input matrix with one sample per row; it is transposed before being
      multiplied by 'w0', so activations hold one column per sample.
    parameters: Dict providing 'w0', 'b0', 'w1' and 'b1'.

  Returns:
    (cache, A1) where cache maps 'Z0', 'A0', 'Z1', 'A1' to the hidden
    pre-activation, hidden activation, output pre-activation and output
    activation, and A1 is the same object as cache['A1'].

  NOTE(review): 'b0' and 'b1' are looked up but never added to the
  pre-activations, so the biases currently have no effect on the output.
  Confirm whether that is intended before relying on bias gradients.
  """
  hidden_w, _hidden_b, output_w, _output_b = (
      parameters[key] for key in ('w0', 'b0', 'w1', 'b1'))

  # Hidden layer: weighted sum of the inputs, then sigmoid.
  hidden_pre = np.dot(hidden_w, X.T)
  hidden_act = sigmoid(hidden_pre)

  # Output layer: weighted sum of the hidden activations, then sigmoid.
  output_pre = np.dot(output_w, hidden_act)
  output_act = sigmoid(output_pre)

  cache = dict(Z0=hidden_pre, A0=hidden_act, Z1=output_pre, A1=output_act)
  return cache, output_act

In [39]:
# Run a forward pass on the training split with the values currently held in `parameters`.
cache, y_hat = forward_propagation(X_train, parameters)

In [40]:
# Check the shapes of the cached activations and pre-activations.
cache['A0'].shape, cache['Z0'].shape, cache['A1'].shape, cache['Z1'].shape

((5, 819), (5, 819), (1, 819), (1, 819))

## Calculate the cross-entropy loss

In [112]:
# Cross-entropy loss
def cross_entropy_loss(y_hat, y):
  """Mean binary cross-entropy between predicted probabilities and 0/1 labels.

  Both arguments are flattened first, so any layout holding one value per
  sample is accepted ((n,), (1, n) or (n, 1)). This avoids two pitfalls of
  relying on broadcasting: a (1, n) label row being averaged over 1 sample
  instead of n, and an (n, 1) label column expanding against (1, n)
  predictions into an n x n matrix.

  Args:
    y_hat: Predicted probabilities, one per sample.
    y: Ground-truth labels (0 or 1), one per sample.

  Returns:
    The loss averaged over the samples, as a float.

  Raises:
    ValueError: If the inputs are empty or hold a different number of values.
  """
  y_hat = np.asarray(y_hat, dtype=float).ravel()
  y = np.asarray(y, dtype=float).ravel()
  n = y.size
  if n == 0 or y_hat.size != n:
    raise ValueError(
        f'y_hat and y must be non-empty and equally sized, got {y_hat.size} and {n}')

  # Keep probabilities strictly inside (0, 1): log(0) would otherwise turn the
  # loss into inf, or into nan through 0 * log(0), once the sigmoid saturates.
  eps = 1e-12
  y_hat = np.clip(y_hat, eps, 1 - eps)

  loss = -(1/n)*np.sum(y*np.log(y_hat)+(1-y)*np.log(1-y_hat))
  return loss

In [38]:
# Evaluate the cross-entropy loss of the forward-pass outputs against the training labels.
training_loss = cross_entropy_loss(y_hat, y_train)
training_loss

0.280809648227767

## Backpropagation

In [60]:
# Implement the backpropagation
def back_propagation(cache, parameters, X, y):
  """Compute parameter gradients for the two-layer sigmoid network.

  The gradients are those of the binary cross-entropy summed over the samples
  (there is no division by the number of samples), for a sigmoid hidden layer
  followed by a sigmoid output unit.

  Args:
    cache: Dict from the forward pass. Uses 'A0' (hidden activations, one
      column per sample) and 'A1' (output activations, shape (1, n)).
    parameters: Dict with 'w1' (output weights) plus 'b0' and 'b1', whose
      shapes the bias gradients are matched to.
    X: Input matrix with one sample per row.
    y: Labels, one per sample; (n,), (1, n) and (n, 1) layouts are accepted.

  Returns:
    Dict with 'dW1', 'db1', 'dW0', 'db0'. Each bias gradient has exactly the
    shape of its bias so that `b - lr * db` cannot broadcast the bias into a
    larger array (a (h, 1) gradient against a (h,) bias would give (h, h)).
  """
  w1, b0, b1 = parameters['w1'], parameters['b0'], parameters['b1']
  A0, A1 = cache['A0'], cache['A1']

  # Lay the labels out as a (1, n) row so they pair up sample-by-sample with
  # A1; an (n, 1) column would otherwise broadcast to an (n, n) matrix.
  y_row = np.reshape(y, (1, -1))

  # Sigmoid output + cross-entropy: dL/dZ1 simplifies to (A1 - y). Dim: 1 x n
  dZ1 = A1 - y_row

  # Output layer gradients. Dim: [1 x n].[n x h] = [1 x h]
  dW1 = np.dot(dZ1, A0.T)
  db1 = np.sum(dZ1, axis=1).reshape(np.shape(b1))

  # Back-propagate through the output weights and the hidden sigmoid,
  # whose derivative is A0 * (1 - A0). Dim: [h x 1].[1 x n] = [h x n]
  dA0 = np.dot(w1.T, dZ1)
  dZ0 = dA0 * A0 * (1 - A0)

  # Hidden layer gradients. Dim: [h x n].[n x d] = [h x d]
  dW0 = np.dot(dZ0, X)
  db0 = np.sum(dZ0, axis=1).reshape(np.shape(b0))

  gradients = {'dW1' : dW1, 'db1' : db1, 'dW0' : dW0, 'db0' : db0}

  return gradients

In [61]:
# Update weights
def update_weights(gradients, parameters, learning_rate):
  """Apply one gradient-descent step and return the updated parameters.

  Args:
    gradients: Dict with 'dW0', 'db0', 'dW1', 'db1'.
    parameters: Dict with 'w0', 'b0', 'w1', 'b1'. It is not modified.
    learning_rate: Step size multiplying each gradient.

  Returns:
    A new dict with keys 'w0', 'b0', 'w1', 'b1', each holding
    `parameter - learning_rate * gradient` in the parameter's original shape.

  Raises:
    ValueError: If a bias gradient does not hold as many values as its bias.
  """
  # Retrieve the gradients
  dW0, db0, dW1, db1 = gradients['dW0'], gradients['db0'], gradients['dW1'], gradients['db1']

  # Retrieve the parameters
  w0, b0, w1, b1 = parameters['w0'], parameters['b0'], parameters['w1'], parameters['b1']

  # Bias gradients may arrive as columns, e.g. (h, 1) from a keepdims sum.
  # Subtracting that from a (h,) bias would broadcast to (h, h) and grow the
  # bias on every step, so each one is first laid out in its bias's shape.
  db0 = np.reshape(db0, np.shape(b0))
  db1 = np.reshape(db1, np.shape(b1))

  return {
      'w0': w0 - learning_rate*dW0,
      'b0': b0 - learning_rate*db0,
      'w1': w1 - learning_rate*dW1,
      'b1': b1 - learning_rate*db1,
  }

## Training the neural networks

In [108]:
# Train neural network
def train(X, y, hidden_layer_size, epoch, learning_rate):
  """Fit the network with full-batch gradient descent.

  Args:
    X: Training inputs, one sample per row; X.shape[1] sets the input size.
    y: Training labels.
    hidden_layer_size: Number of hidden units.
    epoch: Number of gradient-descent iterations to run.
    learning_rate: Step size passed to update_weights.

  Returns:
    The parameter dict after the last update.

  Side effects:
    Prints the loss at iterations 0, 100, 200, ... The printed loss is the
    one measured before that iteration's parameter update.
  """
  n_features = X.shape[1]
  weights = initialize_parameters(n_features, 1, hidden_layer_size)

  for i in range(epoch):
    # Forward pass and loss with the current weights.
    activations, y_hat = forward_propagation(X, weights)
    loss = cross_entropy_loss(y_hat, y)

    # Backward pass, then one gradient-descent step.
    grads = back_propagation(activations, weights, X, y)
    weights = update_weights(grads, weights, learning_rate)

    # Report progress on every 100th iteration.
    if not i % 100:
      print(f'Iteration {i}, loss: {loss}')

  return weights

In [115]:
# Train on the training split with 10 hidden units, 1000 iterations and a learning rate of 0.001.
parameters = train(X_train, y_train, 10, 1000, 0.001)

Iteration 0, loss: 2.9440580195023003
(1, 819), (819,)
Iteration 100, loss: 0.14328778786446483
(1, 819), (819,)
Iteration 200, loss: 0.13944214231288468
(1, 819), (819,)
Iteration 300, loss: 0.13702101106397768
(1, 819), (819,)
Iteration 400, loss: 0.13518019992429586
(1, 819), (819,)
Iteration 500, loss: 0.13359053386001496
(1, 819), (819,)
Iteration 600, loss: 0.13210216240631623
(1, 819), (819,)
Iteration 700, loss: 0.1306352853685331
(1, 819), (819,)
Iteration 800, loss: 0.12913979453169028
(1, 819), (819,)
Iteration 900, loss: 0.1275784931794383
(1, 819), (819,)


In [116]:
# predict the labels for new data
def predict(X, parameters):
  """Predict 0/1 labels for the rows of X.

  Runs a forward pass and thresholds the output activation: values strictly
  greater than 0.5 become 1.0, everything else becomes 0.0.

  The previous version returned the name `predictions`, which is not defined
  inside the function. On a fresh kernel that raises NameError, and otherwise
  it silently returns whatever global of that name happens to exist.

  Args:
    X: Input matrix with one sample per row.
    parameters: Parameter dict accepted by forward_propagation.

  Returns:
    Float array of 0.0/1.0 labels with the same shape as the network output.
  """
  _, probabilities = forward_propagation(X, parameters)
  return (np.asarray(probabilities) > 0.5).astype(float)

In [117]:
# Predict labels for the held-out test rows using the values currently held in `parameters`.
predictions = predict(X_test, parameters)

(1, 205)


In [119]:
# Lay the test labels out as a (1, n) row. Passing -1 lets NumPy infer n, which
# keeps this cell safe to re-run: once y_test is already (1, n), y_test.shape[0]
# is 1, and reshape(1, 1) would raise a ValueError.
y_test = y_test.reshape(1, -1)
y_test.shape

(1, 205)

In [125]:
# Accuracy = number of predictions equal to their label / number of test samples.
# predictions.shape[1] is used as the sample count, so `predictions` is
# expected to be laid out as a (1, n) row.
correct_predictions = np.sum(predictions == y_test)
accuracy = correct_predictions/predictions.shape[1]
accuracy

0.975609756097561