In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [2]:
def sigmoid(x):
    """Logistic function 1 / (1 + exp(-x)), applied element-wise.

    The value is computed from exp(-|x|), which always lies in (0, 1], so no
    intermediate overflows even for inputs of very large magnitude.

    Parameters
    ----------
    x : scalar or array-like
        Input value(s).

    Returns
    -------
    NumPy scalar or ndarray
        Values in [0, 1] with the same shape as ``x``.
    """
    x = np.asarray(x)
    decay = np.exp(-np.abs(x))
    # For x >= 0 use 1 / (1 + e^-x); for x < 0 use the algebraically equal
    # e^x / (1 + e^x). Both branches only ever see decay <= 1.
    result = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () turns a 0-d result back into a NumPy scalar and leaves
    # arrays of higher rank unchanged.
    return result[()]

In [3]:
def sigmoid_prime(x):
    """Derivative of the logistic function, sigmoid(x) * (1 - sigmoid(x))."""
    activation = sigmoid(x)
    return activation * (1.0 - activation)

In [6]:
class NeuralNetwork(object):
    """Fully connected feed-forward network with sigmoid activations.

    Training is done one example at a time: forward pass, cross-entropy cost,
    backward pass, then a gradient-descent step with learning rate ``alpha``.

    Attributes
    ----------
    L : int
        Index of the last layer (number of layers minus one).
    n : numpy.ndarray
        Number of neurons per layer; ``n[0]`` is the input width.
    parameters : dict
        Weights ``W{l}``, biases ``b{l}``, pre-activations ``z{l}``,
        activations ``a{l}`` (including the input ``a0``) and the last cost ``C``.
    derivatives : dict
        Gradients ``dz{l}``, ``dW{l}`` and ``db{l}`` from the last backward pass.
    alpha : float
        Learning rate used by ``update_parameters``.
    """

    # Activations are clipped to [_EPS, 1 - _EPS] before taking logs in the cost.
    _EPS = 1e-15

    def __init__(self, architecture, alpha=0.01):
        """Build the network and initialise its parameters.

        Parameters
        ----------
        architecture : array-like of int
            ``architecture[i]`` is the number of neurons in layer ``i``;
            index 0 is the input layer.
        alpha : float, optional
            Learning rate (default 0.01).

        Raises
        ------
        ValueError
            If ``architecture`` is not one-dimensional or has fewer than two layers.
        """
        architecture = np.asarray(architecture)
        if architecture.ndim != 1 or architecture.size < 2:
            raise ValueError(
                "architecture must be a 1-D sequence with at least an input and an output layer"
            )

        # Network shape
        self.L = architecture.size - 1       # index of the last layer
        self.n = architecture                # neurons per layer
        self.input_size = self.n[0]
        self.output_size = self.n[self.L]

        # Weights, biases and the cached forward-pass values live in one dict.
        self.parameters = {}
        for i in range(1, self.L + 1):
            # Small random weights; drawn from NumPy's global RNG.
            self.parameters['W' + str(i)] = np.random.randn(self.n[i], self.n[i - 1]) * 0.01
            # Biases and the z/a placeholders all start at 1.
            self.parameters['b' + str(i)] = np.ones((self.n[i], 1))
            self.parameters['z' + str(i)] = np.ones((self.n[i], 1))
            self.parameters['a' + str(i)] = np.ones((self.n[i], 1))

        # The input-layer placeholder is sized from layer 0, not from the loop variable.
        self.parameters['a0'] = np.ones((self.n[0], 1))

        # Cost placeholder
        self.parameters['C'] = 1

        # Gradients are filled in by compute_derivatives.
        self.derivatives = {}

        # Learning rate
        self.alpha = alpha

    def forward_propagate(self, X):
        """Run one forward pass and cache every layer's ``z`` and ``a``.

        Parameters
        ----------
        X : array-like
            A single example. A 1-D input is reshaped to a column vector so
            the bias addition does not broadcast it into a matrix.
        """
        X = np.asarray(X)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        self.parameters['a0'] = X

        for layer in range(1, self.L + 1):
            weights = self.parameters['W' + str(layer)]
            bias = self.parameters['b' + str(layer)]
            previous = self.parameters['a' + str(layer - 1)]
            pre_activation = np.dot(weights, previous) + bias
            self.parameters['z' + str(layer)] = pre_activation
            self.parameters['a' + str(layer)] = sigmoid(pre_activation)

    def compute_cost(self, y):
        """Store the cross-entropy cost of the last forward pass in ``parameters['C']``.

        The output activation is clipped away from exactly 0 and 1 first, so a
        saturated output yields a large finite cost instead of inf or nan.
        """
        output = np.clip(self.parameters['a' + str(self.L)], self._EPS, 1 - self._EPS)
        self.parameters['C'] = -(y * np.log(output) + (1 - y) * np.log(1 - output))

    def compute_derivatives(self, y):
        """Back-propagate the cost gradient and fill ``self.derivatives``.

        Must be called after ``forward_propagate`` because it reads the cached
        ``z`` and ``a`` values.
        """
        last = self.L

        # Output layer: dz = a - y for the sigmoid output with cross-entropy cost.
        dz_last = self.parameters['a' + str(last)] - y
        self.derivatives['dz' + str(last)] = dz_last
        self.derivatives['dW' + str(last)] = np.dot(
            dz_last, np.transpose(self.parameters['a' + str(last - 1)])
        )
        self.derivatives['db' + str(last)] = dz_last

        # Hidden layers, walking backwards from L-1 down to 1.
        for layer in range(last - 1, 0, -1):
            dz = np.dot(
                np.transpose(self.parameters['W' + str(layer + 1)]),
                self.derivatives['dz' + str(layer + 1)],
            ) * sigmoid_prime(self.parameters['z' + str(layer)])
            self.derivatives['dz' + str(layer)] = dz
            self.derivatives['dW' + str(layer)] = np.dot(
                dz, np.transpose(self.parameters['a' + str(layer - 1)])
            )
            self.derivatives['db' + str(layer)] = dz

    def update_parameters(self):
        """Apply one gradient-descent step to every weight matrix and bias vector in place."""
        for layer in range(1, self.L + 1):
            self.parameters['W' + str(layer)] -= self.alpha * self.derivatives['dW' + str(layer)]
            self.parameters['b' + str(layer)] -= self.alpha * self.derivatives['db' + str(layer)]

    def predict(self, x):
        """Forward-propagate ``x`` and return the output-layer activation."""
        self.forward_propagate(x)
        return self.parameters['a' + str(self.L)]

    def fit(self, X, Y, num_iter):
        """Train for ``num_iter`` passes over the data, updating after every example.

        After each pass the pass index, the mean cost and the training accuracy
        (threshold 0.5, measured before each example's update) are printed.

        Parameters
        ----------
        X : numpy.ndarray
            Training examples, one per row.
        Y : sequence
            Targets; ``Y[i]`` belongs to ``X[i]``.
        num_iter : int
            Number of passes over the training data.

        Raises
        ------
        ValueError
            If ``X`` is empty or ``X`` and ``Y`` differ in length.
        """
        n_samples = X.shape[0]
        if n_samples == 0:
            raise ValueError("X must contain at least one training example")
        if len(Y) != n_samples:
            raise ValueError("X and Y must contain the same number of examples")

        output_key = 'a' + str(self.L)
        for epoch in range(num_iter):
            total_cost = 0
            n_correct = 0
            for i in range(n_samples):
                x = X[i].reshape((X[i].size, 1))
                y = Y[i]
                self.forward_propagate(x)
                self.compute_cost(y)
                total_cost = total_cost + self.parameters['C']
                # The activation from the forward pass above is the prediction,
                # so no second forward pass is needed.
                if np.all((self.parameters[output_key] > 0.5) == y):
                    n_correct += 1
                self.compute_derivatives(y)
                self.update_parameters()

            mean_cost = total_cost / n_samples
            accuracy = (n_correct / n_samples) * 100
            print('Iteration: ', epoch)
            print("Cost: ", mean_cost)
            print("Accuracy:", accuracy)

In [7]:
# Read 'wheat-seeds-binary.csv' (path relative to the working directory) into a DataFrame.
dataset = pd.read_csv('wheat-seeds-binary.csv')

In [8]:
# Shuffle the rows with a fixed seed so a fresh Restart & Run All produces the
# same ordering, then shift the 'Class' labels down by one.
# NOTE(review): presumably the CSV encodes the two classes as 1/2 and this maps
# them to 0/1 for the sigmoid output; confirm against the data.
shuffled_dataset = (
    dataset
    .sample(frac=1, random_state=42)
    .reset_index(drop=True)
    .assign(Class=lambda frame: frame['Class'] - 1)
)

In [9]:
# Every column except the last is a feature; the last column is the target.
X, y = shuffled_dataset.iloc[:, :-1].values, shuffled_dataset.iloc[:, -1].values

In [10]:
# Standardise the feature columns with scikit-learn's StandardScaler.
# NOTE(review): the scaler is fitted on the full feature matrix, before the
# train/test split performed in a later cell, so test rows contribute to the
# scaling statistics. Fitting on the training split only would avoid this leakage.
sc_X = StandardScaler()
X = sc_X.fit_transform(X)  # overwrites X with the scaled array

In [13]:
# Hold out 30% of the rows for testing; the fixed random_state makes the split
# identical on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [14]:
# Layer sizes: the input width follows the feature matrix, followed by a
# 2-neuron hidden layer and a single output neuron.
architecture = np.array([X_train.shape[1], 2, 1])

In [15]:
# NeuralNetwork draws its initial weights from np.random.randn, so seed NumPy's
# global RNG right before construction to make training reproducible.
np.random.seed(42)
classifier = NeuralNetwork(architecture)

In [16]:
# Train for 10 passes over the training split; fit prints cost and accuracy per pass.
classifier.fit(X_train, y_train, num_iter=10)

Iteration:  0
Cost:  [[0.73890065]]
Accuracy: 55.10204081632652
Iteration:  1
Cost:  [[0.70850564]]
Accuracy: 55.10204081632652
Iteration:  2
Cost:  [[0.69385541]]
Accuracy: 55.10204081632652
Iteration:  3
Cost:  [[0.68499343]]
Accuracy: 55.10204081632652
Iteration:  4
Cost:  [[0.67774778]]
Accuracy: 55.10204081632652
Iteration:  5
Cost:  [[0.67035692]]
Accuracy: 55.10204081632652
Iteration:  6
Cost:  [[0.66196914]]
Accuracy: 55.10204081632652
Iteration:  7
Cost:  [[0.65207563]]
Accuracy: 65.3061224489796
Iteration:  8
Cost:  [[0.64033605]]
Accuracy: 76.53061224489795
Iteration:  9
Cost:  [[0.62656054]]
Accuracy: 88.77551020408163


In [17]:
# Score the trained classifier on the held-out split, one example at a time.
# The loop variables have their own names so the label array `y` built in an
# earlier cell is not overwritten when this cell runs.
n_c = 0
for test_features, test_label in zip(X_test, y_test):
    column = test_features.reshape((test_features.size, 1))
    predicted_positive = classifier.predict(column) > 0.5
    if np.all(predicted_positive == test_label):
        n_c += 1

acc = (n_c / X_test.shape[0]) * 100
print("Test Accuracy", acc)

Test Accuracy 97.61904761904762
