In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split

# Shallow Neural Network

In [3]:
class ShallowNeuralNetwork:
    """Fully connected classifier with a single hidden layer, trained by
    full-batch gradient descent.

    Data layout used throughout the class:
        X : shape(#features, #samples)
        Y : shape(#output_unit, #samples)  (one-hot columns when type='multi')

    Parameters
    ----------
    n_h : int
        Number of hidden units.
    type : str
        'multi' selects a softmax output with categorical cross-entropy;
        any other value selects a sigmoid output with binary cross-entropy.
    h_func_type : str
        Hidden activation, one of 'sigmoid', 'relu' or 'tanh'.
    random_state : int
        Seed passed to ``np.random.seed`` before weight initialisation.
        The sentinel -1 means "do not seed".

    Raises
    ------
    ValueError
        If ``h_func_type`` is not a supported activation name.
    """

    # Lower bound applied to probabilities before taking a logarithm, so a
    # saturated output yields a large finite cost instead of inf/nan.
    _EPS = 1e-15

    def __init__(self, n_h=4, type='binary', h_func_type='relu', random_state=-1):

        # #hidden_units
        self.n_h = n_h
        self.type = type
        self.h_func_type = h_func_type
        self.random_state = random_state

        # Hidden activations and their derivatives, looked up by name.
        self.functions = {'sigmoid': self.sigmoid,
                          'relu': self.relu,
                          'tanh': np.tanh}

        self.derivative = {'sigmoid': self.sigmoidDerivative,
                           'relu': self.reluDerivative,
                           'tanh': self.tanhDerivative}

        # Fail at construction time rather than with a KeyError deep inside forward().
        if h_func_type not in self.functions:
            raise ValueError("h_func_type must be one of %s, got %r"
                             % (sorted(self.functions), h_func_type))

    def parametersInitialization(self, n_x, n_y):
        """Create small random weights and zero biases in ``self.parameters``.

        n_x is the number of input features, n_y the number of output units.
        Note: when random_state != -1 this reseeds NumPy's *global* RNG.
        """

        if self.random_state != -1:
            np.random.seed(self.random_state)

        # shape(#hidden_units, #features)
        W1 = np.random.randn(self.n_h, n_x) * 0.01

        # shape(#hidden_units, 1)
        b1 = np.zeros((self.n_h, 1))

        # shape(#output_unit, #hidden_units)
        W2 = np.random.randn(n_y, self.n_h) * 0.01

        # shape(#output_unit, 1)
        b2 = np.zeros((n_y, 1))

        self.parameters = {'W1': W1,
                           'b1': b1,
                           'W2': W2,
                           'b2': b2}

    def retrieveParameters(self):
        """Return (W1, b1, W2, b2) from ``self.parameters``."""

        return self.parameters['W1'], self.parameters['b1'], self.parameters['W2'], self.parameters['b2']

    def retrieveCache(self):
        """Return (Z1, A1, Z2, A2) stored by the most recent forward pass."""

        return self.cache['Z1'], self.cache['A1'], self.cache['Z2'], self.cache['A2']

    def retrieveGrads(self):
        """Return (dW1, db1, dW2, db2) stored by the most recent backward pass."""

        return self.grads['dW1'], self.grads['db1'], self.grads['dW2'], self.grads['db2']

    def sigmoid(self, z):
        """Element-wise logistic function 1 / (1 + exp(-z)).

        Written as exp(-log(1 + exp(-z))) via logaddexp so that very negative
        inputs do not overflow inside exp(-z).
        """

        return np.exp(-np.logaddexp(0, -z))

    def relu(self, z):
        """Element-wise max(0, z)."""

        return np.maximum(0, z)

    def softmax(self, z):
        """Column-wise softmax (normalises over axis 0, the output-unit axis).

        The per-column maximum is subtracted first; the result is mathematically
        unchanged, but exp() can no longer overflow into inf/inf = nan.
        """

        shifted = z - np.max(z, axis=0, keepdims=True)
        exp_z = np.exp(shifted)

        return exp_z / np.sum(exp_z, axis=0, keepdims=True)

    def sigmoidDerivative(self, Z):
        """Derivative of the sigmoid evaluated at pre-activation Z."""

        A = self.sigmoid(Z)

        return (A * (1 - A))

    def reluDerivative(self, Z):
        """Derivative of ReLU at pre-activation Z (1 where Z >= 0, else 0)."""

        return np.where(Z >= 0., 1., 0.)

    def tanhDerivative(self, Z):
        """Derivative of tanh evaluated at pre-activation Z."""

        A = np.tanh(Z)

        return (1 - A**2)

    def forward(self, X):
        """Run one forward pass, store Z1/A1/Z2/A2 in ``self.cache``, return A2.

        X : shape(#features, #samples)
        Returns A2 : shape(#output_unit, #samples)
        """

        W1, b1, W2, b2 = self.retrieveParameters()

        # shape(#hidden_units, #samples)
        Z1 = np.dot(W1, X) + b1

        # shape(#hidden_units, #samples)
        A1 = self.functions[self.h_func_type](Z1)

        # shape(#output_unit, #samples)
        Z2 = np.dot(W2, A1) + b2

        # shape(#output_unit, #samples)
        if self.type == 'multi':
            A2 = self.softmax(Z2)
        else:
            A2 = self.sigmoid(Z2)

        self.cache = {'Z1': Z1,
                      'A1': A1,
                      'Z2': Z2,
                      'A2': A2}

        return A2

    def binary_crossEntropy(self, Y_hat, Y):
        """Mean binary cross-entropy over the samples (scalar).

        Y_hat : shape(#output_unit, #samples), predicted probabilities
        Y     : shape(#output_unit, #samples), 0/1 targets
        """

        m = Y.shape[1]

        # Keep both log() arguments strictly positive: an exact 0 or 1 would
        # otherwise give -inf, and 0 * -inf turns the whole cost into nan.
        Y_hat = np.clip(Y_hat, self._EPS, 1 - self._EPS)

        # shape(#output_unit, #samples)
        loss = - (Y * np.log(Y_hat) + (1 - Y) * np.log(1 - Y_hat))

        # scalar
        cost = np.sum(loss) / m

        return cost

    def crossEntropy(self, Y_hat, Y):
        """Mean categorical cross-entropy over the samples (scalar).

        Y_hat : shape(#output_unit, #samples), class probabilities per column
        Y     : shape(#output_unit, #samples), one-hot targets per column
        """

        m = Y.shape[1]

        # Floor the probabilities so log() never sees an exact zero.
        Y_hat = np.clip(Y_hat, self._EPS, None)

        # shape(#samples, )
        loss = - (np.sum(np.log(Y_hat) * (Y), axis=0))

        # scalar
        cost = np.sum(loss) / m

        return cost

    def backward(self, X, Y):
        """Compute gradients from the cached forward pass into ``self.grads``.

        Must be called after forward() on the same X. dZ2 = A2 - Y is used for
        both output types (sigmoid + binary cross-entropy and softmax +
        categorical cross-entropy).
        """

        m = X.shape[1]

        W2 = self.parameters['W2']
        Z1, A1 = self.cache['Z1'], self.cache['A1']
        A2 = self.cache['A2']

        # shape(#output_unit, #samples)
        dZ2 = A2 - Y

        # shape(#output_unit, #hidden_units)
        dW2 = np.dot(dZ2, A1.T) / m

        # shape(#output_unit, 1)
        db2 = np.sum(dZ2, axis=1, keepdims=True) / m

        # shape(#hidden_units, #samples)
        dZ1 = (np.dot(W2.T, dZ2)) * (self.derivative[self.h_func_type](Z1))

        # shape(#hidden_units, #features)
        dW1 = np.dot(dZ1, X.T) / m

        # shape(#hidden_units, 1)
        db1 = np.sum(dZ1, axis=1, keepdims=True) / m

        self.grads = {'dW1': dW1,
                      'db1': db1,
                      'dW2': dW2,
                      'db2': db2}

    def updateParameters(self, alpha):
        """Apply one gradient-descent step with learning rate ``alpha``."""

        # Each parameter P is paired with its gradient stored under 'dP'.
        self.parameters = {name: value - alpha * self.grads['d' + name]
                           for name, value in self.parameters.items()}

    def train(self, X, Y, alpha, epochs, print_cost=False):
        """Fit the network with ``epochs`` full-batch gradient-descent steps.

        X : shape(#features, #samples)
        Y : shape(#output_unit, #samples)
        alpha : learning rate
        print_cost : when True, print the cost roughly ten times over the run.

        Parameters are re-initialised on every call, so training always
        starts from scratch.
        """

        # #features, #output_unit
        n_x, n_y = X.shape[0], Y.shape[0]

        self.parametersInitialization(n_x, n_y)

        # epochs // 10 is 0 for fewer than 10 epochs; guard the modulo below.
        log_every = max(1, epochs // 10)

        for i in range(epochs):

            # shape(#output_unit, #samples)
            A2 = self.forward(X)

            # The cost is only needed for reporting, so skip it otherwise.
            if print_cost and i % log_every == 0:
                if self.type == 'multi':
                    cost = self.crossEntropy(A2, Y)
                else:
                    cost = self.binary_crossEntropy(A2, Y)
                print ("Cost after iteration %i : %f" %(i, cost))

            self.backward(X, Y)

            self.updateParameters(alpha)

    def predict(self, X):
        """Predict labels for X : shape(#features, #samples).

        Returns class indices of shape(#samples,) when type='multi', otherwise
        a 0./1. array of shape(#output_unit, #samples) thresholded at 0.5.
        Runs forward(), so ``self.cache`` is overwritten as a side effect.
        """

        A2 = self.forward(X)

        if self.type == 'multi':
            Y_pred = A2.argmax(axis=0)
        else:
            Y_pred = np.where(A2 > 0.5, 1., 0.)

        return Y_pred

# Load Dataset

In [4]:
def loadIrisBinary(path, size=0.2, random_state=0):
    """Load a two-class Iris CSV and split it into train / validation arrays.

    Parameters
    ----------
    path : CSV file with a 'Species' column; every other column is used as a feature.
    size : fraction of rows held out for validation (passed as test_size).
    random_state : seed for both the row shuffle and the split.

    Returns
    -------
    X_train, Y_train, X_val, Y_val
        X arrays have shape(#features, #samples); Y arrays have
        shape(1, #samples) with 'Iris-setosa' -> 0. and 'Iris-versicolor' -> 1.

    Raises
    ------
    ValueError
        If a label is neither a known species name nor convertible to float.
    """

    label_codes = {'Iris-setosa': 0., 'Iris-versicolor': 1.}

    df = pd.read_csv(path)
    df = df.sample(frac=1, random_state=random_state)

    # Encode on an explicit Series instead of `df.Species.replace(..., inplace=True)`:
    # that chained in-place call is deprecated and does not modify `df` under
    # pandas Copy-on-Write. Values without a code pass through unchanged, and
    # astype(float) then guarantees a numeric target.
    species = df['Species'].map(lambda label: label_codes.get(label, label)).astype(float)

    X_train, X_val, Y_train, Y_val = train_test_split(df.drop(['Species'], axis=1),
                                                      species,
                                                      test_size=size,
                                                      random_state=random_state)

    # Samples become columns, matching the network's (#features, #samples) layout.
    X_train, X_val = X_train.values.T, X_val.values.T
    Y_train, Y_val = Y_train.values.reshape(1, -1), Y_val.values.reshape(1, -1)

    return X_train, Y_train, X_val, Y_val

In [5]:
def loadIrisMulti(path, size=0.2, random_state=0):
    """Load the three-class Iris CSV and split it into train / validation arrays.

    Parameters
    ----------
    path : CSV file with a 'Species' column; every other column is used as a feature.
    size : fraction of rows held out for validation (passed as test_size).
    random_state : seed for both the row shuffle and the split.

    Returns
    -------
    X_train, Y_train, X_val, Y_val
        X arrays have shape(#features, #samples).
        Y_train is one-hot with shape(#classes, #samples).
        Y_val is left as integer class indices with shape(#samples,).

    Raises
    ------
    ValueError
        If a label is neither a known species name nor convertible to an integer.
    """

    label_codes = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}

    df = pd.read_csv(path)

    # Encode on an explicit Series instead of `df.Species.replace(..., inplace=True)`:
    # that chained in-place call is deprecated and does not modify `df` under
    # pandas Copy-on-Write. Values without a code pass through unchanged.
    df['Species'] = df['Species'].map(lambda label: label_codes.get(label, label)).astype('int64')
    df = df.sample(frac=1, random_state=random_state)

    X_train, X_val, Y_train, Y_val = train_test_split(df.drop(['Species'], axis=1),
                                                      df['Species'],
                                                      test_size=size,
                                                      random_state=random_state)

    # Samples become columns, matching the network's (#features, #samples) layout.
    X_train, X_val = X_train.values.T, X_val.values.T
    Y_train, Y_val = Y_train.values, Y_val.values

    # One-hot encode the training targets. The height is at least the number of
    # known classes, so it does not shrink when the training split happens to
    # miss the highest class.
    n_classes = max(len(label_codes), int(np.max(Y_train)) + 1)
    Y_train = ((np.arange(n_classes) == Y_train[:, None]).astype(float)).T

    return X_train, Y_train, X_val, Y_val

# Training

### Iris Binary

In [6]:
# Binary task: hold out 10% of the rows for validation.
X_train, Y_train, X_val, Y_val = loadIrisBinary(path='data/Iris_binary.csv', size=0.1)

# 4 tanh hidden units, sigmoid output; seeded so the run is repeatable.
model = ShallowNeuralNetwork(n_h=4, type='binary', h_func_type='tanh', random_state=0)
model.train(X_train, Y_train, alpha=0.1, epochs=100, print_cost=True)

# Targets and predictions are (1, #samples); flatten both to 1-D for the report.
Y_pred = model.predict(X_val)
print(classification_report(Y_val.ravel(), Y_pred.ravel()))

Cost after iteration 0 : 0.692990
Cost after iteration 10 : 0.692046
Cost after iteration 20 : 0.689178
Cost after iteration 30 : 0.680146
Cost after iteration 40 : 0.652534
Cost after iteration 50 : 0.582169
Cost after iteration 60 : 0.462598
Cost after iteration 70 : 0.334333
Cost after iteration 80 : 0.235743
Cost after iteration 90 : 0.170667
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00         4
         1.0       1.00      1.00      1.00         6

    accuracy                           1.00        10
   macro avg       1.00      1.00      1.00        10
weighted avg       1.00      1.00      1.00        10



### Iris Multiclass

In [7]:
# Three-class task with the loader's default 20% validation split.
X_train, Y_train, X_val, Y_val = loadIrisMulti(path='data/Iris.csv')

# 4 tanh hidden units, softmax output; seeded so the run is repeatable.
model = ShallowNeuralNetwork(n_h=4, type='multi', h_func_type='tanh', random_state=0)
model.train(X_train, Y_train, alpha=0.1, epochs=150, print_cost=True)

# predict() returns class indices, which line up with the integer labels in Y_val.
Y_pred = model.predict(X_val)
print(classification_report(Y_val, Y_pred))

Cost after iteration 0 : 1.098791
Cost after iteration 15 : 1.095388
Cost after iteration 30 : 1.069127
Cost after iteration 45 : 0.923583
Cost after iteration 60 : 0.677593
Cost after iteration 75 : 0.559173
Cost after iteration 90 : 0.507121
Cost after iteration 105 : 0.473852
Cost after iteration 120 : 0.442082
Cost after iteration 135 : 0.406681
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      0.88      0.93         8
           2       0.92      1.00      0.96        12

    accuracy                           0.97        30
   macro avg       0.97      0.96      0.96        30
weighted avg       0.97      0.97      0.97        30

