# Single Neural Network using Iris dataset from scikit-learn
## Iris dataset has 50 samples from three species, Iris Setosa, Iris Verginica, and Iris Versicolor
## It has four features, length and width of the sepals and petals
## Using this feature, will try to classify two species Iris Setosa and Iris Verginica by training a simple neural network. The first hundred samples in the iris data set correspond to Iris setosa, and Iris verginica mapped as 0 and 1 respectively.

### Collect the data from the dataset

In [1]:
from sklearn import datasets
import numpy as np
iris = datasets.load_iris()

### extracting first 100 samples for Iris Setosa and Iris Verginica

In [2]:
X = iris.data[:100, :4]

In [3]:
print(X)

[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]
 [5.4 3.9 1.7 0.4]
 [4.6 3.4 1.4 0.3]
 [5.  3.4 1.5 0.2]
 [4.4 2.9 1.4 0.2]
 [4.9 3.1 1.5 0.1]
 [5.4 3.7 1.5 0.2]
 [4.8 3.4 1.6 0.2]
 [4.8 3.  1.4 0.1]
 [4.3 3.  1.1 0.1]
 [5.8 4.  1.2 0.2]
 [5.7 4.4 1.5 0.4]
 [5.4 3.9 1.3 0.4]
 [5.1 3.5 1.4 0.3]
 [5.7 3.8 1.7 0.3]
 [5.1 3.8 1.5 0.3]
 [5.4 3.4 1.7 0.2]
 [5.1 3.7 1.5 0.4]
 [4.6 3.6 1.  0.2]
 [5.1 3.3 1.7 0.5]
 [4.8 3.4 1.9 0.2]
 [5.  3.  1.6 0.2]
 [5.  3.4 1.6 0.4]
 [5.2 3.5 1.5 0.2]
 [5.2 3.4 1.4 0.2]
 [4.7 3.2 1.6 0.2]
 [4.8 3.1 1.6 0.2]
 [5.4 3.4 1.5 0.4]
 [5.2 4.1 1.5 0.1]
 [5.5 4.2 1.4 0.2]
 [4.9 3.1 1.5 0.1]
 [5.  3.2 1.2 0.2]
 [5.5 3.5 1.3 0.2]
 [4.9 3.1 1.5 0.1]
 [4.4 3.  1.3 0.2]
 [5.1 3.4 1.5 0.2]
 [5.  3.5 1.3 0.3]
 [4.5 2.3 1.3 0.3]
 [4.4 3.2 1.3 0.2]
 [5.  3.5 1.6 0.6]
 [5.1 3.8 1.9 0.4]
 [4.8 3.  1.4 0.3]
 [5.1 3.8 1.6 0.2]
 [4.6 3.2 1.4 0.2]
 [5.3 3.7 1.5 0.2]
 [5.  3.3 1.4 0.2]
 [7.  3.2 4.7 1.4]
 [6.4 3.2 4.5 1.5]
 [6.9 3.1 4.

### actual output

In [4]:
Y = iris.target[:100]

In [5]:
print(Y)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


## Normalizing the data

In [6]:
def normalize(data):
    col_max = np.max(data, axis = 0)
    col_min = np.min(data, axis = 0)
    return np.divide(data - col_min, col_max - col_min)

X_norm = normalize(X)

In [14]:
print(X_norm)

[[0.2962963  0.625      0.09756098 0.05882353]
 [0.22222222 0.41666667 0.09756098 0.05882353]
 [0.14814815 0.5        0.07317073 0.05882353]
 [0.11111111 0.45833333 0.12195122 0.05882353]
 [0.25925926 0.66666667 0.09756098 0.05882353]
 [0.40740741 0.79166667 0.17073171 0.17647059]
 [0.11111111 0.58333333 0.09756098 0.11764706]
 [0.25925926 0.58333333 0.12195122 0.05882353]
 [0.03703704 0.375      0.09756098 0.05882353]
 [0.22222222 0.45833333 0.12195122 0.        ]
 [0.40740741 0.70833333 0.12195122 0.05882353]
 [0.18518519 0.58333333 0.14634146 0.05882353]
 [0.18518519 0.41666667 0.09756098 0.        ]
 [0.         0.41666667 0.02439024 0.        ]
 [0.55555556 0.83333333 0.04878049 0.05882353]
 [0.51851852 1.         0.12195122 0.17647059]
 [0.40740741 0.79166667 0.07317073 0.17647059]
 [0.2962963  0.625      0.09756098 0.11764706]
 [0.51851852 0.75       0.17073171 0.11764706]
 [0.2962963  0.75       0.12195122 0.11764706]
 [0.40740741 0.58333333 0.17073171 0.05882353]
 [0.2962963  

### data in X_norm is of shape (100,4), but for SNN, we need a matrix with(feature, no of samples) format, so we will transpose X_norm to get the same

In [7]:
X_data = X_norm.T
Y_data = Y.reshape(1,100)

## Initializing Weights and bias
### initialize the weights and bias to some random values. Since we have four features, the weight vector should be of (4,1) dimension and bias of shape(1,1)
### In this case, we will be initializing all these values to 0

In [8]:
def initializeNetwork(num_features):
    W = np.zeros((num_features, 1))
    b = 0
    parameters = {"W": W, "b": b}
    return parameters

## Defining Activation Function
### Since this is binary classification, let's consider a sigmoid function that maps any linear input to values between 0 and 1

In [9]:
def sigmoid(z):
    return 1/(1 + np.exp(-z))

## Forward Propagation

In [10]:
def forwardPropagation(X, Y, parameters):
    W = parameters["W"]
    b = parameters["b"]
    Z = np.dot(W.T,X) + b
    A = sigmoid(Z)
    return A

### calculating the cost function for a given number of samples

In [11]:
def cost(A, Y, num_samples):
    return -1/num_samples * np.sum(Y * np.log(A) + (1- Y)*(np.log(1-A)))

## Defining Backpropagation
### Using the output A, now we need to find the derivatives of weights and bias

In [12]:
def backPropagation(X, Y, A, num_samples):
    dZ = A - Y
    dW = (np.dot(X, dZ.T))/num_samples
    db = np.sum(dZ)/num_samples
    return dW, db

### subtracting the derivatives from original weights and bias, while doing that, we'll multiply the derivatives with a learning rate to have control over the gradient at each step of iteration

In [13]:
def updateParameters(parameters, dW, db, learning_rate):
    W = parameters["W"] - (learning_rate * dW)
    b = parameters["b"] - (learning_rate * db)
    return {"W": W, "b": b}

## Defining the Neural Network model

In [14]:
def model(X, Y, num_iter, learning_rate):
    num_features = X.shape[0]
    num_samples = float(X.shape[1])
    parameters = initializeNetwork(num_features)
    for i in range(num_iter):
        A = forwardPropagation(X, Y, parameters)
        if(i%100 == 0):
            print("cost after {} iteration".format(i, cost(A, Y, num_samples)))
        dW, db = backPropagation(X, Y, A, num_samples)
        parameters = updateParameters(parameters, dW, db, learning_rate)
    return parameters

In [15]:
parameters = model(X_data, Y, 1000, 0.1)

cost after 0 iteration
cost after 100 iteration
cost after 200 iteration
cost after 300 iteration
cost after 400 iteration
cost after 500 iteration
cost after 600 iteration
cost after 700 iteration
cost after 800 iteration
cost after 900 iteration
