# Neural Network with One Hidden Layer

In [1]:
import warnings
warnings.filterwarnings("ignore")
# Importing the libraries
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Importing the dataset
dataset = pd.read_csv('Churn_Modelling.csv')
# Features are taken positionally: columns 3..12 (10 columns) as a NumPy array.
# NOTE(review): presumably CreditScore..EstimatedSalary — confirm against the CSV header.
X = dataset.iloc[:, 3:13].values
# Target is the column at position 13 (presumably "Exited" — confirm against the CSV header).
y = dataset.iloc[:, 13].values

In [3]:
# Preview the first rows to sanity-check the column layout used for X and y
dataset.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
# Encoding categorical data
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
# Feature columns 1 and 2 are categorical; map each to integer codes first.
labelencoder_X_1 = LabelEncoder()
X[:, 1] = labelencoder_X_1.fit_transform(X[:, 1])
labelencoder_X_2 = LabelEncoder()
X[:, 2] = labelencoder_X_2.fit_transform(X[:, 2])
# OneHotEncoder(categorical_features=[1]) was removed in scikit-learn 0.22.
# ColumnTransformer one-hot encodes column 1 and passes the other columns through;
# the encoded columns come first, followed by the remaining columns in their
# original order (same layout the old API produced).
# sparse_threshold=0 forces a dense result regardless of the encoder's output type.
onehotencoder = ColumnTransformer(
    [("onehot", OneHotEncoder(), [1])],
    remainder="passthrough",
    sparse_threshold=0,
)
# X is an object array (mixed strings/numbers), so cast the result to float explicitly.
X = np.asarray(onehotencoder.fit_transform(X), dtype=float)
#to avoid dummy variable trap
X = X[:, 1:]

In [5]:
# Splitting the dataset into the Training set and Test set
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; random_state=0 makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

In [6]:
# Feature Scaling
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
# Fit the scaler on the training split only ...
X_train = sc.fit_transform(X_train)
# ... and reuse those training statistics to transform the test split.
X_test = sc.transform(X_test)

In [7]:
# Lay the data out with one example per column:
# feature matrices become (n_features, m) and label vectors become (1, m).
# Note: this cell rebinds its own inputs, so it must be run exactly once per kernel.
X_train, X_test = X_train.T, X_test.T
y_train = y_train.reshape(len(y_train), 1).T
y_test = y_test.reshape(len(y_test), 1).T

In [8]:
# Record the training array shapes; m is the number of training examples (columns of X_train).
shape_X, shape_Y = X_train.shape, y_train.shape
m = shape_X[1]

In [9]:
# Report the layout of the training arrays and the number of training examples
print ('The shape of X is: ' + str(shape_X))
print ('The shape of Y is: ' + str(shape_Y))
print ('I have m = %d training examples!' % (m))

The shape of X is: (11, 8000)
The shape of Y is: (1, 8000)
I have m = 8000 training examples!


In [10]:
def layer_sizes(X,Y):
    """Return the layer widths of the network as ``(n_x, n_h, n_y)``.

    Arguments:
    X -- input array; its first dimension gives the input layer size
    Y -- label array; its first dimension gives the output layer size

    Returns:
    Tuple ``(n_x, n_h, n_y)`` where the hidden layer size ``n_h`` is fixed at 10.
    """
    hidden_units = 10  # hidden layer width is hard-coded
    return (X.shape[0], hidden_units, Y.shape[0])

In [11]:
# Derive the input / hidden / output layer sizes from the training arrays
(n_x,n_h,n_y)=layer_sizes(X_train,y_train)

In [12]:
# Report the three layer sizes returned by layer_sizes
print("The size of the input layer is: n_x = " + str(n_x))
print("The size of the hidden layer is: n_h = " + str(n_h))
print("The size of the output layer is: n_y = " + str(n_y))

The size of the input layer is: n_x = 11
The size of the hidden layer is: n_h = 10
The size of the output layer is: n_y = 1


In [13]:
def initialize_parameters(n_x,n_h,n_y):
    """Create the initial weights and biases for a one-hidden-layer network.

    Arguments:
    n_x -- size of the input layer
    n_h -- size of the hidden layer
    n_y -- size of the output layer

    Returns:
    dict with keys "W1" (n_h, n_x), "b1" (n_h, 1), "W2" (n_y, n_h), "b2" (n_y, 1).
    Weights are standard-normal draws from NumPy's global RNG scaled by 0.01;
    biases are zeros.
    """
    scale = 0.01
    # Draw W1 before W2 so the global RNG is consumed in a fixed order.
    hidden_weights = scale * np.random.randn(n_h, n_x)
    hidden_bias = np.zeros((n_h, 1))
    output_weights = scale * np.random.randn(n_y, n_h)
    output_bias = np.zeros((n_y, 1))
    return {"W1": hidden_weights,
            "b1": hidden_bias,
            "W2": output_weights,
            "b2": output_bias}

In [14]:
# Build a fresh parameter set and print each array for inspection
parameters = initialize_parameters(n_x, n_h, n_y)
for param_name in ("W1", "b1", "W2", "b2"):
    print(param_name + " = " + str(parameters[param_name]))

W1 = [[-0.01087226  0.00133866 -0.0046199  -0.00880013  0.00554151 -0.00848744
  -0.00674436 -0.00749836  0.00412577 -0.00665255  0.00333818]
 [ 0.00734115  0.00680086 -0.00594362  0.00818413 -0.02312955  0.00416452
  -0.00407299  0.00810755 -0.01488571  0.01309719  0.00847865]
 [ 0.00637767 -0.01013773  0.00567197  0.02009643 -0.0008565  -0.00567022
  -0.00018738 -0.00196056  0.0131834   0.01114026 -0.02368298]
 [-0.00116807  0.01105694 -0.00320739 -0.00598348 -0.01527274  0.02210257
  -0.00620276  0.0056699   0.02026517  0.00800886  0.01078677]
 [ 0.0238804  -0.00083884 -0.00781985  0.02533432 -0.01859611  0.00259433
   0.00336505  0.00882712 -0.01710771  0.00020153  0.00568872]
 [-0.00238332 -0.0046547   0.00050486  0.00482879 -0.00559597 -0.01534744
   0.00435732 -0.00986108 -0.00719261 -0.00999617  0.01811962]
 [ 0.00873483  0.00010439 -0.01126518  0.00172401  0.00704141  0.00849009
  -0.0011366   0.00573551  0.0073765   0.02136174 -0.01707167]
 [ 0.00144764 -0.01136527 -0.0190088

In [15]:
def forward_propagation(X,parameters):
    """Run one forward pass: tanh hidden layer followed by a sigmoid output layer.

    Arguments:
    X -- input array, multiplied on the left by W1
    parameters -- dict holding "W1", "b1", "W2", "b2"

    Returns:
    A2 -- sigmoid output of the second layer
    cache -- dict with the intermediate arrays "Z1", "A1", "Z2", "A2"
    """
    W1, b1, W2, b2 = (parameters[key] for key in ("W1", "b1", "W2", "b2"))

    # Hidden layer: affine transform, then tanh
    Z1 = np.dot(W1, X) + b1
    A1 = np.tanh(Z1)

    # Output layer: affine transform, then logistic sigmoid
    Z2 = np.dot(W2, A1) + b2
    A2 = 1.0 / (1.0 + np.exp(-Z2))

    cache = dict(Z1=Z1, A1=A1, Z2=Z2, A2=A2)
    return A2, cache

In [16]:
# Forward pass over the training set with the current parameters
A2,cache=forward_propagation(X_train,parameters)

In [17]:
def compute_cost(A2,Y,parameters):
    """Compute the mean binary cross-entropy between predictions and labels.

    Arguments:
    A2 -- predicted probabilities, shape (1, m)
    Y -- 0/1 labels, shape (1, m)
    parameters -- unused; kept so existing callers keep working

    Returns:
    cost -- scalar cross-entropy averaged over the m columns of Y
    """
    m=Y.shape[1]
    # A saturated sigmoid can return exactly 0 or 1, and log(0) would turn the
    # cost into inf/nan. Clip the probabilities to keep the logs finite.
    eps=1e-15
    A2_safe=np.clip(A2,eps,1-eps)
    logprobs=np.multiply(np.log(A2_safe),Y)+np.multiply(np.log(1-A2_safe),(1-Y))
    cost=-(1/m)*np.sum(logprobs)

    return cost

In [18]:
# Cost of the forward pass computed above, before any parameter update
print("cost = " + str(compute_cost(A2, y_train, parameters)))

cost = 0.6931907346250585


In [19]:
def backward_propagation(parameters,cache,X,Y):
    """Compute the cost gradients for the one-hidden-layer network.

    Arguments:
    parameters -- dict of parameters; only "W2" is read here
    cache -- dict from forward_propagation; only "A1" and "A2" are read here
    X -- input array whose second dimension is the number of examples m
    Y -- label array, same shape as A2

    Returns:
    grads -- dict with the gradients "dW1", "db1", "dW2", "db2"
    """
    m=X.shape[1]
    # Only W2 is needed for the backward pass; W1, b1 and b2 are not used.
    W2=parameters["W2"]

    A1=cache["A1"]
    A2=cache["A2"]

    # Output layer: sigmoid + cross-entropy gives dZ2 = A2 - Y
    dZ2=A2-Y
    dW2=(1/m)*np.dot(dZ2,A1.T)
    db2=(1/m)*np.sum(dZ2,axis=1,keepdims=True)
    # Hidden layer: the tanh derivative is 1 - A1**2
    dZ1 = np.multiply((np.dot(W2.T,dZ2)),(1-np.power(A1,2)))
    dW1 = 1/(m)*np.dot(dZ1,X.T)
    db1 = 1/(m)*np.sum(dZ1,axis=1,keepdims=True)

    grads = {"dW1": dW1,
             "db1": db1,
             "dW2": dW2,
             "db2": db2}
    return grads

In [20]:
# Backward pass on the training set, then print each gradient for inspection
grads = backward_propagation(parameters, cache, X_train, y_train)
for grad_name in ("dW1", "db1", "dW2", "db2"):
    print (grad_name + " = " + str(grads[grad_name]))

dW1 = [[-1.05826416e-03  2.60743237e-04  1.75406527e-04  6.42455416e-04
  -1.71560237e-03  1.56202930e-04 -7.14166166e-04  3.24459508e-04
   7.64052394e-05  9.09123706e-04 -6.28841947e-05]
 [ 9.02435255e-04 -2.21588298e-04 -1.50337949e-04 -5.47987474e-04
   1.46485572e-03 -1.32794348e-04  6.08734898e-04 -2.76265951e-04
  -6.73892821e-05 -7.74326302e-04  5.46104520e-05]
 [-2.17218162e-03  5.34809843e-04  3.59863375e-04  1.31963895e-03
  -3.52314500e-03  3.21380318e-04 -1.46551444e-03  6.66328713e-04
   1.57301094e-04  1.86720945e-03 -1.27503685e-04]
 [ 7.02495256e-04 -1.71939623e-04 -1.16774921e-04 -4.27863378e-04
   1.14160031e-03 -1.02867795e-04  4.73977036e-04 -2.15537720e-04
  -5.10767528e-05 -6.04406633e-04  4.25565561e-05]
 [-1.01703487e-03  2.50521310e-04  1.68281833e-04  6.15898153e-04
  -1.64533996e-03  1.49425102e-04 -6.85378650e-04  3.10495552e-04
   7.47912996e-05  8.69730968e-04 -6.05541199e-05]
 [-2.85414376e-04  7.02835800e-05  4.74091102e-05  1.73589747e-04
  -4.63653240

In [21]:
def update_parameters(parameters, grads, learning_rate = 1.2):
    """Apply one gradient-descent step and return the new parameters.

    Arguments:
    parameters -- dict holding "W1", "b1", "W2", "b2"
    grads -- dict holding the matching gradients "dW1", "db1", "dW2", "db2"
    learning_rate -- step size multiplying each gradient (default 1.2)

    Returns:
    A new dict with the updated "W1", "b1", "W2", "b2"; the input dict and its
    arrays are not modified.
    """
    names = ("W1", "b1", "W2", "b2")
    # Read every parameter and every gradient up front, then step each one.
    current = [parameters[name] for name in names]
    steps = [grads["d" + name] for name in names]
    return {
        name: value - ((learning_rate) * step)
        for name, value, step in zip(names, current, steps)
    }

In [22]:
# Take one gradient-descent step with update_parameters' default learning rate
parameters = update_parameters(parameters, grads)

In [23]:
# Print every parameter array after the update step
for param_name in ("W1", "b1", "W2", "b2"):
    print(param_name + " = " + str(parameters[param_name]))

W1 = [[-9.60234653e-03  1.02576656e-03 -4.83038966e-03 -9.57108108e-03
   7.60022865e-03 -8.67487973e-03 -5.88736369e-03 -7.88771127e-03
   4.03408261e-03 -7.74349368e-03  3.41363938e-03]
 [ 6.25822745e-03  7.06676264e-03 -5.76321374e-03  8.84171429e-03
  -2.48873819e-02  4.32387158e-03 -4.80347058e-03  8.43906666e-03
  -1.48048400e-02  1.40263838e-02  8.41311449e-03]
 [ 8.98428838e-03 -1.07795020e-02  5.24013670e-03  1.85128623e-02
   3.37127614e-03 -6.05587843e-03  1.57123577e-03 -2.76014947e-03
   1.29946382e-02  8.89960372e-03 -2.35299801e-02]
 [-2.01106142e-03  1.12632708e-02 -3.06725615e-03 -5.47004859e-03
  -1.66426588e-02  2.22260073e-02 -6.77153173e-03  5.92854394e-03
   2.03264612e-02  8.73415240e-03  1.07357006e-02]
 [ 2.51008414e-02 -1.13946859e-03 -8.02178745e-03  2.45952395e-02
  -1.66217000e-02  2.41501855e-03  4.18750284e-03  8.45452884e-03
  -1.71974577e-02 -8.42149090e-04  5.76138208e-03]
 [-2.04082578e-03 -4.73904485e-03  4.47965263e-04  4.62048006e-03
  -5.03958502e

In [24]:
def nn_model(X, Y, n_h, num_iterations = 10000, print_cost=False, learning_rate = 2):
    """Train the one-hidden-layer network with batch gradient descent.

    Arguments:
    X -- input array; first dimension is the input layer size
    Y -- label array; first dimension is the output layer size
    n_h -- size of the hidden layer
    num_iterations -- number of gradient-descent iterations
    print_cost -- if True, print the cost every 1000 iterations
    learning_rate -- step size passed to update_parameters (default 2, the value
                     that used to be hard-coded)

    Returns:
    parameters -- dict with the learned "W1", "b1", "W2", "b2"

    Note: reseeds NumPy's global RNG with 3 so that initialisation is repeatable.
    """
    np.random.seed(3)
    # The hidden size comes from the caller, so layer_sizes' own n_h is ignored.
    n_x, _, n_y = layer_sizes(X, Y)

    parameters = initialize_parameters(n_x, n_h, n_y)

    for i in range(num_iterations):
        A2, cache = forward_propagation(X, parameters)

        # The cost is only needed for reporting, so skip it on silent iterations.
        if print_cost and i % 1000 == 0:
            cost = compute_cost(A2, Y, parameters)
            print ("Cost after iteration %i: %f" %(i, cost))

        grads = backward_propagation(parameters, cache, X, Y)
        parameters = update_parameters(parameters, grads, learning_rate)
    return parameters

In [25]:
# Train with 10 hidden units for 10000 iterations, printing the cost every 1000 iterations
parameters = nn_model(X_train, y_train, 10, num_iterations=10000, print_cost=True)

Cost after iteration 0: 0.693124
Cost after iteration 1000: 0.320580
Cost after iteration 2000: 0.318329
Cost after iteration 3000: 0.317805
Cost after iteration 4000: 0.317461
Cost after iteration 5000: 0.317251
Cost after iteration 6000: 0.317111
Cost after iteration 7000: 0.316960
Cost after iteration 8000: 0.316730
Cost after iteration 9000: 0.316176


In [26]:
def predict(parameters, X):
    """Predict a 0/1 class for every column of X.

    Arguments:
    parameters -- dict of parameters accepted by forward_propagation
    X -- input array passed to forward_propagation

    Returns:
    predictions -- integer array shaped like the network output; 1 where the
                   output probability is strictly greater than 0.5, else 0
    """
    probabilities, _ = forward_propagation(X, parameters)
    return (probabilities > 0.5).astype(int)

In [27]:
# Predict on the held-out test set; the mean of the 0/1 predictions is the
# fraction of test examples predicted as class 1.
y_pred = predict(parameters, X_test)
print("predictions mean = " + str(np.mean(y_pred)))

predictions mean = 0.148


In [28]:
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
# y_test and y_pred are stored as (1, m) rows; transpose so each example is a row.
cm = confusion_matrix(y_test.T, y_pred.T)

In [29]:
# Display the confusion matrix computed from y_test and y_pred
cm

array([[1511,   84],
       [ 193,  212]], dtype=int64)

In [30]:
# Accuracy in percent: correctly classified examples (the two diagonal cells of
# the confusion matrix) divided by the number of test examples.
accuracy = 100 * (cm[0, 0] + cm[1, 1]) / X_test.shape[1]

In [31]:
# Report the test-set accuracy as a percentage
print(str(accuracy)+ "%")

86.15%
