In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
# NOTE(review): this silences every warning for the rest of the session, which
# includes NumPy floating-point overflow warnings — consider narrowing the filter.
warnings.filterwarnings("ignore")

In [11]:
# Load the churn dataset from a CSV in the current working directory and
# preview the first five rows.
data = pd.read_csv("Churn_Modelling.csv")
data.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [12]:
# Column dtypes and non-null counts — a quick check for missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 14 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   RowNumber        10000 non-null  int64  
 1   CustomerId       10000 non-null  int64  
 2   Surname          10000 non-null  object 
 3   CreditScore      10000 non-null  int64  
 4   Geography        10000 non-null  object 
 5   Gender           10000 non-null  object 
 6   Age              10000 non-null  int64  
 7   Tenure           10000 non-null  int64  
 8   Balance          10000 non-null  float64
 9   NumOfProducts    10000 non-null  int64  
 10  HasCrCard        10000 non-null  int64  
 11  IsActiveMember   10000 non-null  int64  
 12  EstimatedSalary  10000 non-null  float64
 13  Exited           10000 non-null  int64  
dtypes: float64(2), int64(9), object(3)
memory usage: 1.1+ MB


In [13]:
# (rows, columns) of the raw frame.
data.shape

(10000, 14)

In [14]:
# Summary statistics of the numeric columns, transposed so each feature is a row.
data.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
RowNumber,10000.0,5000.5,2886.89568,1.0,2500.75,5000.5,7500.25,10000.0
CustomerId,10000.0,15690940.0,71936.186123,15565701.0,15628528.25,15690740.0,15753230.0,15815690.0
CreditScore,10000.0,650.5288,96.653299,350.0,584.0,652.0,718.0,850.0
Age,10000.0,38.9218,10.487806,18.0,32.0,37.0,44.0,92.0
Tenure,10000.0,5.0128,2.892174,0.0,3.0,5.0,7.0,10.0
Balance,10000.0,76485.89,62397.405202,0.0,0.0,97198.54,127644.2,250898.09
NumOfProducts,10000.0,1.5302,0.581654,1.0,1.0,1.0,2.0,4.0
HasCrCard,10000.0,0.7055,0.45584,0.0,0.0,1.0,1.0,1.0
IsActiveMember,10000.0,0.5151,0.499797,0.0,0.0,1.0,1.0,1.0
EstimatedSalary,10000.0,100090.2,57510.492818,11.58,51002.11,100193.9,149388.2,199992.48


In [15]:
# Remove identifier-like columns before modelling.
# `columns=` already selects the axis, so no separate `axis` argument is needed.
# errors="ignore" keeps this cell idempotent: because it reassigns `data`,
# re-running it would otherwise raise KeyError on the already-dropped columns.
data = data.drop(columns=["Surname", "CustomerId", "RowNumber"], errors="ignore")
data.head()

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [16]:
# One-hot encode the remaining text columns (Geography, Gender).
# The dummy dtype is pinned to float: recent pandas versions default to bool,
# and a frame mixing bool with int/float columns converts to an object-dtype
# NumPy array, on which np.exp() fails further down.
data = pd.get_dummies(data, dtype=float)
data.shape

(10000, 14)

In [17]:
# Target: the `Exited` flag, kept as a single-column DataFrame.
y = data[["Exited"]]
# Features: every remaining column.
x = data.drop(columns=["Exited"])

In [18]:
class SimpleNeuralNetwork:
    """Single-neuron binary classifier (logistic regression) trained with
    full-batch gradient descent.

    Parameters
    ----------
    l_rate : float, default 0.001
        Gradient-descent step size.
    echops : int, default 2000
        Number of passes over the training data. The attribute keeps its
        historical spelling so code that reads or sets ``echops`` still works.
    verbose : bool, default True
        When true, ``train`` and ``predict`` print the shapes of the arrays
        they produced.
    """

    def __init__(self, l_rate=0.001, echops=2000, verbose=True):
        # Learning Rate
        self.l_rate = l_rate
        # Total iterations
        self.echops = echops
        # Whether train()/predict() print shape diagnostics
        self.verbose = verbose

    @staticmethod
    def _sigmoid(z):
        """Return the logistic function 1 / (1 + exp(-z)) element-wise."""
        # Clipping keeps exp() within float64 range; sigmoid is already
        # indistinguishable from 0 or 1 far before |z| reaches 500.
        z = np.clip(z, -500.0, 500.0)
        return 1.0 / (1.0 + np.exp(-z))

    def train(self, x_train_data, y_train_data):
        """Fit weights and bias by gradient descent on the log-loss.

        Parameters
        ----------
        x_train_data : array-like, shape (n_samples, n_features)
        y_train_data : array-like with n_samples binary labels (0 or 1)

        Returns
        -------
        (weight, baised) : tuple of ndarray
            ``weight`` has shape (1, n_features); ``baised`` has shape (1, 1).

        Raises
        ------
        ValueError
            If there are no samples, or if the features and labels disagree
            on the number of samples.
        """
        # Coerce to float so bool/object inputs work with np.exp.
        x_train_data = np.asarray(x_train_data, dtype=float)
        y_train_data = np.asarray(y_train_data, dtype=float).reshape(-1, 1)

        n_samples = x_train_data.shape[0]
        if n_samples == 0:
            raise ValueError("x_train_data must contain at least one sample")
        if y_train_data.shape[0] != n_samples:
            raise ValueError(
                "x_train_data has {} samples but y_train_data has {}".format(
                    n_samples, y_train_data.shape[0]))

        # weight shape = (1 x n_features), small random start drawn from the
        # global NumPy RNG (seed it with np.random.seed for repeatable runs)
        weight = np.random.rand(1, x_train_data.shape[1]) * 0.005

        # bias shape = (1 x 1)
        baised = np.zeros((1, 1), dtype='float')

        for _ in range(self.echops):

            # z = x.w^T + b, shape = (n_samples x 1)
            z = np.dot(x_train_data, weight.T) + baised

            # y^ = sigmoid(z), shape = (n_samples x 1)
            y_predict = self._sigmoid(z)

            # dz = y^ - y, shape = (n_samples x 1)
            dz = y_predict - y_train_data

            # dw = x^T.dz / n_samples, shape = (n_features x 1)
            dw = np.dot(x_train_data.T, dz) / n_samples

            # db = sum(dz) / n_samples, scalar
            db = dz.sum() / n_samples

            # w = w - alpha * dw^T, shape = (1 x n_features)
            weight = weight - self.l_rate * dw.T

            # b = b - alpha * db, shape = (1 x 1)
            baised = baised - self.l_rate * db

        # The loop variables only exist if at least one iteration ran.
        if self.verbose and self.echops > 0:
            print("Size  z is {} a is {} dz is {} dw is {} db is {} w is {} b is {} ".format(z.shape, y_predict.shape, dz.shape, dw.shape, db.shape, weight.shape, baised.shape))

        return weight, baised

    def predict(self, x_data, parameters):
        """Return the predicted probability of class 1 for every row.

        Parameters
        ----------
        x_data : array-like, shape (n_samples, n_features)
        parameters : sequence ``(weight, bias)`` as returned by ``train``

        Returns
        -------
        ndarray, shape (n_samples, 1), with values in [0, 1].
        """
        x_data = np.asarray(x_data, dtype=float)
        weight = np.asarray(parameters[0], dtype=float)
        baised = parameters[1]

        # z = x.w^T + b, shape = (n_samples x 1)
        z = np.dot(x_data, weight.T) + baised

        # y^ = sigmoid(z), shape = (n_samples x 1)
        y_predict = self._sigmoid(z)

        if self.verbose:
            print("y_predicted size ", y_predict.shape)

        return y_predict

    #Defining function to calculate accuracy of data 
    def accuracy(self, y_data_test, y_pred_test, threshold=0.5):
        """Return the classification accuracy as a percentage (0-100).

        Predicted probabilities are turned into class labels with
        ``threshold`` (>= threshold means class 1) and compared with the
        true labels. NaN predictions are treated as 0, i.e. class 0.
        """
        # Flatten both sides so (n,) and (n, 1) inputs compare element-wise
        # instead of broadcasting to an (n, n) matrix.
        scores = np.nan_to_num(np.asarray(y_pred_test, dtype=float)).reshape(-1)
        truth = np.asarray(y_data_test, dtype=float).reshape(-1)

        labels = (scores >= threshold).astype(float)

        test_accuracy = np.mean(labels == truth) * 100

        return test_accuracy


def main():
    """Train and evaluate the classifier on the notebook-level `x` and `y`.

    The first 80% of rows are used for training and the rest for testing.
    Features are standardised with statistics computed on the training rows
    only, then train and test accuracy are printed.
    """
    # train() draws its initial weights from the global NumPy RNG; seeding it
    # makes repeated runs produce the same numbers.
    np.random.seed(42)

    #initializing object for class
    snn = SimpleNeuralNetwork()

    #setting size for test and train data
    train_size = int(np.ceil((len(y) * 0.80)))

    # Cast to float up front: a frame that mixes bool dummy columns with
    # numeric columns would otherwise become an object-dtype array.
    features = np.asarray(x, dtype=float)
    targets = np.asarray(y, dtype=float).reshape(-1, 1)

    #Training data
    x_train_data = features[:train_size]
    y_train_data = targets[:train_size]

    #Testing data
    x_test_data = features[train_size:]
    y_test_data = targets[train_size:]

    # Standardise every feature. Raw columns such as Balance and
    # EstimatedSalary are around 1e5, which drives the logit far outside the
    # range where the sigmoid has a usable gradient. The statistics come from
    # the training rows only so nothing leaks from the test set.
    feature_mean = x_train_data.mean(axis=0)
    feature_std = x_train_data.std(axis=0)
    feature_std[feature_std == 0] = 1.0  # constant column: avoid dividing by zero
    x_train_data = (x_train_data - feature_mean) / feature_std
    x_test_data = (x_test_data - feature_mean) / feature_std

    parameters = snn.train(x_train_data, y_train_data)

    y_prediction = snn.predict(x_test_data, parameters)

    y_prediction_train = snn.predict(x_train_data, parameters)

    accuracy = snn.accuracy(y_test_data, y_prediction)
    accuracy_train = snn.accuracy(y_train_data, y_prediction_train)

    print("Train accuracy: ", accuracy_train)
    print("Test accuracy: ", accuracy)


# Run the experiment only when this code executes as the main module
# (which is the case when the cell is run in a notebook kernel).
if __name__ == '__main__':
    main()

Size  z is (8000, 1) a is (8000, 1) dz is (8000, 1) dw is (13, 1) db is () w is (1, 13) b is (1, 1) 
y_predicted size  (2000, 1)
y_predicted size  (8000, 1)
Train accuracy:  79.4125
Test accuracy:  80.5
