In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import math

In [2]:
df=pd.read_csv('Bank customer data.csv')

In [3]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [4]:
# Drop some features that will not be much of connected to the customer stay or leave bank.
df = df.drop({'RowNumber', 'CustomerId', 'Surname','Geography', 'Tenure'}, axis=1)

In [5]:
df.head()

Unnamed: 0,CreditScore,Gender,Age,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,Female,42,0.0,1,1,1,101348.88,1
1,608,Female,41,83807.86,1,0,1,112542.58,0
2,502,Female,42,159660.8,3,1,0,113931.57,1
3,699,Female,39,0.0,2,0,0,93826.63,0
4,850,Female,43,125510.82,1,1,1,79084.1,0


In [6]:
df.isnull().sum()

CreditScore        0
Gender             0
Age                0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

In [7]:
# change gender to 0 for male and 1 for female
def convert_gender(Gender):
    if(Gender=='Female'):
        return 1
    else:
        return 0

In [8]:
df['Gender'] = df['Gender'].apply(convert_gender)

In [9]:
df.head()

Unnamed: 0,CreditScore,Gender,Age,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,619,1,42,0.0,1,1,1,101348.88,1
1,608,1,41,83807.86,1,0,1,112542.58,0
2,502,1,42,159660.8,3,1,0,113931.57,1
3,699,1,39,0.0,2,0,0,93826.63,0
4,850,1,43,125510.82,1,1,1,79084.1,0


In [10]:
X = df.drop('Exited', axis=1)
y=df['Exited']

In [11]:
# scale the data to prevent some features dominate the others. For example here, estimate salary and balance.

In [12]:
from sklearn.preprocessing import StandardScaler

In [13]:
scaler = StandardScaler()

In [14]:
X = scaler.fit_transform(X)

In [15]:
from sklearn.model_selection import train_test_split

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

In [17]:
X_train[:10]

array([[ 0.36701255, -0.91241915, -0.66001848, -1.22584767,  0.80773656,
         0.64609167,  0.97024255,  1.37378358],
       [-0.19171334, -0.91241915,  0.29351742,  0.69138881,  0.80773656,
         0.64609167,  0.97024255,  1.66740729],
       [-0.94702796, -0.91241915, -1.42284721,  0.61310151, -0.91158349,
         0.64609167, -1.03067011, -0.24691025],
       [-0.92633441,  1.09598752, -1.13678644,  0.94802129, -0.91158349,
         0.64609167, -1.03067011,  0.92144593],
       [-1.38159254, -0.91241915,  1.62846769,  1.0523629 , -0.91158349,
        -1.54776799, -1.03067011, -1.0538117 ],
       [-0.84356021, -0.91241915,  0.19816383,  0.70130783, -0.91158349,
         0.64609167, -1.03067011,  1.09654674],
       [ 0.33597222,  1.09598752,  0.10281024, -1.22584767,  0.80773656,
        -1.54776799, -1.03067011, -0.42304378],
       [ 1.12232718,  1.09598752,  0.5795782 , -1.22584767,  0.80773656,
         0.64609167,  0.97024255, -0.01458937],
       [-2.32314913,  1.09598752

In [18]:
y_train = np.array(y_train)

In [37]:
# declare hte variables with some values
regressors = len(X_train[0])
init_m = 0
init_b = [0 for x in range(regressors)]
learning_rate = 0.0001
num_iteration = 500
N = len(X_train)
landa = 100



# training set
def minimize_m_b(X_train,y_train,init_m1 , init_b1, learning_rate, landa):
    minimize_m = init_m1
    minimize_b = [x for x in init_b1] 
    # without setting up the values for minimize_b in this way, the values for both will keep adding up
    minimized_b = [0 for x in range(regressors)]
    
    for i in range(N):
        sum_m_b = 0 # forgot to put this and cause the huge numbers for m and b
        
        for j in range(regressors):
            sum_m_b += init_b1[j] * X_train[i][j]
            
        minimize_m +=  (2*y_train[i]+1)/(math.exp(init_m1 + sum_m_b)+1)
        
        for k in range(regressors):
            minimize_b[k] += (((2*y_train[i]+1)*X_train[i][k]) / (math.exp(init_m1+sum_m_b)+1)) * (1/N) + (X_train[i][k]*landa/N)
        
        
    minimized_m = init_m1 + (learning_rate/N) * minimize_m

    for l in range(regressors):
        minimized_b[l] = init_b1[l] + learning_rate * minimize_b[l]
    return [minimized_m, minimized_b]


def run_minimize_m_b(X_train,y_train,init_m, init_b, learning_rate, num_iteration, landa):
    init_m1 = init_m
    init_b1 = [x for x in init_b]
    for i in range(num_iteration):
        init_m1,init_b1 = minimize_m_b(X_train,y_train, init_m1, init_b1, learning_rate, landa)
    return [init_m1, init_b1, learning_rate, num_iteration, landa] # the problem for the huge numbers are here, after switch to m1 and b1, the number get smaller



m, b, learning_rate, iterations, landa = run_minimize_m_b(X_train,y_train, init_m, init_b, learning_rate, num_iteration, landa)
print ("intercept: ",m, "Slope: ",b)       

intercept:  0.0349645343497 Slope:  [0.058645198613769337, 0.0067734193763950437, -0.0059350436356891976, -0.029063806560589135, 0.0092177851681744596, -0.022584845444746347, -0.027274928313950363, 0.030572297590804803]


In [25]:
y_test = np.array(y_test)

In [26]:
# check how many 1 and 0 in y_test

count_y0 = 0
count_y1 = 0
for i in range(len(y_test)):
    if(y_test[i]==1):
        count_y1 += 1
    else:
        count_y0 +=1

In [27]:
print(count_y0, count_y1)

1607 393


In [36]:
# keep it as it is
pred_y = []
sum_b=0
for x in range(len(y_test)):
    for i in range (regressors):
        sum_b += b[i]*X_test[x][i]
    pred_y.append(m + sum_b)
    sum_b = 0
    
pred_y

[0.0029080692942246508,
 0.10604073704222031,
 0.082396445698388998,
 -1.2404932675873797,
 -0.97428357333879234,
 1.2020680701833404,
 -1.5520418148730086,
 -0.32565035187953567,
 -0.36661735224501729,
 -1.2617750062068929,
 -0.51379923214444323,
 0.71244304723402629,
 1.9734399575846024,
 0.22214333839048089,
 -0.67594829826074576,
 0.1672672524314664,
 -0.41737751984967841,
 -1.2697085314209351,
 0.22457765842757332,
 0.11294267087881416,
 0.79335119149443944,
 0.49421640838046316,
 1.3296990532528254,
 1.2321004458989608,
 0.74101808182557882,
 -0.99197625072984008,
 -0.73986674738272862,
 0.83798291678823467,
 0.06263198117239771,
 -1.0327952587298275,
 -0.87391337701801397,
 1.3255892240146321,
 -0.89294397561460226,
 0.090886705907858245,
 -0.35756443238290841,
 -0.73611690200139468,
 1.0859743391031742,
 -0.17616903106714038,
 0.69040311124701159,
 0.69763920323484518,
 0.28279455241613377,
 -1.6983382693977134,
 -0.41921350406005159,
 -1.019008274299789,
 0.8731946191437453,
 

In [31]:
# try to apply sclaer inverse in x_test

X_test2 = scaler.inverse_transform(X_test)
pred_y = []
sum_b=0
for x in range(len(y_test)):
    for i in range (regressors):
        sum_b += b[i]*X_test2[x][i]
    pred_y.append(m + sum_b)
    sum_b = 0

In [32]:
pred_y
# terrible

[-17061.088118465294,
 44570.427217663004,
 18035.262562094089,
 14840.892299274408,
 -3854.3089754601779,
 45587.216893847159,
 -20017.389449701877,
 -9876.0716410391215,
 -29920.738481826706,
 23225.940442132985,
 51534.528439279391,
 25862.096871307989,
 57089.87656498322,
 420.49980783888606,
 -33201.837890401213,
 11303.498732737422,
 2168.1042193876965,
 601.73513997017733,
 44396.48636908047,
 1563.0495893157001,
 24107.185091118077,
 13453.80932845938,
 31520.011440445782,
 6245.9652326857804,
 41630.631715506308,
 -62886.257692154599,
 -25321.406073710154,
 -24243.43185100843,
 3125.0286598407183,
 4087.6630429749102,
 -38606.179691877107,
 9688.8586146152829,
 -8421.9871102955076,
 2913.8536717337333,
 -14314.198740708303,
 -29145.536702569661,
 10878.596320651952,
 29918.654943107551,
 -4884.8788189294964,
 27554.31402893375,
 7428.4084602612829,
 -33012.993354749204,
 1506.3241694033475,
 15302.705165963152,
 26304.22619783715,
 32274.768717505729,
 14365.742119895294,
 286

In [35]:
# maybe inverse the b
b2 = scaler.inverse_transform(b)

X_test2 = scaler.inverse_transform(X_test)
pred_y = []
sum_b=0
for x in range(len(y_test)):
    for i in range (regressors):
        sum_b += b[i]*X_test[x][i]
    pred_y.append(m + sum_b)
    sum_b = 0
    
pred_y

[0.0029080692942246508,
 0.10604073704222031,
 0.082396445698388998,
 -1.2404932675873797,
 -0.97428357333879234,
 1.2020680701833404,
 -1.5520418148730086,
 -0.32565035187953567,
 -0.36661735224501729,
 -1.2617750062068929,
 -0.51379923214444323,
 0.71244304723402629,
 1.9734399575846024,
 0.22214333839048089,
 -0.67594829826074576,
 0.1672672524314664,
 -0.41737751984967841,
 -1.2697085314209351,
 0.22457765842757332,
 0.11294267087881416,
 0.79335119149443944,
 0.49421640838046316,
 1.3296990532528254,
 1.2321004458989608,
 0.74101808182557882,
 -0.99197625072984008,
 -0.73986674738272862,
 0.83798291678823467,
 0.06263198117239771,
 -1.0327952587298275,
 -0.87391337701801397,
 1.3255892240146321,
 -0.89294397561460226,
 0.090886705907858245,
 -0.35756443238290841,
 -0.73611690200139468,
 1.0859743391031742,
 -0.17616903106714038,
 0.69040311124701159,
 0.69763920323484518,
 0.28279455241613377,
 -1.6983382693977134,
 -0.41921350406005159,
 -1.019008274299789,
 0.8731946191437453,
 