In [240]:
#Binary Prediction

# Import pandas
import pandas as pd

# Import sklearn
import sklearn as sk
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Import tensorflow
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense


# Load dataframe from dataset 
df = pd.read_csv('Input/Churn.csv')
df.head(1)


Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1


In [241]:
# Dropping some unnecessary dtypes that doesn't affect the dataframe (Or does not have any pattern)
df.drop(columns=['RowNumber', 'CustomerId', 'Surname'], inplace=True)

In [242]:
# Check to see the what the datatypes are numerical or not
df.info(verbose=True) # Geography and Gender are non num

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 11 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   CreditScore      10000 non-null  int64  
 1   Geography        10000 non-null  object 
 2   Gender           10000 non-null  object 
 3   Age              10000 non-null  int64  
 4   Tenure           10000 non-null  int64  
 5   Balance          10000 non-null  float64
 6   NumOfProducts    10000 non-null  int64  
 7   HasCrCard        10000 non-null  int64  
 8   IsActiveMember   10000 non-null  int64  
 9   EstimatedSalary  10000 non-null  float64
 10  Exited           10000 non-null  int64  
dtypes: float64(2), int64(7), object(2)
memory usage: 859.5+ KB


In [243]:
# Convert 2 columns Geography and Gender into True and False
df = pd.get_dummies(df,columns=['Geography','Gender'],drop_first=True)
df.head()

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_Germany,Geography_Spain,Gender_Male
0,619,42,2,0.0,1,1,1,101348.88,1,False,False,False
1,608,41,1,83807.86,1,0,1,112542.58,0,False,True,False
2,502,42,8,159660.8,3,1,0,113931.57,1,False,False,False
3,699,39,1,0.0,2,0,0,93826.63,0,False,False,False
4,850,43,2,125510.82,1,1,1,79084.1,0,False,True,False


In [244]:
# Dropping Exited since it is label (output)
X = df.drop(columns=['Exited'])
y = df['Exited'].values # Return the value from y

# Splitting the data into Train set and Test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)




In [245]:
scaler = StandardScaler()

# Fit and then transform X_train, X_test
X_train_trf = scaler.fit_transform(X_train)
X_test_trf = scaler.fit_transform(X_test)

In [246]:
# Sigmoid Activation Function
model = Sequential([
    tf.keras.layers.Dense(units= 11, activation='sigmoid', input_dim=11),
    tf.keras.layers.Dense(units= 11, activation='sigmoid'),
    tf.keras.layers.Dense(units= 1, activation='sigmoid')
])

In [247]:
model.summary()

Model: "sequential_8"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_24 (Dense)            (None, 11)                132       
                                                                 
 dense_25 (Dense)            (None, 11)                132       
                                                                 
 dense_26 (Dense)            (None, 1)                 12        
                                                                 
Total params: 276 (1.08 KB)
Trainable params: 276 (1.08 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [248]:
#Assign optimizer, loss function and metrics
model.compile(optimizer='Adam', loss='binary_crossentropy', metrics=['accuracy'])

In [249]:
# Converting X_train and y_train to tensors
X_train_tensor = tf.convert_to_tensor(X_train_trf)
y_train_tensor = tf.convert_to_tensor(y_train)

X_test_tensor = tf.convert_to_tensor(X_test_trf)

# Fitting the model to reduce losses
model.fit(X_train_tensor, y_train_tensor, batch_size=50, epochs=100, verbose=1, validation_split=0.2)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x14b00a090>

In [250]:
# Get all y prediction based on test model tensors
y_pred = model.predict(X_test_tensor)
y_pred




array([[0.05689274],
       [0.00877046],
       [0.1615263 ],
       ...,
       [0.2855632 ],
       [0.17448764],
       [0.16528569]], dtype=float32)

In [251]:
# Get maximum number with axis = -1 in all y prediction
y_pred = y_pred.argmax(axis=-1)

In [252]:
# Checking accuracy score
sk.metrics.accuracy_score(y_test, y_pred)

0.802