In [1]:
import pandas as pd
import numpy as np

In [2]:
# Read the churn dataset from the CSV file in the working directory.
df = pd.read_csv('Churn_Modelling.csv')

In [4]:
# Check the (rows, columns) dimensions of the freshly loaded table.
df.shape

(10000, 14)

In [10]:
# List the column names so the columns to drop / encode can be chosen.
df.columns

Index(['RowNumber', 'CustomerId', 'Surname', 'CreditScore', 'Geography',
       'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
       'IsActiveMember', 'EstimatedSalary', 'Exited'],
      dtype='object')

In [11]:
# Remove the row counter, customer id and surname columns
# (presumably pure identifiers with no predictive value — confirm).
# Note: this rebinds `df`, so re-running the cell raises KeyError.
df = df.drop(['RowNumber', 'CustomerId', 'Surname'], axis='columns')

In [14]:
# One-hot encode the two categorical columns; drop_first=True omits the
# first level of each so the dummy columns are not perfectly collinear.
df = pd.get_dummies(
    df,
    columns=['Geography', 'Gender'],
    drop_first=True,
)

In [13]:
from sklearn.preprocessing import OneHotEncoder,OrdinalEncoder

In [15]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler


In [16]:
# Scaler instance used to standardise the encoded table.
scaler = StandardScaler()

In [17]:
# Fit the scaler on every column of `df` and return the transformed values
# (the result is a NumPy array, not a DataFrame).
# NOTE(review): `df` still contains the `Exited` target at this point (it is
# only dropped when `X` is built further down), so the label is scaled and
# then passed into PCA together with the features. That looks like target
# leakage — confirm, and exclude `Exited` before fitting if so.
scaled_df=scaler.fit_transform(df)

In [20]:
# Confirm the container type returned by the scaler.
type(scaled_df)

numpy.ndarray

In [22]:
# A fractional n_components asks PCA to keep as many components as are
# needed to explain that share of the total variance.
VARIANCE_TO_KEEP = 0.95
pca = PCA(n_components=VARIANCE_TO_KEEP)

# Fit on the scaled array and project it onto the retained components.
pca_data = pca.fit_transform(scaled_df)

In [24]:
# Report the dimensions at each stage of the pipeline, one line per stage.
for _label, _obj in (
    ("Original shape:", df),
    ("Encoded + Scaled shape:", scaled_df),
    ("Reduced shape after PCA:", pca_data),
):
    print(_label, _obj.shape)

Original shape: (10000, 12)
Encoded + Scaled shape: (10000, 12)
Reduced shape after PCA: (10000, 11)


In [25]:
# Confirm the container type returned by PCA.
type(pca_data)

numpy.ndarray

In [26]:
# Eyeball the projected values (NumPy abbreviates the printout with "...").
print(pca_data)

[[ 0.26658388  1.71456869 -0.34597059 ... -0.46242964 -1.2379617
   1.77616542]
 [-0.78347493  0.47061021 -2.07998288 ... -1.04737611  0.6108353
  -0.25541455]
 [ 0.90030387  1.98035086  2.14472813 ... -0.59483699  1.11823634
   0.23192226]
 ...
 [ 0.08563942  1.33483313 -0.41791145 ... -1.17584271 -1.24427646
   2.19056958]
 [ 1.75521577  0.81220552  1.23852906 ...  2.24264906  0.54955868
   0.70485757]
 [ 0.33738416 -0.94579013 -0.00732323 ... -0.36302015 -0.67762076
  -0.22616159]]


In [28]:
# Name the principal components PC1..PCk, where k is however many components
# PCA actually retained. Because PCA was configured with a variance fraction,
# k depends on the data and must not be hard-coded.
n_components = pca_data.shape[1]
pca_columns = [f'PC{i+1}' for i in range(n_components)]

# Wrap the component scores in a DataFrame with readable column names.
pca_df = pd.DataFrame(pca_data, columns=pca_columns)

# Re-attach the `Exited` label; `.values` assigns by position rather than
# by index alignment.
pca_df['Exited'] = df['Exited'].values

# View the result
print(pca_df.shape)
print(pca_df.head())

(10000, 12)
        PC1       PC2       PC3       PC4       PC5       PC6       PC7  \
0  0.266584  1.714569 -0.345971  0.505321 -0.028921 -0.843609 -0.045345   
1 -0.783475  0.470610 -2.079983  0.432722 -1.936588 -0.574909 -0.201934   
2  0.900304  1.980351  2.144728 -0.951337  0.590253 -0.137702 -0.891276   
3 -0.960895  0.754515  0.952241  0.449305 -2.088612 -0.918784  0.716210   
4 -0.451679  0.070679 -1.992580  0.835894 -0.273326  0.233275  2.209922   

        PC8       PC9      PC10      PC11  Exited  
0  1.271203 -0.462430 -1.237962  1.776165       1  
1  0.250500 -1.047376  0.610835 -0.255415       0  
2 -0.169255 -0.594837  1.118236  0.231922       1  
3 -0.478386  0.274287 -0.903476 -0.691789       0  
4  1.552767 -0.429775  0.779395 -0.405557       0  


In [29]:
# Display the PCA table together with the re-attached `Exited` column.
pca_df

Unnamed: 0,PC1,PC2,PC3,PC4,PC5,PC6,PC7,PC8,PC9,PC10,PC11,Exited
0,0.266584,1.714569,-0.345971,0.505321,-0.028921,-0.843609,-0.045345,1.271203,-0.462430,-1.237962,1.776165,1
1,-0.783475,0.470610,-2.079983,0.432722,-1.936588,-0.574909,-0.201934,0.250500,-1.047376,0.610835,-0.255415,0
2,0.900304,1.980351,2.144728,-0.951337,0.590253,-0.137702,-0.891276,-0.169255,-0.594837,1.118236,0.231922,1
3,-0.960895,0.754515,0.952241,0.449305,-2.088612,-0.918784,0.716210,-0.478386,0.274287,-0.903476,-0.691789,0
4,-0.451679,0.070679,-1.992580,0.835894,-0.273326,0.233275,2.209922,1.552767,-0.429775,0.779395,-0.405557,0
...,...,...,...,...,...,...,...,...,...,...,...,...
9995,-1.157374,-0.120033,1.114151,0.013827,0.505610,0.337858,0.822761,0.017771,1.553189,-0.858991,-0.390856,0
9996,-0.465889,-1.047345,-0.378802,-0.526757,1.642457,0.454238,-1.486806,-0.358140,-0.949914,-0.914625,0.609383,0
9997,0.085639,1.334833,-0.417911,0.521913,-0.461684,-0.041260,1.029919,-1.413560,-1.175843,-1.244276,2.190570,1
9998,1.755216,0.812206,1.238529,0.258820,0.352734,0.037565,0.868003,0.106607,2.242649,0.549559,0.704858,1


In [33]:
# Training the deep learning model

In [34]:
# 1. Train/test split

In [35]:
from sklearn.model_selection import train_test_split
# Split the encoded table into the label (`Exited`) and the predictors.
# NOTE(review): the predictors are taken from `df`, not from `scaled_df` or
# `pca_df`, so the scaling / PCA results above are not used by the model.
y = df['Exited']
X = df.drop(columns=['Exited'])

In [37]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardise the features before they reach the network. `X` comes straight
# from the encoded table, so its columns are presumably on very different
# scales (0/1 flags next to balance/salary amounts), which hampers
# gradient-based training. The scaler is fitted on the training rows only so
# that no test-set statistics leak into training.
feature_scaler = StandardScaler().fit(X_train)
X_train = pd.DataFrame(
    feature_scaler.transform(X_train),
    columns=X_train.columns,
    index=X_train.index,
)
X_test = pd.DataFrame(
    feature_scaler.transform(X_test),
    columns=X_test.columns,
    index=X_test.index,
)

In [38]:
# Check the dimensions of the training feature matrix.
X_train.shape

(8000, 11)

In [39]:
# Check that the training labels have one entry per training row.
y_train.shape

(8000,)

In [40]:
import tensorflow as tf

In [41]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [43]:
# First architecture: two ReLU hidden layers (16 then 8 units) followed by a
# single sigmoid unit for the binary `Exited` label. The input width is taken
# from the training matrix.
mdl = Sequential([
    Dense(16, input_dim=X_train.shape[1], activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [44]:
# Configure training: Adam optimiser, binary cross-entropy loss, and
# accuracy reported as the tracked metric.
mdl.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [46]:
# Convert every split to a float32 NumPy array before handing it to Keras.
# Without this cast, fitting failed with:
#   "Failed to convert a NumPy array to a Tensor (Unsupported object type int)."
X_train, y_train, X_test, y_test = (
    np.array(split).astype('float32')
    for split in (X_train, y_train, X_test, y_test)
)

In [47]:
# Train the first network for 50 epochs in mini-batches of 20 rows. The
# held-out split is supplied as validation data, so validation metrics are
# recorded in the returned history after every epoch.
his = mdl.fit(
    X_train,
    y_train,
    batch_size=20,
    epochs=50,
    validation_data=(X_test, y_test),
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [48]:
# Another architecture

In [49]:
# Second architecture: three ReLU hidden layers of 16 units each, followed by
# a single sigmoid unit for the binary `Exited` label.
mdl2 = Sequential([
    Dense(16, input_dim=X_train.shape[1], activation='relu'),
    Dense(16, activation='relu'),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [50]:
# Same training configuration as the first model: Adam optimiser, binary
# cross-entropy loss, accuracy as the tracked metric.
mdl2.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [51]:
# Train the second architecture with the same schedule as the first
# (50 epochs, batch size 20, held-out split as validation data).
# The call must be made on `mdl2`: calling `mdl.fit` here would only continue
# training the first network for another 50 epochs and leave `mdl2` untrained,
# making any comparison between the two histories meaningless.
his2 = mdl2.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=20,
    validation_data=(X_test, y_test),
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [52]:
# So after changing the model's architecture, the performance increased.