In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the churn dataset from the mounted Google Drive folder into a DataFrame.
df = pd.read_csv(
    filepath_or_buffer='/content/drive/MyDrive/Churn_Modelling.csv',
)

In [None]:
# Preview the first five rows of the freshly loaded frame.
df.head(n=5)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [None]:
# Show the column labels of the loaded frame.
df.columns

Index(['RowNumber', 'CustomerId', 'Surname', 'CreditScore', 'Geography',
       'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
       'IsActiveMember', 'EstimatedSalary', 'Exited'],
      dtype='object')

In [None]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer

In [None]:
# Remove the identifier-style columns (row counter, customer id, surname) so they
# are not part of the feature set built from `df` below.
# errors='ignore' keeps this cell idempotent: because it rebinds `df`, re-running
# it without reloading the CSV would otherwise raise KeyError (columns already gone).
df = df.drop(columns=['RowNumber', 'CustomerId', 'Surname'], errors='ignore')

In [None]:
# Show (rows, columns) of the frame after the column drop.
df.shape

(10000, 11)

In [None]:
# Separate the label column ('Exited') from the predictor columns.
y = df.loc[:, 'Exited']
X = df.drop(labels='Exited', axis=1)

In [None]:
# Display the predictor frame.
X

Unnamed: 0,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary
0,619,France,Female,42,2,0.00,1,1,1,101348.88
1,608,Spain,Female,41,1,83807.86,1,0,1,112542.58
2,502,France,Female,42,8,159660.80,3,1,0,113931.57
3,699,France,Female,39,1,0.00,2,0,0,93826.63
4,850,Spain,Female,43,2,125510.82,1,1,1,79084.10
...,...,...,...,...,...,...,...,...,...,...
9995,771,France,Male,39,5,0.00,2,1,0,96270.64
9996,516,France,Male,35,10,57369.61,1,1,1,101699.77
9997,709,France,Female,36,7,0.00,1,0,1,42085.58
9998,772,Germany,Male,42,3,75075.31,2,1,0,92888.52


In [None]:
# Display the target series.
y

Unnamed: 0,Exited
0,1
1,0
2,1
3,0
4,0
...,...
9995,0
9996,0
9997,1
9998,1


In [None]:
# Column names handed to the numeric branch of the preprocessor.
numerical_features = [
    'CreditScore',
    'Age',
    'Tenure',
    'Balance',
    'NumOfProducts',
    'EstimatedSalary',
]
# Column names handed to the categorical (one-hot) branch of the preprocessor.
categorical_features = [
    'Geography',
    'Gender',
]

In [None]:
# Numeric branch: fill missing values with the column median, then standardise.
numerical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='median')),
        ('scaler', StandardScaler()),
    ],
)

In [None]:
# Display the numeric pipeline definition.
numerical_transformer

In [None]:
# Categorical branch: replace missing values with the literal 'missing', then
# one-hot encode; categories unseen at fit time are ignored at transform time.
categorical_transformer = Pipeline(
    steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='missing')),
        ('onehot', OneHotEncoder(handle_unknown='ignore')),
    ],
)

In [None]:
# Display the categorical pipeline definition.
categorical_transformer

In [None]:
# Route each feature list to its pipeline. Output columns follow transformer
# order: the numeric block first, then the one-hot block.
# remainder='drop' is the ColumnTransformer default, written out here because it
# matters: any column of the input that is in neither list is discarded.
# NOTE(review): with the X shown above that means HasCrCard and IsActiveMember
# never reach the models - confirm this is intended.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_transformer, numerical_features),
        ('cat', categorical_transformer, categorical_features),
    ],
    remainder='drop',
)

In [None]:
# Fit the preprocessor on every row of X and transform those same rows.
# NOTE(review): this runs before any train/test split, so the statistics behind
# X_preprocessed are computed from all rows of X.
X_preprocessed = preprocessor.fit_transform(X=X)

In [None]:
# Display the transformed feature matrix.
X_preprocessed

array([[-0.32622142,  0.29351742, -1.04175968, ...,  0.        ,
         1.        ,  0.        ],
       [-0.44003595,  0.19816383, -1.38753759, ...,  1.        ,
         1.        ,  0.        ],
       [-1.53679418,  0.29351742,  1.03290776, ...,  0.        ,
         1.        ,  0.        ],
       ...,
       [ 0.60498839, -0.27860412,  0.68712986, ...,  0.        ,
         1.        ,  0.        ],
       [ 1.25683526,  0.29351742, -0.69598177, ...,  0.        ,
         0.        ,  1.        ],
       [ 1.46377078, -1.04143285, -0.35020386, ...,  0.        ,
         1.        ,  0.        ]])

In [None]:
from sklearn.model_selection import train_test_split

# Split the RAW rows first (80/20, fixed seed), then fit the preprocessor on the
# training rows only. Fitting it on all of X before splitting leaks test-set
# statistics (imputation medians, scaling mean/std, seen categories) into the
# training features and makes the hold-out metrics optimistic.
# The same random_state and row count select the same rows as a split of the
# already-transformed matrix would.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# Learn the preprocessing statistics from the training rows ...
X_train = preprocessor.fit_transform(X_train_raw)
# ... and only apply them to the held-out rows.
X_test = preprocessor.transform(X_test_raw)

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [None]:
# Train a logistic regression with default settings on the training split.
# fit() returns the estimator itself, so construction and fitting can be chained.
model_lr = LogisticRegression().fit(X_train, y_train)
# Leave the fitted estimator as the cell's displayed value.
model_lr

In [None]:
# Class-label predictions of the logistic regression for the held-out rows.
y_pred_lr = model_lr.predict(X=X_test)

In [None]:
# Hold-out metrics for the logistic regression: accuracy, per-class
# precision/recall/F1, and the confusion matrix (rows = true, columns = predicted).
print("Logistic Regression:")
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred_lr))
print(classification_report(y_true=y_test, y_pred=y_pred_lr))
print(confusion_matrix(y_true=y_test, y_pred=y_pred_lr))

Logistic Regression:
Accuracy: 0.8035
              precision    recall  f1-score   support

           0       0.82      0.97      0.89      1607
           1       0.50      0.14      0.22       393

    accuracy                           0.80      2000
   macro avg       0.66      0.55      0.55      2000
weighted avg       0.76      0.80      0.76      2000

[[1552   55]
 [ 338   55]]


In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Train a random forest (fixed seed for reproducibility) on the training split.
# fit() returns the estimator itself, so construction and fitting can be chained.
model_rf = RandomForestClassifier(random_state=42).fit(X_train, y_train)
# Leave the fitted estimator as the cell's displayed value.
model_rf

In [None]:
# Class-label predictions of the random forest for the held-out rows.
y_pred_rf = model_rf.predict(X=X_test)

In [None]:
# Hold-out metrics for the random forest: accuracy, per-class
# precision/recall/F1, and the confusion matrix (rows = true, columns = predicted).
# Must score y_pred_rf: scoring y_pred_lr here would reprint the logistic
# regression's numbers under the "Random Forest" heading.
print("Random Forest:")
print("Accuracy:", accuracy_score(y_test, y_pred_rf))
print(classification_report(y_test, y_pred_rf))
print(confusion_matrix(y_test, y_pred_rf))

Random Forest:
Accuracy: 0.8035
              precision    recall  f1-score   support

           0       0.82      0.97      0.89      1607
           1       0.50      0.14      0.22       393

    accuracy                           0.80      2000
   macro avg       0.66      0.55      0.55      2000
weighted avg       0.76      0.80      0.76      2000

[[1552   55]
 [ 338   55]]


In [None]:
from sklearn.model_selection import GridSearchCV

# Hyper-parameter candidates for the random forest: 2 x 3 = 6 combinations.
param_grid = dict(
    n_estimators=[100, 200],
    max_depth=[None, 10, 20],
)
# 5-fold cross-validated search on the training split, ranked by ROC-AUC.
grid_search = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid,
    scoring='roc_auc',
    cv=5,
)
grid_search.fit(X_train, y_train)
print(f'Best parameters: {grid_search.best_params_}')
print(f'Best score: {grid_search.best_score_}')

Best parameters: {'max_depth': 10, 'n_estimators': 200}
Best score: 0.8485853013657604


In [None]:
from sklearn.metrics import roc_auc_score

# ROC-AUC of model_rf on the held-out rows, using the second predict_proba
# column (index 1) as the score.
# NOTE(review): this scores model_rf, not the estimator selected by grid_search.
y_prob_rf = model_rf.predict_proba(X_test)[:, 1]
auc_rf = roc_auc_score(y_true=y_test, y_score=y_prob_rf)
print(f'Random Forest ROC-AUC: {auc_rf}')

Random Forest ROC-AUC: 0.845776508943854


In [None]:
# Display the random forest's predicted labels.
y_pred_rf

array([0, 0, 0, ..., 1, 0, 1])

In [None]:
# A single customer record to score. The models were trained on the
# preprocessor's output (scaled numerics + one-hot columns), so a hand-typed
# list of raw/mixed values is not in the model's feature space even if its
# length happens to match. Build the record with the raw column names and push
# it through the fitted preprocessor instead.
# The values mirror the second row shown in the df.head() output.
testing_raw = pd.DataFrame([{
    'CreditScore': 608,
    'Geography': 'Spain',
    'Gender': 'Female',
    'Age': 41,
    'Tenure': 1,
    'Balance': 83807.86,
    'NumOfProducts': 1,
    'HasCrCard': 0,
    'IsActiveMember': 1,
    'EstimatedSalary': 112542.58,
}])
# transform() yields a one-row matrix; keep that row as a 1-D feature vector so
# it can still be wrapped as `[testing]` when calling predict.
testing = preprocessor.transform(testing_raw)[0]

In [None]:
# Wrap the single sample in a list so predict receives a one-row, 2-D input.
y_pred = model_lr.predict(X=[testing])

In [None]:
# Display the predicted label for the single sample.
y_pred

array([0])