In [83]:
import os

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, recall_score
from imblearn.over_sampling import SMOTE

In [67]:
# Location of the churn dataset. The default is a machine-specific absolute
# path; set the CHURN_DATA_PATH environment variable to point the notebook at
# a different copy of the file without editing this cell.
DATA_PATH = os.environ.get(
    'CHURN_DATA_PATH',
    '/Users/mac/Desktop/Customer Churn/Churn_Modelling.csv',
)

# Load the raw customer records into a DataFrame.
df = pd.read_csv(DATA_PATH)

In [68]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [69]:
# Discard every row that contains at least one missing value.
# The result is rebound to `df` (instead of using inplace=True) so this cell
# follows the same "assign the new frame" pattern as the encoding step below.
df = df.dropna()

In [70]:
# One-hot encode the categorical columns; each listed column is replaced by
# one indicator column per distinct value (e.g. Geography_France).
# HasCrCard already holds 0/1 in the preview above, so its two indicator
# columns (HasCrCard_0 / HasCrCard_1) are complements of each other.
df = pd.get_dummies(
    data=df,
    columns=[
        'Gender',
        'HasCrCard',
        'Geography',
    ],
)

In [71]:
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Age,Tenure,Balance,NumOfProducts,IsActiveMember,EstimatedSalary,Exited,Gender_Female,Gender_Male,HasCrCard_0,HasCrCard_1,Geography_France,Geography_Germany,Geography_Spain
0,1,15634602,Hargrave,619,42,2,0.0,1,1,101348.88,1,True,False,False,True,True,False,False
1,2,15647311,Hill,608,41,1,83807.86,1,1,112542.58,0,True,False,True,False,False,False,True
2,3,15619304,Onio,502,42,8,159660.8,3,0,113931.57,1,True,False,False,True,True,False,False
3,4,15701354,Boni,699,39,1,0.0,2,0,93826.63,0,True,False,True,False,True,False,False
4,5,15737888,Mitchell,850,43,2,125510.82,1,1,79084.1,0,True,False,False,True,False,False,True


In [72]:
# Target: the `Exited` column.
y = df['Exited']

# Features: everything except the target and the row number, customer id and
# surname columns.
X = df.drop(
    labels=['Exited', 'RowNumber', 'CustomerId', 'Surname'],
    axis='columns',
)

In [73]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible. No `stratify` argument is passed, so the class proportions in
# each split are whatever the random shuffle produces.
(
    X_train,
    X_test,
    y_train,
    y_test,
) = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)

In [74]:
# Standardise the features. The scaler learns its statistics from the training
# split only and then applies the same transformation to both splits, so
# nothing about the test rows influences the scaling.
scaler = StandardScaler()
scaler.fit(X_train)

# Note: X_train / X_test are rebound to the scaler's output from here on
# (by default NumPy arrays, so the DataFrame column labels are gone).
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [75]:
# Oversample with SMOTE, seeded for reproducibility. Only the training split
# is resampled; the test split is left as it was drawn.
sm = SMOTE(random_state=42)
(
    X_resampled,
    y_resampled,
) = sm.fit_resample(X=X_train, y=y_train)

In [79]:
# Random forest with 100 trees and a fixed seed, trained on the SMOTE-resampled
# training data.
rf_classifier = RandomForestClassifier(
    random_state=42,
    n_estimators=100,
)
rf_classifier.fit(X=X_resampled, y=y_resampled)

In [80]:
# Per-class probabilities for every test row, one column per class.
class_probabilities = rf_classifier.predict_proba(X_test)

# Keep column 1 only: with the 0/1 `Exited` labels this is the predicted
# probability of class 1.
y_pred_prob = class_probabilities[:, 1]

In [81]:
# Custom decision threshold: a row is labelled 1 only when its class-1
# probability is strictly greater than this value.
threshold = 0.4

# Boolean mask -> 0/1 integer labels.
exceeds_threshold = y_pred_prob > threshold
y_pred_adjusted = exceeds_threshold.astype(int)

In [84]:
# Score the thresholded predictions (`y_pred_adjusted`) against the test labels.
# `recall_score` is called with its defaults, so it reports recall for label 1.
recall = recall_score(y_true=y_test, y_pred=y_pred_adjusted)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred_adjusted)
classification_rep = classification_report(y_true=y_test, y_pred=y_pred_adjusted)
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred_adjusted)

# Headline numbers first, then the full breakdowns (heading on one line,
# value starting on the next).
print(f"Accuracy: {accuracy:.2f}")
print(f"Recall: {recall:.2f}")
print("Confusion Matrix:", conf_matrix, sep="\n")
print("Classification Report:", classification_rep, sep="\n")

Accuracy: 0.81
Recall: 0.68
Confusion Matrix:
[[2028  388]
 [ 186  398]]
Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.84      0.88      2416
           1       0.51      0.68      0.58       584

    accuracy                           0.81      3000
   macro avg       0.71      0.76      0.73      3000
weighted avg       0.84      0.81      0.82      3000



In [87]:
# Hard class labels straight from the classifier's own `predict`, i.e. without
# the custom probability threshold.
y_pred = rf_classifier.predict(X=X_test)


In [89]:
# Score the classifier's default predictions (`y_pred`) against the test labels.
# NOTE: `accuracy` and `classification_rep` are rebound here, replacing any
# values those names held before this cell ran.
classification_rep = classification_report(y_true=y_test, y_pred=y_pred)
confusion_matrix_result = confusion_matrix(y_true=y_test, y_pred=y_pred)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)

# Headline accuracy, then the full breakdowns (heading on one line, value
# starting on the next).
print(f"Accuracy: {accuracy:.2f}")
print("Confusion Matrix:", confusion_matrix_result, sep="\n")
print("Classification Report:", classification_rep, sep="\n")

Accuracy: 0.84
Confusion Matrix:
[[2167  249]
 [ 234  350]]
Classification Report:
              precision    recall  f1-score   support

           0       0.90      0.90      0.90      2416
           1       0.58      0.60      0.59       584

    accuracy                           0.84      3000
   macro avg       0.74      0.75      0.75      3000
weighted avg       0.84      0.84      0.84      3000

