In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
# Read the churn dataset into a DataFrame and preview its first five rows
# to check that the columns were parsed as expected.
data = pd.read_csv(filepath_or_buffer='/content/Churn_Modelling.csv')
data.head(n=5)

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [3]:
# Remove the row counter, customer id and surname columns so they are not
# used as model features.
identifier_columns = ['RowNumber', 'CustomerId', 'Surname']
data = data.drop(columns=identifier_columns)

In [4]:
# Replace the Gender text labels with integer codes. The fitted encoder is
# kept in `label_encoder` so the same mapping can be applied to new inputs.
label_encoder = LabelEncoder()
gender_codes = label_encoder.fit_transform(data['Gender'])
data['Gender'] = gender_codes

# One-hot encode Geography; drop_first=True omits the first category, which
# is then represented by all remaining Geography_* columns being 0.
data = pd.get_dummies(data=data, columns=['Geography'], drop_first=True)


In [5]:
# Split the frame into the target (Exited) and the feature matrix, then hold
# out 20% of the rows for testing with a fixed seed for reproducibility.
y = data['Exited']
X = data.drop(columns=['Exited'])
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)


In [6]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted transformation to both splits. Note that X_train and X_test are
# overwritten with the scaled arrays returned by the scaler.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)


In [7]:
# Train a random forest with default hyperparameters on the scaled training
# split; the seed is fixed so repeated runs build the same forest.
rf_model = RandomForestClassifier(random_state=42).fit(X_train, y_train)
# Leave the estimator as the last expression so the cell still displays it.
rf_model

In [8]:
# Predict on the held-out split and print each evaluation metric under its
# own heading.
y_pred = rf_model.predict(X_test)
report_sections = (
    ("Accuracy Score:", accuracy_score),
    ("\nClassification Report:\n", classification_report),
    ("\nConfusion Matrix:\n", confusion_matrix),
)
for heading, metric_fn in report_sections:
    print(heading, metric_fn(y_test, y_pred))


Accuracy Score: 0.8665

Classification Report:
               precision    recall  f1-score   support

           0       0.88      0.97      0.92      1607
           1       0.76      0.46      0.58       393

    accuracy                           0.87      2000
   macro avg       0.82      0.71      0.75      2000
weighted avg       0.86      0.87      0.85      2000


Confusion Matrix:
 [[1551   56]
 [ 211  182]]


In [9]:
# Interactively collect one customer's raw attributes, run them through the
# same preprocessing that was applied to the training data (gender encoding,
# geography one-hot encoding, scaling) and print the model's churn prediction.
# Non-numeric text entered for a numeric field raises ValueError from
# int()/float().
user_data = {
    'CreditScore': int(input("Enter Credit Score: ")),
    'Geography': input("Enter Geography (France, Germany, Spain): "),
    'Gender': input("Enter Gender (Male, Female): "),
    'Age': int(input("Enter Age: ")),
    'Tenure': int(input("Enter Tenure (years with the company): ")),
    'Balance': float(input("Enter Account Balance: ")),
    'NumOfProducts': int(input("Enter Number of Products: ")),
    'HasCrCard': int(input("Does the customer have a credit card? (1 for Yes, 0 for No): ")),
    'IsActiveMember': int(input("Is the customer an active member? (1 for Yes, 0 for No): ")),
    'EstimatedSalary': float(input("Enter Estimated Salary: "))
}
user_df = pd.DataFrame([user_data])

# Reuse the encoder fitted on the training data so the gender label maps to
# the same integer code; a label the encoder has not seen raises ValueError.
user_df['Gender'] = label_encoder.transform(user_df['Gender'])

# Do NOT pass drop_first=True here: a single-row frame holds exactly one
# Geography category, so drop_first would discard it and every customer
# would be encoded as the baseline geography regardless of the input.
user_df = pd.get_dummies(user_df, columns=['Geography'], dtype=int)

# Align with the training feature layout: keep only the columns the model was
# trained on, in the same order, and fill any absent column with 0. The dummy
# for the baseline geography is not a training column and is dropped here,
# which leaves all Geography_* columns at 0 as intended. A geography value
# that does not match a training column exactly is likewise treated as the
# baseline.
user_df = user_df.reindex(columns=X.columns, fill_value=0)

# Apply the scaler fitted on the training split, then classify.
user_df = scaler.transform(user_df)
churn_prediction = rf_model.predict(user_df)
if churn_prediction[0] == 1:
    print("Prediction: The customer is likely to churn.")
else:
    print("Prediction: The customer is likely to stay.")


Enter Credit Score: 619
Enter Geography (France, Germany, Spain): France
Enter Gender (Male, Female): Female
Enter Age: 42
Enter Tenure (years with the company): 2
Enter Account Balance: 0
Enter Number of Products: 1
Does the customer have a credit card? (1 for Yes, 0 for No): 1
Is the customer an active member? (1 for Yes, 0 for No): 1
Enter Estimated Salary: 101348.88
Prediction: The customer is likely to stay.
