In [2]:
import pandas as pd
from google.colab import files
uploaded = files.upload()
df = pd.read_csv("Churn_Modelling.csv")
df.head()


Saving Churn_Modelling.csv to Churn_Modelling.csv


Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


In [3]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
df_processed = df.drop(['RowNumber', 'CustomerId', 'Surname'], axis=1)
le_gender = LabelEncoder()
df_processed['Gender'] = le_gender.fit_transform(df_processed['Gender'])  # Male=1, Female=0

le_geo = LabelEncoder()
df_processed['Geography'] = le_geo.fit_transform(df_processed['Geography'])  # France=0, Germany=1, Spain=2
X = df_processed.drop('Exited', axis=1)
y = df_processed['Exited']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y)

print("Data is cleaned, encoded, and split.")


Data is cleaned, encoded, and split.


In [4]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(max_iter=1000, class_weight='balanced', random_state=42)
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

from sklearn.metrics import accuracy_score, classification_report
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred, digits=4))


Accuracy: 0.6875

Classification Report:
              precision    recall  f1-score   support

           0     0.9013    0.6824    0.7767      1593
           1     0.3627    0.7076    0.4796       407

    accuracy                         0.6875      2000
   macro avg     0.6320    0.6950    0.6282      2000
weighted avg     0.7917    0.6875    0.7162      2000



STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [5]:
import pickle
with open("churn_model.pkl", "wb") as f:
    pickle.dump(model, f)

print("Model saved as churn_model.pkl")


Model saved as churn_model.pkl


In [6]:
def predict_churn_by_index_user_input():
    try:
        idx = int(input(f"Enter a customer index (0 to {len(df) - 1}): "))
        if idx < 0 or idx >= len(df):
            print("Invalid index.")
            return
    except ValueError:
        print("Please enter a valid number.")
        return

    actual = df.loc[idx, "Exited"]

    row = df.loc[idx].copy()
    row = row.drop(["RowNumber", "CustomerId", "Surname", "Exited"])
    row["Gender"] = 1 if row["Gender"] == "Male" else 0

    geo_map = {"France": 0, "Germany": 1, "Spain": 2}
    row["Geography"] = geo_map.get(row["Geography"], 0)

    input_data = pd.DataFrame([row.values], columns=X.columns)

    prediction = model.predict(input_data)[0]
    probability = model.predict_proba(input_data)[0]

    print("\nCustomer Details:")
    print(f"Age: {df.loc[idx, 'Age']}")
    print(f"Gender: {df.loc[idx, 'Gender']}")
    print(f"Geography: {df.loc[idx, 'Geography']}")
    print(f"Credit Score: {df.loc[idx, 'CreditScore']}")
    print(f" Balance: ₹{df.loc[idx, 'Balance']:,.2f}")
    print(f"Products: {df.loc[idx, 'NumOfProducts']}")
    print(f"Estimated Salary: ₹{df.loc[idx, 'EstimatedSalary']:,.2f}")

    print("\nModel Prediction:", "Will CHURN" if prediction == 1 else "Will STAY")
    print("Actual Label:    ", "CHURNED" if actual == 1 else "STAYED")
    print(f"Confidence:      {max(probability):.2%}")

    if prediction == actual:
        print("Model prediction is CORRECT.")
    else:
        print("Model prediction is WRONG.")


In [7]:
predict_churn_by_index_user_input()


Enter a customer index (0 to 9999): 1

Customer Details:
Age: 41
Gender: Female
Geography: Spain
Credit Score: 608
 Balance: ₹83,807.86
Products: 1
Estimated Salary: ₹112,542.58

Model Prediction: Will CHURN
Actual Label:     STAYED
Confidence:      58.46%
Model prediction is WRONG.


In [8]:
from google.colab import files
files.download("churn_model.pkl")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>