<a href="https://colab.research.google.com/github/pooji1620/churn-prediction/blob/main/churn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pandas numpy scikit-learn matplotlib seaborn




In [2]:
import pandas as pd
import numpy as np

np.random.seed(42)

# Create a synthetic churn dataset
n = 500
df = pd.DataFrame({
    "customer_id": np.arange(1, n+1),
    "tenure": np.random.randint(1, 60, n),
    "monthly_charges": np.random.randint(100, 1000, n),
    "total_charges": np.random.randint(1000, 60000, n),
    "support_calls": np.random.randint(0, 10, n),
    "is_active": np.random.randint(0, 2, n),
    "churn": np.random.randint(0, 2, n)  # target variable
})

df.head()


Unnamed: 0,customer_id,tenure,monthly_charges,total_charges,support_calls,is_active,churn
0,1,39,933,56393,7,0,1
1,2,52,899,9017,0,1,0
2,3,29,826,7941,6,1,1
3,4,15,953,26934,0,1,0
4,5,43,150,24386,6,1,1


In [3]:

df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500 entries, 0 to 499
Data columns (total 7 columns):
 #   Column           Non-Null Count  Dtype
---  ------           --------------  -----
 0   customer_id      500 non-null    int64
 1   tenure           500 non-null    int64
 2   monthly_charges  500 non-null    int64
 3   total_charges    500 non-null    int64
 4   support_calls    500 non-null    int64
 5   is_active        500 non-null    int64
 6   churn            500 non-null    int64
dtypes: int64(7)
memory usage: 27.5 KB


In [4]:

df.isnull().sum()


Unnamed: 0,0
customer_id,0
tenure,0
monthly_charges,0
total_charges,0
support_calls,0
is_active,0
churn,0


In [5]:
from sklearn.model_selection import train_test_split
# Features and target
X = df.drop("churn", axis=1)
y = df["churn"]
# Train-test split (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_train.shape, X_test.shape


((400, 6), (100, 6))

In [6]:
from sklearn.linear_model import LogisticRegression
# Create model
model = LogisticRegression()
# Train model
model.fit(X_train, y_train)
print("Model training completed!")


Model training completed!


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [7]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Predict on test data
y_pred = model.predict(X_test)

# Accuracy
print("Accuracy:", accuracy_score(y_test, y_pred))

# Classification Report
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred))

# Confusion Matrix
print("\nConfusion Matrix:\n")
print(confusion_matrix(y_test, y_pred))


Accuracy: 0.44

Classification Report:

              precision    recall  f1-score   support

           0       0.48      0.24      0.32        55
           1       0.42      0.69      0.53        45

    accuracy                           0.44       100
   macro avg       0.45      0.46      0.42       100
weighted avg       0.46      0.44      0.41       100


Confusion Matrix:

[[13 42]
 [14 31]]


In [8]:
from sklearn.preprocessing import StandardScaler

# Scale numeric features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [9]:
model = LogisticRegression()
model.fit(X_train_scaled, y_train)

print("Model retrained with scaling!")

Model retrained with scaling!


In [10]:
y_pred = model.predict(X_test_scaled)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))


Accuracy: 0.46

Classification Report:
               precision    recall  f1-score   support

           0       0.52      0.29      0.37        55
           1       0.43      0.67      0.53        45

    accuracy                           0.46       100
   macro avg       0.48      0.48      0.45       100
weighted avg       0.48      0.46      0.44       100


Confusion Matrix:
 [[16 39]
 [15 30]]


In [11]:
new_customer = pd.DataFrame({
    "customer_id": [501],
    "tenure": [12],
    "monthly_charges": [450],
    "total_charges": [6000],
    "support_calls": [3],
    "is_active": [1]
})
# Scale using the same scaler
new_customer_scaled = scaler.transform(new_customer)
# Predict churn
prediction = model.predict(new_customer_scaled)
probability = model.predict_proba(new_customer_scaled)
print("Churn Prediction (0=No, 1=Yes):", prediction[0])
print("Probability:", probability)


Churn Prediction (0=No, 1=Yes): 1
Probability: [[0.35794497 0.64205503]]


In [12]:
# Example: Customer likely NOT to churn
new_customer_no_churn = pd.DataFrame({
    "customer_id": [999],
    "tenure": [48],             # long tenure â†’ loyal customer
    "monthly_charges": [350],   # moderate price
    "total_charges": [25000],   # high lifetime spend
    "support_calls": [1],       # very few issues
    "is_active": [1]            # active account
})

# Scale
new_customer_scaled = scaler.transform(new_customer_no_churn)

# Predict
prediction = model.predict(new_customer_scaled)
probability = model.predict_proba(new_customer_scaled)

print("Churn Prediction (0 = No, 1 = Yes):", prediction[0])
print("Probability:", probability)


Churn Prediction (0 = No, 1 = Yes): 1
Probability: [[0.31264077 0.68735923]]


In [13]:
new_customer_no_churn = pd.DataFrame({
    "customer_id": [1001],
    "tenure": [59],             # almost max
    "monthly_charges": [200],   # low bill
    "total_charges": [50000],   # high loyalty
    "support_calls": [0],       # no complaints
    "is_active": [1]            # still active
})

# Scale
new_customer_scaled = scaler.transform(new_customer_no_churn)

# Predict
prediction = model.predict(new_customer_scaled)
probability = model.predict_proba(new_customer_scaled)

print("Churn Prediction (0 = No, 1 = Yes):", prediction[0])
print("Probability:", probability)


Churn Prediction (0 = No, 1 = Yes): 1
Probability: [[0.3437101 0.6562899]]


In [14]:
!pip install joblib




In [15]:
import joblib

joblib.dump(model, "churn_model.pkl")


['churn_model.pkl']

In [16]:
joblib.dump(scaler, "scaler.pkl")



['scaler.pkl']

In [17]:
from google.colab import files

files.download("churn_model.pkl")
files.download("scaler.pkl")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>