<a href="https://colab.research.google.com/github/sayalikamble4567/2026-BUS4-118S-Sec-02-Special-Topics-MIS/blob/main/Predict_Customer_Churn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive into the Colab runtime; the dataset is read from it below.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
# Load the sample customer data from the mounted Drive
# (the original note marks this sample as generated by ChatGPT).
df = pd.read_excel("/content/drive/MyDrive/Customer_Churn_Dataset.xlsx")

# Features and target. The column lists are named once so the feature
# selection and the preprocessing steps cannot drift apart.
numeric_features = ['age', 'monthly_usage_hours', 'purchase_amount',
                    'customer_service_calls']
categorical_features = ['region']
X = df[numeric_features + categorical_features]
y = df['churn']

# Preprocessing: scale numerical features and one-hot encode categorical ones.
# handle_unknown='ignore' encodes a region that was not seen during fit as
# all zeros instead of raising ValueError at predict time (for example a
# region missing from the training split, or a new customer from elsewhere).
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numeric_features),
        ('cat',
         OneHotEncoder(sparse_output=False, handle_unknown='ignore'),
         categorical_features),
    ]
)

# Pipeline: preprocessing followed by the classifier, so the exact same
# transformations are applied at fit time and at predict time.
model = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', LogisticRegression(random_state=42, max_iter=1000)),
])

# Split data: hold out 30% for testing; stratify on y so both splits keep
# the same churn / no-churn proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# Train model
model.fit(X_train, y_train)
# Evaluate model: compare predictions on the held-out split with the true labels
from sklearn.metrics import confusion_matrix

y_pred = model.predict(X_test)
test_confusion = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:", test_confusion, sep="\n")
# Score a single hypothetical customer with the trained pipeline
new_customer = pd.DataFrame([
    {
        'age': 35,
        'monthly_usage_hours': 20,
        'purchase_amount': 150,
        'customer_service_calls': 5,
        'region': 'West',
    }
])

# Decision threshold applied to the predicted churn probability
threshold = 0.5

# Row 0 is the only customer; column 1 is the probability of churn (class 1)
churn_probability = model.predict_proba(new_customer)[0, 1]

if churn_probability > threshold:
    churn_prediction = 1
else:
    churn_prediction = 0

print(f"Churn Probability for new customer: {churn_probability:.2f}")
print(f"Churn Prediction (1 = churn, 0 = no churn): {churn_prediction}")
# Display model coefficients.
# The ColumnTransformer emits its output columns in transformer order: the
# scaled numeric features first, then the one-hot encoded region columns.
# Take the names from the fitted preprocessor instead of rebuilding the list
# by hand; a list in any other order labels every coefficient with the wrong
# feature.
fitted_preprocessor = model.named_steps['preprocessor']
# By default the names are prefixed with the transformer name
# ("num__age", "cat__region_West"); strip the prefix for readable labels.
feature_names = [
    name.split('__', 1)[-1]
    for name in fitted_preprocessor.get_feature_names_out()
]
coefficients = model.named_steps['classifier'].coef_[0]
print("\nModel Coefficients:")
for feature, coef in zip(feature_names, coefficients):
    print(f"{feature}: {coef:.2f}")

# Churn probability represents the likelihood that a customer will stop using a
# company's product or service within a given period. It is usually expressed
# as a number between 0 and 1 (or 0% to 100%). This probability is calculated
# using customer features such as usage patterns, service calls, tenure, and
# purchase behavior, allowing businesses to estimate risk before the customer
# actually churns or leaves.

# Businesses can use churn probability to take proactive action and reduce
# customer loss. By identifying high-risk customers early, companies can target
# them with retention strategies such as personalized offers, loyalty rewards,
# improved customer support, or discounts. Instead of treating all customers
# the same, businesses can focus resources on those most likely to leave,
# improving retention rates, increasing long-term revenue, and strengthening
# customer relationships.

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Confusion Matrix:
[[91 10]
 [44  5]]
Churn Probability for new customer: 0.31
Churn Prediction (1 = churn, 0 = no churn): 0

Model Coefficients:
region_East: -0.10
region_North: 0.03
region_South: -0.06
region_West: 0.40
age: -0.40
monthly_usage_hours: -0.00
purchase_amount: 0.53
customer_service_calls: -0.12
