Step 1: Loading and Preparing the Data
1.1 Load the Data and Basic Inspection

In [None]:
# Importing necessary libraries
import pandas as pd

# Load the raw telecom churn dataset (path is relative to the notebook's working directory).
telecom_data = pd.read_csv('telecom_churn_data.csv')

# Preview the first few rows.
print(telecom_data.head())

# DataFrame.info() writes its report (dtypes, non-null counts) straight to
# stdout and returns None, so it is called directly; wrapping it in print()
# would append a stray "None" line to the output.
telecom_data.info()

# Summary statistics.
print(telecom_data.describe())


1.2 Filter High-Value Customers

In [None]:
# Average of the month-6 and month-7 total recharge amounts, per customer.
telecom_data['avg_rech_amt_6_7'] = (telecom_data['total_rech_amt_6'] + telecom_data['total_rech_amt_7']) / 2

# 70th percentile of the average recharge amount; this is the high-value cutoff.
percentile_70 = telecom_data['avg_rech_amt_6_7'].quantile(0.7)

# Keep customers at or above the cutoff. The explicit .copy() makes the result
# an independent DataFrame, so adding or modifying columns on it later does not
# trigger pandas' SettingWithCopyWarning or touch telecom_data.
is_high_value = telecom_data['avg_rech_amt_6_7'] >= percentile_70
high_value_customers = telecom_data.loc[is_high_value].copy()
print(f"Number of high-value customers: {high_value_customers.shape[0]}")


1.3 Tag Churners and Remove Churn Phase Attributes

In [None]:
# Tag churners (0: Non-churn, 1: Churn).
# A customer is tagged as churned when all four month-9 usage measures
# (incoming minutes, outgoing minutes, 2G volume, 3G volume) are exactly zero.
churn_usage_columns = ['total_ic_mou_9', 'total_og_mou_9', 'vol_2g_mb_9', 'vol_3g_mb_9']

# Vectorized comparison instead of a row-wise apply(): same result, but it
# avoids running a Python lambda once per row. A missing value compares unequal
# to 0, so a row with NaN in any of these columns is tagged 0, exactly as with
# the row-wise `== 0` checks.
is_churned = high_value_customers[churn_usage_columns].eq(0).all(axis=1)

# assign() returns a new frame rather than writing into a filtered one, which
# avoids SettingWithCopyWarning and keeps this cell safe to re-run.
high_value_customers = high_value_customers.assign(churn=is_churned.astype(int))

# Remove churn phase columns (every column whose name contains '_9') so that
# month-9 information cannot leak into the features.
churn_columns = [col for col in high_value_customers.columns if '_9' in col]
high_value_customers_cleaned = high_value_customers.drop(columns=churn_columns)
print(high_value_customers_cleaned['churn'].value_counts())


Step 2: Exploratory Data Analysis (EDA)
2.1 Visualize Churn Rate

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Bar chart of the number of customers per churn label (0: Non-churn, 1: Churn).
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=high_value_customers_cleaned, x='churn', ax=ax)
ax.set_title("Churn Rate in High-Value Customers")
plt.show()


2.2 Analyze Recharge Amount vs. Churn

In [None]:
# Distribution of the month-6/7 average recharge amount, split by churn label.
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=high_value_customers_cleaned, x='churn', y='avg_rech_amt_6_7', ax=ax)
ax.set_title("Average Recharge Amount (Months 6 and 7) vs Churn")
plt.show()


Step 4: Modeling and Evaluation
4.1 Train-Test Split

In [None]:
from sklearn.model_selection import train_test_split

# Define target and features.
# Only numeric columns are kept as features: scikit-learn estimators cannot
# consume raw string/object columns. If every column is already numeric,
# select_dtypes() is a no-op.
X = (
    high_value_customers_cleaned
    .drop(columns=['churn'])
    .select_dtypes(include='number')
)
y = high_value_customers_cleaned['churn']

# Split data into train (70%) and test (30%) sets.
# stratify=y keeps the churn / non-churn proportion the same in both splits,
# so the minority class is not under-represented in either one by chance.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)


4.2 Logistic Regression with Class Imbalance Handling

In [None]:
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Initialize and train model with balanced class weights.
# LogisticRegression raises on NaN input and its solver converges slowly on
# features with very different scales, so the classifier is wrapped in a
# pipeline that first fills missing values with the column median and then
# standardizes every feature. Both steps are fitted on the training data only,
# so no statistics from the test set leak into the model.
# `model` keeps the usual fit/predict interface; the fitted classifier itself
# is available as `model[-1]`.
model = make_pipeline(
    SimpleImputer(strategy='median'),
    StandardScaler(),
    LogisticRegression(class_weight='balanced', max_iter=1000, random_state=42),
)
model.fit(X_train, y_train)

# Predictions on the held-out test set, followed by the evaluation reports.
y_pred = model.predict(X_test)
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))
