# DAY 1 :

In [56]:
import numpy as np
import pandas as pd

In [57]:
df = pd.read_csv('customer_churn.csv')

In [33]:
df.shape

(7043, 21)

In [34]:
df.columns

Index(['customerID', 'gender', 'SeniorCitizen', 'Partner', 'Dependents',
       'tenure', 'PhoneService', 'MultipleLines', 'InternetService',
       'OnlineSecurity', 'OnlineBackup', 'DeviceProtection', 'TechSupport',
       'StreamingTV', 'StreamingMovies', 'Contract', 'PaperlessBilling',
       'PaymentMethod', 'MonthlyCharges', 'TotalCharges', 'Churn'],
      dtype='object')

In [35]:
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


In [36]:
df.isnull().sum()

customerID          0
gender              0
SeniorCitizen       0
Partner             0
Dependents          0
tenure              0
PhoneService        0
MultipleLines       0
InternetService     0
OnlineSecurity      0
OnlineBackup        0
DeviceProtection    0
TechSupport         0
StreamingTV         0
StreamingMovies     0
Contract            0
PaperlessBilling    0
PaymentMethod       0
MonthlyCharges      0
TotalCharges        0
Churn               0
dtype: int64

In [37]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7043 entries, 0 to 7042
Data columns (total 21 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   customerID        7043 non-null   object 
 1   gender            7043 non-null   object 
 2   SeniorCitizen     7043 non-null   int64  
 3   Partner           7043 non-null   object 
 4   Dependents        7043 non-null   object 
 5   tenure            7043 non-null   int64  
 6   PhoneService      7043 non-null   object 
 7   MultipleLines     7043 non-null   object 
 8   InternetService   7043 non-null   object 
 9   OnlineSecurity    7043 non-null   object 
 10  OnlineBackup      7043 non-null   object 
 11  DeviceProtection  7043 non-null   object 
 12  TechSupport       7043 non-null   object 
 13  StreamingTV       7043 non-null   object 
 14  StreamingMovies   7043 non-null   object 
 15  Contract          7043 non-null   object 
 16  PaperlessBilling  7043 non-null   object 


In [38]:
# Convert TotalCharges to numeric
df['TotalCharges'] = pd.to_numeric(df['TotalCharges'], errors='coerce')

# Day 2 – Feature Engineering

In [39]:
# Create tenure groups
bins = [0, 12, 24, 48, 60, 72]
labels = ['0-12', '12-24', '24-48', '48-60', '60-72']
df['TenureGroup'] = pd.cut(df['tenure'], bins=bins, labels=labels)

In [40]:
# Calculate Lifetime Value (LTV)
df['LTV'] = df['MonthlyCharges'] * df['tenure']

In [41]:
# Segment LTV
df['LTV_Group'] = pd.qcut(df['LTV'], q=4, labels=['Low', 'Medium', 'High', 'Very High'])


In [42]:
df[['tenure', 'TenureGroup', 'LTV', 'LTV_Group']].head()

Unnamed: 0,tenure,TenureGroup,LTV,LTV_Group
0,1,0-12,29.85,Low
1,34,24-48,1936.3,High
2,2,0-12,107.7,Low
3,45,24-48,1903.5,High
4,2,0-12,141.4,Low


### Encode categorical features

In [43]:
from sklearn.preprocessing import LabelEncoder

In [52]:
df_encoded = pd.get_dummies(df, drop_first=True)

In [53]:
df_encoded.head()

Unnamed: 0,SeniorCitizen,tenure,MonthlyCharges,TotalCharges,LTV,customerID_0003-MKNFE,customerID_0004-TLHLJ,customerID_0011-IGKFF,customerID_0013-EXCHZ,customerID_0013-MHZWF,...,PaymentMethod_Electronic check,PaymentMethod_Mailed check,Churn_Yes,TenureGroup_12-24,TenureGroup_24-48,TenureGroup_48-60,TenureGroup_60-72,LTV_Group_Medium,LTV_Group_High,LTV_Group_Very High
0,0,1,29.85,29.85,29.85,False,False,False,False,False,...,True,False,False,False,False,False,False,False,False,False
1,0,34,56.95,1889.5,1936.3,False,False,False,False,False,...,False,True,False,False,True,False,False,False,True,False
2,0,2,53.85,108.15,107.7,False,False,False,False,False,...,False,True,True,False,False,False,False,False,False,False
3,0,45,42.3,1840.75,1903.5,False,False,False,False,False,...,False,False,False,False,True,False,False,False,True,False
4,0,2,70.7,151.65,141.4,False,False,False,False,False,...,True,False,True,False,False,False,False,False,False,False


# Day 3 – Model Training (Logistic Regression & Random Forest)

In [54]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [55]:
X = df_encoded.drop(['customerID', 'Churn'], axis=1)
y = df_encoded['Churn']

KeyError: "['customerID', 'Churn'] not found in axis"

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
lr = LogisticRegression(max_iter=1000)

In [None]:
lr.fit(X_train, y_train)