# **Load Data**

In [2]:
import pandas as pd
df = pd.read_csv("/content/cleaned_customer_churn_data (3).csv")
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.3,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.7,151.65,Yes


# **Feature Engineering**

In [5]:
df['tenure_group'] = df['tenure'].apply(
    lambda x: 'Short' if x<12 else 'Medium' if x<36 else 'Long'
)

df['monthly_charge_level'] = df['MonthlyCharges'].apply(
    lambda x: 'Low' if x<35 else 'Medium' if x<70 else 'High'
)

df['is_long_term'] = df['tenure'].apply(lambda x: 1 if x>=24 else 0)

df['churn_flag'] = df['Churn'].apply(lambda x: 1 if x=='Yes' else 0)
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,tenure_group,monthly_charge_level,is_long_term,churn_flag
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,Month-to-month,Yes,Electronic check,29.85,29.85,No,Short,Low,0,0
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,One year,No,Mailed check,56.95,1889.5,No,Medium,Medium,1,0
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,Month-to-month,Yes,Mailed check,53.85,108.15,Yes,Short,Medium,0,1
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,One year,No,Bank transfer (automatic),42.3,1840.75,No,Long,Medium,1,0
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,Month-to-month,Yes,Electronic check,70.7,151.65,Yes,Short,High,0,1


# **Encoding**

In [8]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
df['tenure_group_encoded'] = le.fit_transform(df['tenure_group'])
df.head()

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,tenure_group,monthly_charge_level,is_long_term,churn_flag,tenure_group_encoded
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,Yes,Electronic check,29.85,29.85,No,Short,Low,0,0,2
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,No,Mailed check,56.95,1889.5,No,Medium,Medium,1,0,1
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,Yes,Mailed check,53.85,108.15,Yes,Short,Medium,0,1,2
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,No,Bank transfer (automatic),42.3,1840.75,No,Long,Medium,1,0,0
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,Yes,Electronic check,70.7,151.65,Yes,Short,High,0,1,2


# **Scaling**

In [10]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
df['MonthlyCharges_scaled'] = scaler.fit_transform(df[['MonthlyCharges']])
df

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,PaymentMethod,MonthlyCharges,TotalCharges,Churn,tenure_group,monthly_charge_level,is_long_term,churn_flag,tenure_group_encoded,MonthlyCharges_scaled
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,Electronic check,29.85,29.85,No,Short,Low,0,0,2,-1.160323
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Mailed check,56.95,1889.50,No,Medium,Medium,1,0,1,-0.259629
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,Mailed check,53.85,108.15,Yes,Short,Medium,0,1,2,-0.362660
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Bank transfer (automatic),42.30,1840.75,No,Long,Medium,1,0,0,-0.746535
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,Electronic check,70.70,151.65,Yes,Short,High,0,1,2,0.197365
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,6840-RESVB,Male,0,Yes,Yes,24,Yes,Yes,DSL,Yes,...,Mailed check,84.80,1990.50,No,Medium,High,1,0,1,0.665992
7039,2234-XADUH,Female,0,Yes,Yes,72,Yes,Yes,Fiber optic,No,...,Credit card (automatic),103.20,7362.90,No,Long,High,1,0,0,1.277533
7040,4801-JZAZL,Female,0,Yes,Yes,11,No,No phone service,DSL,Yes,...,Electronic check,29.60,346.45,No,Short,Low,0,0,2,-1.168632
7041,8361-LTMKD,Male,1,Yes,No,4,Yes,Yes,Fiber optic,No,...,Mailed check,74.40,306.60,Yes,Short,High,0,1,2,0.320338


# **Before vs After**

In [11]:
df[['MonthlyCharges','MonthlyCharges_scaled']].describe()

Unnamed: 0,MonthlyCharges,MonthlyCharges_scaled
count,7043.0,7043.0
mean,64.761692,-6.406285e-17
std,30.090047,1.000071
min,18.25,-1.54586
25%,35.5,-0.9725399
50%,70.35,0.1857327
75%,89.85,0.8338335
max,118.75,1.794352


# **Save CSV**

In [12]:
df.to_csv("churn_week4_feature_engineered.csv", index=False)