### Import Dependencies

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.base import BaseEstimator, TransformerMixin

### Loading the Data

In [2]:
# Read the feature-engineered dataset and display it.
# NOTE(review): the displayed frame carries an 'Unnamed: 0' column, which usually
# appears when the upstream CSV was saved together with its index - confirm
# whether it should be dropped before it reaches later steps.
input_path = 'data/processed/4_feature_engineered.csv'
df = pd.read_csv(input_path)
df

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,...,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn,ServiceAdoptionScore,AvgChargePerService
0,Female,0,Yes,No,1,No,No phone service,DSL,No,Yes,...,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,0,1,14.925000
1,Male,0,No,No,34,Yes,No,DSL,Yes,No,...,No,No,One year,No,Mailed check,56.95,1889.50,0,2,18.983333
2,Male,0,No,No,2,Yes,No,DSL,Yes,Yes,...,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,1,2,17.950000
3,Male,0,No,No,45,No,No phone service,DSL,Yes,No,...,No,No,One year,No,Bank transfer (automatic),42.30,1840.75,0,3,10.575000
4,Female,0,No,No,2,Yes,No,Fiber optic,No,No,...,No,No,Month-to-month,Yes,Electronic check,70.70,151.65,1,0,70.700000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,Male,0,Yes,Yes,24,Yes,Yes,DSL,Yes,No,...,Yes,Yes,One year,Yes,Mailed check,84.80,1990.50,0,5,14.133333
7039,Female,0,Yes,Yes,72,Yes,Yes,Fiber optic,No,Yes,...,Yes,Yes,One year,Yes,Credit card (automatic),103.20,7362.90,0,4,20.640000
7040,Female,0,Yes,Yes,11,No,No phone service,DSL,Yes,No,...,No,No,Month-to-month,Yes,Electronic check,29.60,346.45,0,1,14.800000
7041,Male,1,Yes,No,4,Yes,Yes,Fiber optic,No,No,...,No,No,Month-to-month,Yes,Mailed check,74.40,306.60,1,0,74.400000


### Feature Binning

In [3]:
# Bin edges and names for tenure: [0, 12] -> 'New', (12, 48] -> 'Established',
# (48, inf) -> 'Loyal'.
bins = [0, 12, 48, float('inf')]
labels = ['New', 'Established', 'Loyal']

# Intervals are closed on the right; include_lowest=True additionally closes the
# first interval on the left so that a tenure of exactly 0 lands in 'New'.
tenure_bins = pd.cut(
    df['tenure'],
    bins=bins,
    labels=labels,
    right=True,
    include_lowest=True,
)
df['TenureBins'] = tenure_bins


In [4]:
# Collapse the four payment methods into two groups: check-based methods are
# labelled 'Unreliable', automatic methods 'Reliable'. Any value missing from
# this mapping becomes NaN after .map().
payment_reliability = {
    'Electronic check': 'Unreliable',
    'Mailed check': 'Unreliable',
    'Bank transfer (automatic)': 'Reliable',
    'Credit card (automatic)': 'Reliable',
}
df['PaymentReliabilityIndicator'] = df['PaymentMethod'].map(payment_reliability)

In [5]:
# Remove the two source columns now that their binned counterparts
# ('TenureBins', 'PaymentReliabilityIndicator') exist on the frame.
binned_source_columns = ['tenure', 'PaymentMethod']
df = df.drop(columns=binned_source_columns)

In [6]:
# Display the frame after binning: 'tenure' and 'PaymentMethod' have been dropped,
# and 'TenureBins' and 'PaymentReliabilityIndicator' are appended as the last columns.
df

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,...,StreamingMovies,Contract,PaperlessBilling,MonthlyCharges,TotalCharges,Churn,ServiceAdoptionScore,AvgChargePerService,TenureBins,PaymentReliabilityIndicator
0,Female,0,Yes,No,No,No phone service,DSL,No,Yes,No,...,No,Month-to-month,Yes,29.85,29.85,0,1,14.925000,New,Unreliable
1,Male,0,No,No,Yes,No,DSL,Yes,No,Yes,...,No,One year,No,56.95,1889.50,0,2,18.983333,Established,Unreliable
2,Male,0,No,No,Yes,No,DSL,Yes,Yes,No,...,No,Month-to-month,Yes,53.85,108.15,1,2,17.950000,New,Unreliable
3,Male,0,No,No,No,No phone service,DSL,Yes,No,Yes,...,No,One year,No,42.30,1840.75,0,3,10.575000,Established,Reliable
4,Female,0,No,No,Yes,No,Fiber optic,No,No,No,...,No,Month-to-month,Yes,70.70,151.65,1,0,70.700000,New,Unreliable
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,Male,0,Yes,Yes,Yes,Yes,DSL,Yes,No,Yes,...,Yes,One year,Yes,84.80,1990.50,0,5,14.133333,Established,Unreliable
7039,Female,0,Yes,Yes,Yes,Yes,Fiber optic,No,Yes,Yes,...,Yes,One year,Yes,103.20,7362.90,0,4,20.640000,Loyal,Reliable
7040,Female,0,Yes,Yes,No,No phone service,DSL,Yes,No,No,...,No,Month-to-month,Yes,29.60,346.45,0,1,14.800000,New,Unreliable
7041,Male,1,Yes,No,Yes,Yes,Fiber optic,No,No,No,...,No,Month-to-month,Yes,74.40,306.60,1,0,74.400000,New,Unreliable


In [7]:
# Write the binned frame to disk; index=False keeps the row index out of the file.
output_path = 'data/processed/5_feature_binned.csv'
df.to_csv(output_path, index=False)