### Import Dependencies

In [1]:
import os
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

### Loading the Data

In [2]:
# Read the binned feature table (output of the previous processing step) into memory.
df = pd.read_csv(filepath_or_buffer='data/processed/5_feature_binned.csv')

In [3]:
# Categorical columns treated as unordered; these are the columns that get
# one-hot encoded in the "Nominal Columns" section.
nominal_cols = [
    "gender",
    "SeniorCitizen",
    "Partner",
    "Dependents",
    "PhoneService",
    "MultipleLines",
    "InternetService",
    "OnlineSecurity",
    "OnlineBackup",
    "DeviceProtection",
    "TechSupport",
    "StreamingTV",
    "StreamingMovies",
    "PaperlessBilling",
    "PaymentReliabilityIndicator",
]

# Categorical columns treated as ordered; these are the columns that get
# integer codes in the "Ordinal Columns" section.
ordinal_cols = [
    "Contract",
    "TenureBins",
]

In [4]:
# Work on an independent (deep) copy so the loaded frame `df` stays untouched
# while the encoded version is built up.
df_encoded = df.copy(deep=True)
df_encoded

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,PhoneService,MultipleLines,InternetService,OnlineSecurity,OnlineBackup,DeviceProtection,...,StreamingMovies,Contract,PaperlessBilling,MonthlyCharges,TotalCharges,Churn,ServiceAdoptionScore,AvgChargePerService,TenureBins,PaymentReliabilityIndicator
0,Female,0,Yes,No,No,No phone service,DSL,No,Yes,No,...,No,Month-to-month,Yes,29.85,29.85,0,1,14.925000,New,Unreliable
1,Male,0,No,No,Yes,No,DSL,Yes,No,Yes,...,No,One year,No,56.95,1889.50,0,2,18.983333,Established,Unreliable
2,Male,0,No,No,Yes,No,DSL,Yes,Yes,No,...,No,Month-to-month,Yes,53.85,108.15,1,2,17.950000,New,Unreliable
3,Male,0,No,No,No,No phone service,DSL,Yes,No,Yes,...,No,One year,No,42.30,1840.75,0,3,10.575000,Established,Reliable
4,Female,0,No,No,Yes,No,Fiber optic,No,No,No,...,No,Month-to-month,Yes,70.70,151.65,1,0,70.700000,New,Unreliable
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,Male,0,Yes,Yes,Yes,Yes,DSL,Yes,No,Yes,...,Yes,One year,Yes,84.80,1990.50,0,5,14.133333,Established,Unreliable
7039,Female,0,Yes,Yes,Yes,Yes,Fiber optic,No,Yes,Yes,...,Yes,One year,Yes,103.20,7362.90,0,4,20.640000,Loyal,Reliable
7040,Female,0,Yes,Yes,No,No phone service,DSL,Yes,No,No,...,No,Month-to-month,Yes,29.60,346.45,0,1,14.800000,New,Unreliable
7041,Male,1,Yes,No,Yes,Yes,Fiber optic,No,No,No,...,No,Month-to-month,Yes,74.40,306.60,1,0,74.400000,New,Unreliable


### Nominal Columns

In [5]:
# One-hot encode every nominal column in a single vectorized call.
#
# The frame is rebuilt from the untouched source `df` instead of mutating
# `df_encoded` in place, so re-running this cell always produces the same
# result. (The previous concat-then-drop loop appended duplicate dummy columns
# and then raised KeyError on a second run because the source column had
# already been dropped.)
#
# Output layout matches the loop it replaces: columns not listed in
# `nominal_cols` keep their relative order, followed by the indicator columns
# in `nominal_cols` order, each named "<column>_<category>".
# drop_first=True omits the first category level of each column, so a column
# with k categories yields k-1 indicator columns.
df_encoded = pd.get_dummies(df, columns=nominal_cols, drop_first=True)

df_encoded

Unnamed: 0,Contract,MonthlyCharges,TotalCharges,Churn,ServiceAdoptionScore,AvgChargePerService,TenureBins,gender_Male,SeniorCitizen_1,Partner_Yes,...,DeviceProtection_No internet service,DeviceProtection_Yes,TechSupport_No internet service,TechSupport_Yes,StreamingTV_No internet service,StreamingTV_Yes,StreamingMovies_No internet service,StreamingMovies_Yes,PaperlessBilling_Yes,PaymentReliabilityIndicator_Unreliable
0,Month-to-month,29.85,29.85,0,1,14.925000,New,False,False,True,...,False,False,False,False,False,False,False,False,True,True
1,One year,56.95,1889.50,0,2,18.983333,Established,True,False,False,...,False,True,False,False,False,False,False,False,False,True
2,Month-to-month,53.85,108.15,1,2,17.950000,New,True,False,False,...,False,False,False,False,False,False,False,False,True,True
3,One year,42.30,1840.75,0,3,10.575000,Established,True,False,False,...,False,True,False,True,False,False,False,False,False,False
4,Month-to-month,70.70,151.65,1,0,70.700000,New,False,False,False,...,False,False,False,False,False,False,False,False,True,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,One year,84.80,1990.50,0,5,14.133333,Established,True,False,True,...,False,True,False,True,False,True,False,True,True,True
7039,One year,103.20,7362.90,0,4,20.640000,Loyal,False,False,True,...,False,True,False,False,False,True,False,True,True,False
7040,Month-to-month,29.60,346.45,0,1,14.800000,New,False,False,True,...,False,False,False,False,False,False,False,False,True,True
7041,Month-to-month,74.40,306.60,1,0,74.400000,New,True,True,True,...,False,False,False,False,False,False,False,False,True,True


### Ordinal Columns


In [6]:
# Integer code assigned to each category label of the ordinal columns.
ordinal_mappings = {
    'TenureBins': {'New': 0, 'Established': 1, 'Loyal': 2},
    'Contract': {'Month-to-month': 0, 'One year': 1, 'Two year': 2},
}

for col, mapping in ordinal_mappings.items():
    # Series.map turns any label missing from the mapping into NaN without
    # warning; fail loudly instead so an unexpected category cannot silently
    # corrupt the encoded feature. Values that are already missing in the
    # source are left as missing, exactly as before.
    unexpected = set(df[col].dropna().unique()) - set(mapping)
    if unexpected:
        raise ValueError(
            f"Unmapped categories in ordinal column '{col}': {sorted(map(str, unexpected))}"
        )
    # Map from the untouched source frame `df` (same row index as
    # `df_encoded`) rather than from `df_encoded` itself: re-running this cell
    # would otherwise feed the already-encoded integers through the mapping
    # and replace the whole column with NaN.
    df_encoded[col] = df[col].map(mapping)

In [7]:
# Persist the encoded features; index=False keeps the row index out of the file.
df_encoded.to_csv(path_or_buf='data/processed/6_feature_encoded.csv', index=False)