In [46]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import warnings
warnings.filterwarnings('ignore')

In [47]:
# Load the Telco customer-churn CSV; the path is relative to the directory
# the notebook is run from.
csv_path = 'dataset/WA_Fn-UseC_-Telco-Customer-Churn.csv'
df = pd.read_csv(csv_path)

## Data Preprocessing

In [48]:
# Remove the customer identifier; it is not used as a model feature.
# Rebinding with errors="ignore" (instead of an in-place drop) keeps this cell
# idempotent: re-running it after the column is already gone no longer raises
# KeyError.
df = df.drop(columns="customerID", errors="ignore")

In [49]:
# Inspect the column dtypes. TotalCharges shows up as object (text) rather than
# a numeric dtype, which is why it is converted in the next cell.
df.dtypes

gender               object
SeniorCitizen         int64
Partner              object
Dependents           object
tenure                int64
PhoneService         object
MultipleLines        object
InternetService      object
OnlineSecurity       object
OnlineBackup         object
DeviceProtection     object
TechSupport          object
StreamingTV          object
StreamingMovies      object
Contract             object
PaperlessBilling     object
PaymentMethod        object
MonthlyCharges      float64
TotalCharges         object
Churn                object
dtype: object

In [50]:
# TotalCharges is stored as text, and rows holding a blank value cannot be
# converted to float. Drop those rows, then cast the column.
#  - The mask strips whitespace first, so any empty or whitespace-only entry is
#    treated as blank, not only the exact single-space string.
#  - .copy() makes the filtered frame independent, so the assignment below is a
#    plain column write rather than a chained assignment on a slice
#    (pandas SettingWithCopyWarning).
# Genuinely malformed values still make astype(float) raise, so bad data is not
# dropped silently.
blank_total_charges = df['TotalCharges'].astype(str).str.strip() == ''
df = df.loc[~blank_total_charges].copy()

df['TotalCharges'] = df['TotalCharges'].astype(float)

In [51]:
# List the distinct values of every text (object-dtype) column so that
# inconsistent category labels are easy to spot.
object_columns = [name for name in df.columns if df[name].dtypes == 'object']
for column in object_columns:
    distinct_values = df[column].unique()
    print(f'{column}: {distinct_values}')

gender: ['Female' 'Male']
Partner: ['Yes' 'No']
Dependents: ['No' 'Yes']
PhoneService: ['No' 'Yes']
MultipleLines: ['No phone service' 'No' 'Yes']
InternetService: ['DSL' 'Fiber optic' 'No']
OnlineSecurity: ['No' 'Yes' 'No internet service']
OnlineBackup: ['Yes' 'No' 'No internet service']
DeviceProtection: ['No' 'Yes' 'No internet service']
TechSupport: ['No' 'Yes' 'No internet service']
StreamingTV: ['No' 'Yes' 'No internet service']
StreamingMovies: ['No' 'Yes' 'No internet service']
Contract: ['Month-to-month' 'One year' 'Two year']
PaperlessBilling: ['Yes' 'No']
PaymentMethod: ['Electronic check' 'Mailed check' 'Bank transfer (automatic)'
 'Credit card (automatic)']
Churn: ['No' 'Yes']


In [52]:
# Collapse the "no service" placeholders into a plain 'No' so the affected
# service columns end up with only Yes/No values.
no_service_labels = ['No internet service', 'No phone service']
df = df.replace(no_service_labels, 'No')

In [53]:
# Feature matrix: every column except the label.
X = df.drop(columns="Churn")
# Target: encode the label as integers (Yes -> 1, No -> 0).
# Series.replace on an object column only produced integers through implicit
# downcasting, which pandas deprecated in 2.2; without it the result stays
# object dtype, which classifier target checks may reject. map + astype yields
# an int64 target on any pandas version, and astype raises if an unexpected
# label (mapped to NaN) is present instead of passing it through.
y = df["Churn"].map({'Yes': 1, 'No': 0}).astype('int64')

### Building Transformers

In [54]:
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [55]:
# Column groups, one per preprocessing strategy.

# Continuous columns that get scaled.
numeric_features = [
    'tenure',
    'MonthlyCharges',
    'TotalCharges',
]

# Yes/No columns (after the "no service" placeholders were collapsed to 'No').
binary_features = [
    'Partner',
    'Dependents',
    'PhoneService',
    'MultipleLines',
    'OnlineSecurity',
    'OnlineBackup',
    'DeviceProtection',
    'TechSupport',
    'StreamingTV',
    'StreamingMovies',
    'PaperlessBilling',
]

# Kept in its own list; it is appended to the binary group when the
# ColumnTransformer is built.
gender_feature = ['gender']

# Columns with more than two categories.
categorical_features = [
    'InternetService',
    'Contract',
    'PaymentMethod',
]

In [56]:
# Numeric columns: rescale each feature to the range observed during fit
# (MinMaxScaler's default target range is [0, 1]).
numeric_transformer = MinMaxScaler()

# Two-valued columns: drop='if_binary' emits a single 0/1 column per feature.
# NOTE(review): with handle_unknown='ignore', a category not seen during fit is
# encoded as all zeros, which is indistinguishable from the dropped category.
binary_transformer = OneHotEncoder(handle_unknown='ignore', drop='if_binary')

# Multi-valued columns: one indicator column per category; categories unseen
# during fit are encoded as all zeros.
categorical_transformer = OneHotEncoder(handle_unknown='ignore')

In [57]:
# Route each column group to its transformer. gender is two-valued, so it
# shares the binary encoder.
# remainder='drop' is the ColumnTransformer default, spelled out because it
# matters here: any column not named in one of the groups is discarded.
# NOTE(review): SeniorCitizen is present in the frame but appears in none of
# the feature lists, so it never reaches the model. Confirm this is intended.
preprocessor = ColumnTransformer(
    [
        ('num', numeric_transformer, numeric_features),
        ('bin', binary_transformer, [*binary_features, *gender_feature]),
        ('cat', categorical_transformer, categorical_features),
    ],
    remainder='drop',
)

## Training a Random Forest model

In [65]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with library-default hyperparameters; the fixed random_state
# makes repeated fits on the same data reproducible.
model = RandomForestClassifier(random_state=42)

In [66]:
# Chain preprocessing and the classifier into one estimator so that both are
# fitted together and can be persisted and applied as a single object.
pipeline_steps = [('preprocessor', preprocessor), ('model', model)]
pipeline = Pipeline(steps=pipeline_steps)

In [67]:
# Fit the preprocessor and the classifier on every row of X / y.
# NOTE(review): no train/test split or evaluation is visible in this part of
# the notebook. Confirm the model is validated elsewhere before relying on the
# saved artifact.
pipeline.fit(X, y)

In [68]:
import joblib

# Persist the fitted pipeline (preprocessor + model) to the current working
# directory so it can be reloaded for inference with joblib.load.
joblib.dump(pipeline, "churn_pipeline.pkl")

['churn_pipeline.pkl']