# Pipeline

Runs with a pickle file of the trained model (`final_model`). Make sure the model file is in the same folder as this notebook.

1. Takes a dataset for customers that have not churned (**lacks a Churn column**) and predicts churn using our classification model
2. Exports prediction table as a csv


In [7]:
import pandas as pd
import pickle
from xgboost import XGBClassifier
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
from sklearn.compose import ColumnTransformer

In [4]:
# Load the customer dataset to score; the first CSV column is used as the index.
dataset_path = '-insert filepath to dataset here'
teledf = pd.read_csv(filepath_or_buffer=dataset_path, index_col=0)

Unnamed: 0,Total day minutes,Area code_408,Number vmail messages,Churn,Total eve charge,Area code_415,Area code_510,Total day calls,Total intl calls,Total eve minutes,Total intl charge,International plan,Customer service calls,Voice mail plan,Total intl minutes,Total day charge
0,265.1,0.0,25.0,0.0,16.78,1.0,0.0,110.0,3.0,197.4,2.7,0.0,1.0,1.0,10.0,45.07
1,161.6,0.0,26.0,0.0,16.62,1.0,0.0,123.0,3.0,195.5,3.7,0.0,1.0,1.0,13.7,27.47
2,243.4,0.0,0.0,0.0,10.3,1.0,0.0,114.0,5.0,121.2,3.29,0.0,0.0,0.0,12.2,41.38
3,299.4,1.0,0.0,0.0,5.26,0.0,0.0,71.0,7.0,61.9,1.78,1.0,2.0,0.0,6.6,50.9
4,166.7,0.0,0.0,0.0,12.61,1.0,0.0,113.0,3.0,148.3,2.73,1.0,3.0,0.0,10.1,28.34


In [None]:
# Group the dataset's columns by the kind of encoding each one gets below.

# Nominal columns (one-hot encoded).
cat_var = ['State', 'Area code']
# Two-valued columns (ordinal encoded).
bin_var = ['International plan', 'Voice mail plan']
# Everything that is NOT passed through untouched.
non_cont = [*cat_var, *bin_var]
# The remaining columns are passed through as-is; drop() raises KeyError
# if any of the categorical/binary columns is absent from the dataset.
cont_var = teledf.drop(columns=non_cont).columns.tolist()

In [None]:
# Build the column-wise preprocessing step:
#   * 'cont'    - columns in cont_var are passed through unchanged
#   * 'binary'  - columns in bin_var are ordinal encoded
#   * 'nominal' - columns in cat_var are one-hot encoded into a dense array
# Any column not listed in one of the three groups is dropped.

# scikit-learn 1.2 renamed OneHotEncoder's `sparse` argument to
# `sparse_output`, and 1.4 removed the old name. Try the new spelling first
# and fall back so the notebook runs on both old and new releases.
try:
    nominal_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    nominal_encoder = OneHotEncoder(sparse=False)

data_transformer = ColumnTransformer(
    transformers=[
        ('cont', 'passthrough', cont_var),
        ('binary', OrdinalEncoder(), bin_var),
        ('nominal', nominal_encoder, cat_var),
    ],
    remainder='drop',
)

# NOTE(review): the encoders are fit on the dataset being scored, not on the
# training data. If a category is absent from this dataset, its one-hot
# column will not be produced, and the later feature selection will fail on
# it. Consider loading the transformer fitted at training time - TODO confirm.
data_transformer.fit(teledf)

In [None]:
# Categories learned by the fitted one-hot encoder, one array per column in cat_var.
nom_name = data_transformer.named_transformers_['nominal'].categories_

# Build the one-hot output column names as "<source column>_<category>",
# in the same order the encoder emits them.
transformed_nomcat = [
    '_'.join((column, str(category)))
    for column, categories in zip(cat_var, nom_name)
    for category in categories
]

In [None]:
# Column labels in the order the ColumnTransformer lays out its output:
# passthrough columns, then binary columns, then the one-hot columns.
trans_col = [*cont_var, *bin_var, *transformed_nomcat]

# Apply the fitted transformer and wrap the resulting array in a labelled
# DataFrame (no index is supplied, so rows get a default integer index).
teledf_trans = pd.DataFrame(
    data=data_transformer.transform(teledf),
    columns=trans_col,
)

In [None]:
# Keep only the columns chosen during feature selection.
#
# Selecting several columns requires a *list* inside the indexing brackets.
# The previous form, teledf_trans['a', 'b', ...], passed a single tuple key
# and raised KeyError on a DataFrame with a flat column index.
selected_features = [
    'Total day calls',
    'Total intl charge',
    'International plan',
    'Total eve charge',
    'Area code_408',
    'Area code_415',
    'Total eve minutes',
    'Total intl calls',
    'Total intl minutes',
    'Total day minutes',
    'Number vmail messages',
    'Voice mail plan',
    'Total day charge',
    'Customer service calls',
    'Area code_510',
]

# NOTE(review): column order here presumably has to match the order the
# model was trained with - verify against the training notebook.
teledf = teledf_trans[selected_features]

In [14]:
# Feature matrix handed to the model's predict() call below.
X = teledf

In [27]:
# Load the pickled model from the notebook's working directory. The context
# manager closes the file handle once the model is loaded; the previous bare
# open() call never closed it.
# NOTE: unpickling can execute arbitrary code - only load a model file that
# comes from a trusted source.
with open('final_model', 'rb') as model_file:
    loaded_model = pickle.load(model_file)

# Predict churn for every row of the feature matrix.
result = loaded_model.predict(X)

In [32]:
# Wrap the predictions in a single-column frame labelled 'Churn'.
result = pd.DataFrame(result, columns=['Churn'])

# Attach the predictions to the features (aligned on the row index) and
# export the combined table as a CSV.
teledf_predict = X.join(result)
teledf_predict.to_csv('tele_churn_pred.csv')