In [1]:
from lumpia.load_data.load_data import read_data
from lumpia.model.model import train_model
from lumpia.pre_processing.pre_processing import drop_nan, fill_mean
from lumpia.train_test.train_test import split
from lumpia.evaluation.evaluation import get_roc_auc_score
from lumpia.features.features import gen_dummies, normalize, take_log
import numpy as np

import warnings
warnings.filterwarnings("ignore")

In [2]:
# Pre-processing: load the raw churn table and clean it before feature work.

churn_raw = read_data('churn_data.csv')

# NOTE(review): presumably drop_nan discards rows that have missing values in
# the listed columns — confirm against lumpia.pre_processing.
churn_df = drop_nan(
    churn_raw,
    ['DataUsage', 'ContractRenewal'],
)

In [9]:
# Features: derive the model inputs from the cleaned churn table.

# Encode the two categorical flags as dummy columns. Per the original author's
# note, gen_dummies removes each source column once it has been encoded.
# NOTE(review): because churn_df is rebound from itself, re-running this cell
# alone presumably fails (the source columns are gone) — re-run the
# pre-processing cell first.
churn_df = gen_dummies(churn_df, ['DataPlan', 'ContractRenewal'])

# Columns handed to normalize().
to_norm = [
    'AccountWeeks',
    'DataUsage',
    'CustServCalls',
    'DayMins',
    'DayCalls',
    'MonthlyCharge',
    'RoamMins',
]

# Normalize the listed columns first, then apply take_log to OverageFee on the
# normalized frame.
norm_df = take_log(normalize(churn_df, to_norm), ['OverageFee'])

Unnamed: 0,AccountWeeks,DataUsage,CustServCalls,DayMins,DayCalls,MonthlyCharge,RoamMins,Churn,OverageFee,DataPlan_1,ContractRenewal_1
0,0.676489,1.480204,-0.427932,1.566767,0.476643,1.990727,-0.085008,0,2.2895,1,1
1,0.149065,2.266072,-0.427932,-0.333738,1.124503,1.56451,1.240482,0,2.280339,1,1
2,0.902529,-0.641642,-1.188218,1.168304,0.675985,-0.262133,0.703121,0,1.80171,0,1
3,-0.42859,-0.641642,0.332354,2.196596,-1.466936,0.042307,-1.303026,0,1.131402,0,0
4,-0.654629,-0.641642,1.092641,-0.24009,0.626149,-0.931902,-0.049184,0,2.004179,0,0


In [4]:
# Show the column labels of the transformed frame as a sanity check before splitting.
print(norm_df.columns)

Index(['AccountWeeks', 'DataUsage', 'CustServCalls', 'DayMins', 'DayCalls',
       'MonthlyCharge', 'RoamMins', 'Churn', 'OverageFee', 'DataPlan_1',
       'ContractRenewal_1'],
      dtype='object')


In [5]:
#Split: Partition the dataset into training and testing sets for the model.
# split() receives the full frame (feature columns plus the 'Churn' column) and
# its result is unpacked in the order X_train, X_test, y_train, y_test.
# NOTE(review): presumably split() separates the 'Churn' target itself — confirm.
X_train, X_test, y_train, y_test = split(norm_df)

In [6]:
#Trainer: Definition of a model from the dataset to make predictions

target_cols = ['Churn']
# Every column that is not a target is a feature. The list is built with a
# comprehension because list.remove() mutates in place and returns None, so
# assigning its result would pass feature_cols=None to train_model.
feature_cols = [col for col in norm_df.columns if col not in target_cols]

results = train_model(X_train, X_test, y_train, y_test, feature_cols, target_cols)

In [7]:
#Assessment: Evaluate the trained model's performance.
# NOTE(review): the helper's name indicates ROC AUC rather than accuracy; presumably
# the two returned values are the scores on the training and test splits — confirm.

train_auc, test_auc = get_roc_auc_score(results)