In [1]:
# Report the interpreter that is running this kernel. `sys.executable` works on
# every platform and reflects the kernel itself, whereas `!which python3` only
# shows the first `python3` found on the subshell's PATH.
import sys
sys.executable

/Users/karthikmaddukuri/anaconda3/envs/churn-model/bin/python3


In [2]:
# Load IPython's autoreload extension and switch it to mode 2, which reloads
# imported modules before each cell executes, so edits to the project's `src`
# package are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [41]:
# Standard imports
import os
import pickle
import sys
sys.path.append('..')  #To let jupyter notebook to look one folder up

#Third-party imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score

#Local imports
from src.localpaths import *
from src.data.make_dataset import load_training_data
from src.models.train_model import *

In [4]:
# Notebook display configuration: let pandas render up to 100 columns and
# 100 rows before it truncates a displayed DataFrame.
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100

# Load Featurized Data 

In [5]:
# Unpack the two values returned by the project's loader (features and target,
# judging by the variable names).
# NOTE(review): `final=True` presumably selects the fully featurized dataset
# named in the section heading; confirm in src.data.make_dataset.
X_train, y_train = load_training_data(final=True)

In [6]:
# Preview the first five rows to sanity-check the feature columns.
X_train.head()

Unnamed: 0,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,PaperlessBilling,MonthlyCharges,TotalCharges,MultipleLines_No phone service,MultipleLines_Yes,InternetService_Fiber optic,InternetService_No,OnlineSecurity_No internet service,OnlineSecurity_Yes,OnlineBackup_No internet service,OnlineBackup_Yes,DeviceProtection_No internet service,DeviceProtection_Yes,TechSupport_No internet service,TechSupport_Yes,StreamingTV_No internet service,StreamingTV_Yes,StreamingMovies_No internet service,StreamingMovies_Yes,Contract_One year,Contract_Two year,PaymentMethod_Credit card (automatic),PaymentMethod_Electronic check,PaymentMethod_Mailed check
0,0,0,0,1,1,0,1,25.3,25.3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,1,0,0,0,7,1,1,75.15,525.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0
2,1,0,0,1,4,1,1,20.05,85.5,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1,0,0,0,29,1,1,76.0,2215.25,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,1,0,0,0,3,1,1,75.1,270.7,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0


# Load Model Results

In [46]:
# NOTE(review): `load_model_results` is not imported by name; it presumably
# arrives through one of the wildcard imports (src.models.train_model or
# src.localpaths). Confirm the origin and consider importing it explicitly.
model_results = load_model_results()

In [47]:
# Display the results table: one row per saved model file with its metrics.
model_results

Unnamed: 0,model_filename,model_string,accuracy,precision,recall,f1,roc_auc
0,1462994562544665856.pkl,"LogisticRegression(C=1.0, class_weight=None, d...",0.795609,,,,
1,1854848146051119104.pkl,"LogisticRegression(C=1.0, class_weight=None, d...",0.795609,,,,
2,1701978539907614976.pkl,"LogisticRegression(C=1.0, class_weight=None, d...",0.795609,,,,
3,2053777711421553920.pkl,"LogisticRegression(C=1.0, class_weight=None, d...",0.801966,0.653366,0.529496,0.584391,0.840393


# Loading Last Model (Cross-val)

In [16]:
# Deserialize the saved estimator; this filename is the last row of the
# `model_results` table displayed earlier in the notebook.
# NOTE(review): pickle.load can execute arbitrary code, so only open model
# files that this project wrote itself.
model_path = os.path.join(MODELS_DIRECTORY, '2053777711421553920.pkl')
with open(model_path, 'rb') as model_file:
    model = pickle.load(model_file)

In [17]:
# Display the unpickled object's repr to inspect its hyperparameters.
model

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=1000,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

# Exploring Model Weights

In [40]:
# Rank (feature, coefficient) pairs by absolute coefficient, largest first;
# the signed value is kept so the direction of each weight stays visible.
# NOTE(review): assumes the model was fit on columns in the same order as
# X_train.columns; confirm against the training code.
sorted(
    zip(X_train.columns, model.coef_[0]),
    key=lambda name_and_weight: abs(name_and_weight[1]),
    reverse=True,
)

[('Contract_Two year', -0.7441145539204811),
 ('OnlineSecurity_Yes', -0.6104283779463289),
 ('InternetService_Fiber optic', 0.5691502400499318),
 ('TechSupport_Yes', -0.5201295644743463),
 ('PhoneService', -0.44628012191709593),
 ('PaperlessBilling', 0.38773654480644426),
 ('Contract_One year', -0.38654473804025236),
 ('MultipleLines_No phone service', 0.2736707838828287),
 ('OnlineBackup_Yes', -0.2634116271114536),
 ('PaymentMethod_Electronic check', 0.22902831319076275),
 ('StreamingMovies_Yes', 0.2273415705542164),
 ('MultipleLines_Yes', 0.2247367538426381),
 ('SeniorCitizen', 0.21382083100056062),
 ('Dependents', -0.1556907861104303),
 ('PaymentMethod_Credit card (automatic)', -0.150746052786493),
 ('InternetService_No', -0.12254909783756332),
 ('OnlineSecurity_No internet service', -0.12254909783756332),
 ('OnlineBackup_No internet service', -0.12254909783756332),
 ('DeviceProtection_No internet service', -0.12254909783756332),
 ('TechSupport_No internet service', -0.1225490978375