In the previous session we trained a model for predicting churn and evaluated it. Now let's deploy it

In [7]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder

from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, accuracy_score

In [2]:
df = pd.read_csv('bank.csv', delimiter=';')
df.head()

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,y
0,30,unemployed,married,primary,no,1787,no,no,cellular,19,oct,79,1,-1,0,unknown,no
1,33,services,married,secondary,no,4789,yes,yes,cellular,11,may,220,1,339,4,failure,no
2,35,management,single,tertiary,no,1350,yes,no,cellular,16,apr,185,1,330,1,failure,no
3,30,management,married,tertiary,no,1476,yes,yes,unknown,3,jun,199,4,-1,0,unknown,no
4,59,blue-collar,married,secondary,no,0,yes,no,unknown,5,may,226,1,-1,0,unknown,no


In [3]:
df_full_train, df_test = train_test_split(df, test_size=0.2, random_state=1)

In [5]:
X = df.drop(columns='y', axis=1)
# Y = df['y']
# le = LabelEncoder()
# Y = le.fit_transform(Y)

categorical_columns = list(X.dtypes[X.dtypes == 'object'].index)
numerical_columns = list(X.dtypes[X.dtypes == 'int64'].index)

In [16]:
numerical_columns

['age', 'balance', 'day', 'duration', 'campaign', 'pdays', 'previous']

In [6]:
# def whole_process(df):
#     pass

# def data_split(df_train, y_train, test_size=0.3):
#     X_train, X_test, Y_train, Y_test = train_test_split(df_train, y_train, test_size=test_size, shuffle=True, random_state=22)
#     return X_train, X_test, Y_train, Y_test

def train(df_train, y_train, C=1.0):
    dicts = df_train[categorical_columns + numerical_columns].to_dict(orient='records')

    dv = DictVectorizer(sparse=False)
    X_train = dv.fit_transform(dicts)

    model = LogisticRegression(C=C, max_iter=1000)
    model.fit(X_train, y_train)
    
    return dv, model

def predict(df, dv, model):
    dicts = df[categorical_columns + numerical_columns].to_dict(orient='records')

    X = dv.transform(dicts)
    y_pred = model.predict(X)

    return y_pred
# dv, model = train(X,Y)

In [9]:
C = 1.0
n_splits = 5

kfold = KFold(n_splits=n_splits, shuffle=True, random_state=1)

scores = []

for train_idx, val_idx in kfold.split(df_full_train):
    df_train = df_full_train.iloc[train_idx]
    df_val = df_full_train.iloc[val_idx]

    y_train = df_train['y']
    y_val = df_val['y']
    y_val = y_val.replace({'no': 0, 'yes': 1})
    

    dv, model = train(df_train, y_train, C=C)
    y_pred = predict(df_val, dv, model)

    auc = accuracy_score(y_val, y_pred)
    scores.append(auc)

print('C=%s %.3f +- %.3f' % (C, np.mean(scores), np.std(scores)))

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

C=1.0 0.904 +- 0.014


In [10]:
scores

[0.925414364640884,
 0.8852005532503457,
 0.9017980636237898,
 0.8948824343015215,
 0.9142461964038727]

In [18]:
dv, model = train(df_full_train, df_full_train.churn.values, C=1.0)
y_pred = predict(df_test, dv, model)

auc = roc_auc_score(y_test, y_pred)
auc

0.8572386167896259

Save the model

In [11]:
import pickle

In [12]:
output_file = f'model_C={C}.bin'

In [13]:
output_file

'model_C=1.0.bin'

In [14]:
f_out = open(output_file, 'wb') 
pickle.dump((dv, model), f_out)
f_out.close()

In [22]:
!ls -lh *.bin

-rwxrwxrwx 1 alexey alexey 2.5K Sep 30 14:10 'model_C=1.0.bin'


In [21]:
with open(output_file, 'wb') as f_out: 
    pickle.dump((dv, model), f_out)

Load the model

In [1]:
import pickle

In [2]:
input_file = 'model_C=1.0.bin'

In [4]:
with open(input_file, 'rb') as f_in: 
    dv, model = pickle.load(f_in)

In [8]:
model

LogisticRegression(max_iter=1000)

In [27]:
customer = {
    'gender': 'female',
    'seniorcitizen': 0,
    'partner': 'yes',
    'dependents': 'no',
    'phoneservice': 'no',
    'multiplelines': 'no_phone_service',
    'internetservice': 'dsl',
    'onlinesecurity': 'no',
    'onlinebackup': 'yes',
    'deviceprotection': 'no',
    'techsupport': 'no',
    'streamingtv': 'no',
    'streamingmovies': 'no',
    'contract': 'month-to-month',
    'paperlessbilling': 'yes',
    'paymentmethod': 'electronic_check',
    'tenure': 1,
    'monthlycharges': 29.85,
    'totalcharges': 29.85
}

In [28]:
X = dv.transform([customer])

In [29]:
y_pred = model.predict_proba(X)[0, 1]

In [30]:
print('input:', customer)
print('output:', y_pred)

input: {'gender': 'female', 'seniorcitizen': 0, 'partner': 'yes', 'dependents': 'no', 'phoneservice': 'no', 'multiplelines': 'no_phone_service', 'internetservice': 'dsl', 'onlinesecurity': 'no', 'onlinebackup': 'yes', 'deviceprotection': 'no', 'techsupport': 'no', 'streamingtv': 'no', 'streamingmovies': 'no', 'contract': 'month-to-month', 'paperlessbilling': 'yes', 'paymentmethod': 'electronic_check', 'tenure': 1, 'monthlycharges': 29.85, 'totalcharges': 29.85}
output: 0.5912433520805763


Making requests

In [22]:
import requests

In [23]:
url = 'http://localhost:9696/predict'

In [24]:
data = {
    "age": 30,	
    "job": "unemployed",	
    "marital": "married",	
    "education": "primary",	
    "default": "no",	
    "balance": 1787,	
    "housing": "no",	
    "loan": "no",	
    "contact": "cellular",	
    "day": 19,
    "month": "oct",
    "duration": 79,
    "campaign": 1,
    "pdays": -1,
    "previous": 0,
    "poutcome": "unknown"
}

In [33]:
response = requests.post(url, json=data).json()

In [34]:
response

{'Should we issue loan': 0, 'ans': 'do not give loan'}

In [27]:
if response['churn']:
    print('sending email to', 'asdx-123d')

sending email to asdx-123d
