# Machine Learning Based Recommendation Systems
## Classification-based Collaborative Filtering Systems
## Logistic Regression as a Classifier

In [25]:
import numpy as np
import pandas as pd

from pandas import Series, DataFrame
from sklearn.linear_model import LogisticRegression

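This bank marketing dataset is publicly available for download from the UCI Machine Learning Repository (https://archive.ics.uci.edu/ml/datasets/Bank+Marketing). This notebook loads a pre-processed copy, `bank_full_w_dummy_vars.csv`, in which dummy-variable columns have been added alongside the original fields.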

The dataset was originally created by S. Moro, P. Cortez and P. Rita: [Moro et al., 2014] S. Moro, P. Cortez and P. Rita. A Data-Driven Approach to Predict the Success of Bank Telemarketing. Decision Support Systems, Elsevier, 62:22-31, June 2014.

In [26]:
# Load the pre-processed bank marketing table (original fields plus dummy-variable
# columns) from the CSV that sits next to this notebook.
bank_full = pd.read_csv(filepath_or_buffer='bank_full_w_dummy_vars.csv')
# Preview the first five rows.
bank_full.head(n=5)

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,...,job_unknown,job_retired,job_services,job_self_employed,job_unemployed,job_maid,job_student,married,single,divorced
0,58,management,married,tertiary,no,2143,yes,no,unknown,5,...,0,0,0,0,0,0,0,1,0,0
1,44,technician,single,secondary,no,29,yes,no,unknown,5,...,0,0,0,0,0,0,0,0,1,1
2,33,entrepreneur,married,secondary,no,2,yes,yes,unknown,5,...,0,0,0,0,0,0,0,1,0,0
3,47,blue-collar,married,unknown,no,1506,yes,no,unknown,5,...,0,0,0,0,0,0,0,1,0,0
4,33,unknown,single,unknown,no,1,no,no,unknown,5,...,1,0,0,0,0,0,0,0,1,1


In [27]:
# Summarise the frame: number of rows, column names, non-null counts and dtypes.
bank_full.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 45211 entries, 0 to 45210
Data columns (total 37 columns):
age                             45211 non-null int64
job                             45211 non-null object
marital                         45211 non-null object
education                       45211 non-null object
default                         45211 non-null object
balance                         45211 non-null int64
housing                         45211 non-null object
loan                            45211 non-null object
contact                         45211 non-null object
day                             45211 non-null int64
month                           45211 non-null object
duration                        45211 non-null int64
campaign                        45211 non-null int64
pdays                           45211 non-null int64
previous                        45211 non-null int64
poutcome                        45211 non-null object
y                               45

In [28]:
# Show the column labels. Several labels in the output carry trailing spaces
# (e.g. 'y_binary                    '), so label-based selection would need the
# padded names; the cells below select columns by position instead.
bank_full.columns

Index(['age', 'job', 'marital', 'education', 'default', 'balance', 'housing',
       'loan', 'contact', 'day', 'month', 'duration', 'campaign', 'pdays',
       'previous', 'poutcome', 'y', 'y_binary                    ',
       'housing_loan                ', 'credit_in_default', 'personal_loans',
       'prev_failed_to_subscribe    ', 'prev_subscribed             ',
       'job_management              ', 'job_tech                    ',
       'job_entrepreneur            ', 'job_bluecollar              ',
       'job_unknown                 ', 'job_retired                 ',
       'job_services                ', 'job_self_employed           ',
       'job_unemployed              ', 'job_maid                    ',
       'job_student                 ', 'married                     ',
       'single                      ', 'divorced                    '],
      dtype='object')

In [29]:
# Feature matrix X: every column from housing_loan through divorced (the 19
# dummy-variable columns, previously picked with hard-coded positions 18..36).
# Many CSV headers carry trailing spaces, so the two boundary columns are located
# on whitespace-stripped labels; the selection keeps pointing at the intended
# columns even if columns are added or reordered ahead of them.
# NOTE(review): in the previewed rows 'divorced' is 1 wherever 'single' is 1, even
# though marital == 'single' -- confirm how the 'divorced' dummy was derived.
column_labels = bank_full.columns.str.strip()
first_feature = column_labels.get_loc('housing_loan')
last_feature = column_labels.get_loc('divorced')
X = bank_full.iloc[:, first_feature:last_feature + 1]
X.head()

Unnamed: 0,housing_loan,credit_in_default,personal_loans,prev_failed_to_subscribe,prev_subscribed,job_management,job_tech,job_entrepreneur,job_bluecollar,job_unknown,job_retired,job_services,job_self_employed,job_unemployed,job_maid,job_student,married,single,divorced
0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0
1,1,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1
2,1,0,1,0,0,0,0,1,0,0,0,0,0,0,0,0,1,0,0
3,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,0,0
4,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1


In [30]:
# Target vector: the y_binary column (position 17) as a NumPy array --
# 1 when the client subscribed, 0 when they did not.
y = bank_full[bank_full.columns[17]].values
y

array([0, 0, 0, ..., 1, 0, 0], dtype=int64)

In [31]:
# Sanity check of the label encoding on the last rows of the table:
#   y == 'no'   ->  y_binary == 0
#   y == 'yes'  ->  y_binary == 1
bank_full.iloc[:, 16:18].tail()

Unnamed: 0,y,y_binary
45206,yes,1
45207,yes,1
45208,yes,1
45209,no,0
45210,no,0


In [32]:
# Fit a logistic-regression classifier, with the library's default settings, on the
# feature matrix X and the binary target y. fit() returns the estimator itself, so
# LogReg is bound directly to the fitted model.
LogReg = LogisticRegression().fit(X, y)
# Display the fitted estimator and its hyper-parameters.
LogReg

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)

In [33]:
# Predict whether new_user01 will accept an offer. The 19 values follow the column
# order of X (housing_loan ... divorced).
new_user01 = [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]
# predict() needs a 2-D array, so shape the profile as a single row.
new_user01 = np.array(new_user01).reshape(1, -1)

y_pred = LogReg.predict(new_user01)
# Extract the single predicted class as a plain Python scalar.
y_pred.item(0)

0

In [34]:
# Predict whether new_user02 will accept an offer. The 19 values follow the column
# order of X (housing_loan ... divorced).
new_user02 = [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# predict() needs a 2-D array, so shape the profile as a single row.
new_user02 = np.array(new_user02).reshape(1, -1)

y_pred = LogReg.predict(new_user02)
# Extract the single predicted class as a plain Python scalar.
y_pred.item(0)

1

### Save model to file using sklearn.externals.joblib

In [35]:
# Persist the fitted classifier to disk so it can be reused without retraining.
# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed in
# 0.23, so import the standalone `joblib` package first and fall back to the
# vendored copy only on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

joblib.dump(LogReg, 'termDepositRecommender.joblib')

['termDepositRecommender.joblib']

### Load model and test

In [36]:
# Reload the persisted classifier from disk.
# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed in
# 0.23, so import the standalone `joblib` package first and fall back to the
# vendored copy only on old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# NOTE: joblib files are pickle-based and can execute arbitrary code when loaded;
# only load model files that come from a trusted source.
loadtermDepositRecommender = joblib.load('termDepositRecommender.joblib')

In [37]:
# Re-score new_user01 with the model reloaded from disk. The 19 values follow the
# column order of X (housing_loan ... divorced).
new_user01 = [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1]
# predict() needs a 2-D array, so shape the profile as a single row.
new_user01 = np.array(new_user01).reshape(1, -1)

y_pred = loadtermDepositRecommender.predict(new_user01)
# Extract the single predicted class as a plain Python scalar.
y_pred.item(0)



0

In [38]:
# Re-score new_user02 with the model reloaded from disk. The 19 values follow the
# column order of X (housing_loan ... divorced).
new_user02 = [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
# predict() needs a 2-D array, so shape the profile as a single row.
new_user02 = np.array(new_user02).reshape(1, -1)

y_pred = loadtermDepositRecommender.predict(new_user02)
# Extract the single predicted class as a plain Python scalar.
y_pred.item(0)

1