In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression

#### Studying the bank dataset
We will use the full bank dataset (16 input features plus the `deposit` target) to understand ensemble methods.

    age (numeric) - age of the bank customer
    job(categorical)- job of the bank customer
    marital(categorical)- marital status of the bank customer
    education(categorical)- Education status of the customer
    default(categorical) - Whether the customer has credit in default?
    balance (numeric) - average yearly balance in euros
    housing (categorical) - Whether the customer has a housing loan?
    loan(categorical)- Whether the customer has a personal loan?
    contact(categorical)- contact communication type
    day(numeric)- last contact day of the month
    month(categorical)- last contact month of year
    day_of_week(categorical)- last contact day of the week ('mon','tue','wed','thu','fri'); note that no such column appears in the bank.csv preview below
    duration (numeric) - last contact duration, in seconds
    campaign (numeric) - number of contacts performed during this campaign and for this client
    pdays (numeric)- number of days that passed by after the client was last contacted from a previous campaign
    previous (numeric)- number of contacts performed before this campaign and for this client
    poutcome (categorical)- outcome of the previous marketing campaign
    
    Target: deposit - has the client subscribed to a term deposit? (binary- 0: no, 1: yes)

In [2]:
# Read the bank dataset from a CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer='bank.csv')

In [3]:
# Preview the first five rows to check the columns and how they are encoded.
data.head(n=5)

Unnamed: 0,age,job,marital,education,default,balance,housing,loan,contact,day,month,duration,campaign,pdays,previous,poutcome,deposit
0,59,0,1,1,0,2343,1,0,2,5,8,1042,1,-1,0,3,1
1,56,0,1,1,0,45,0,0,2,5,8,1467,1,-1,0,3,1
2,41,9,1,1,0,1270,1,0,2,5,8,1389,1,-1,0,3,1
3,55,7,1,1,0,2476,1,0,2,5,8,579,1,-1,0,3,1
4,54,0,1,2,0,184,0,0,2,5,8,673,2,-1,0,3,1


In [4]:
# Base estimators for the ensemble: two logistic regressions and two
# entropy-criterion decision trees, each pair differing only in its seed
# (0 and 42).
log_clf_1, log_clf_2 = (
    LogisticRegression(random_state=seed) for seed in (0, 42)
)
decision_clf1, decision_clf2 = (
    DecisionTreeClassifier(criterion='entropy', random_state=seed)
    for seed in (0, 42)
)

In [5]:
# Features are every column except the last; the last column (`deposit`)
# is the target.
# The CSV also carries an 'Unnamed: 0' column holding the saved row index
# (0, 1, 2, ... in the preview). It is a row identifier, not a customer
# attribute, so it is removed from the features. errors='ignore' keeps the
# cell working if the file is later loaded without that column.
# NOTE(review): scores recorded in later cells were presumably produced with
# that column still included; re-run the notebook to refresh them.
X = data.iloc[:, :-1].drop(columns=['Unnamed: 0'], errors='ignore')
y = data.iloc[:, -1]

In [6]:
# Hold out 30% of the rows for testing; the fixed seed makes the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)

In [7]:
from sklearn.metrics import accuracy_score

In [8]:
# Logistic Regression with random state 0:
# fit on the training split, predict the test split, report accuracy.
y_pred = log_clf_1.fit(X_train, y_train).predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)



0.7951627351448194

In [9]:
# Logistic Regression with random state 42:
# fit on the training split, predict the test split, report accuracy.
y_pred = log_clf_2.fit(X_train, y_train).predict(X_test)
accuracy_score(y_true=y_test, y_pred=y_pred)



0.7951627351448194

In [10]:
# Decision Tree with random state 0:
# fit on the training split and report the score on the test split.
decision_clf1.fit(X_train, y_train).score(X_test, y_test)

0.7856076440728575

In [11]:
# Decision Tree with random state 42:
# fit on the training split and report the score on the test split.
decision_clf2.fit(X_train, y_train).score(X_test, y_test)

0.7796357121528814

In [12]:
# (label, estimator) pairs for the four base models, in the form passed to
# the voting classifier's `estimators` argument.
Model_List = list(zip(
    ['Logistic Regression RS0', 'Logistic Regression RS42',
     'Decision Tree RS0', 'Decision Tree RS42'],
    [log_clf_1, log_clf_2, decision_clf1, decision_clf2],
))

In [13]:
# Voting ensemble over the base models, configured for hard voting.
from sklearn.ensemble import VotingClassifier

# NOTE(review): with four voters a 2-2 split is possible; confirm how the
# installed scikit-learn version resolves such ties.
voting_clf_hard = VotingClassifier(voting='hard', estimators=Model_List)
voting_clf_hard.fit(X=X_train, y=y_train)



VotingClassifier(estimators=[('Logistic Regression RS0',
                              LogisticRegression(C=1.0, class_weight=None,
                                                 dual=False, fit_intercept=True,
                                                 intercept_scaling=1,
                                                 l1_ratio=None, max_iter=100,
                                                 multi_class='warn',
                                                 n_jobs=None, penalty='l2',
                                                 random_state=0, solver='warn',
                                                 tol=0.0001, verbose=0,
                                                 warm_start=False)),
                             ('Logistic Regression RS42',
                              LogisticRegression(C=1.0, class_weight...
                             ('Decision Tree RS42',
                              DecisionTreeClassifier(class_weight=None,
                   

In [14]:
# Score of the hard-voting ensemble on the held-out test split.
hard_voting_score = voting_clf_hard.score(X=X_test, y=y_test)
hard_voting_score

0.7709764108689161

In [15]:
# Voting ensemble over the same base models, configured for soft voting.
voting_clf_soft = VotingClassifier(voting='soft', estimators=Model_List)
voting_clf_soft.fit(X=X_train, y=y_train)



VotingClassifier(estimators=[('Logistic Regression RS0',
                              LogisticRegression(C=1.0, class_weight=None,
                                                 dual=False, fit_intercept=True,
                                                 intercept_scaling=1,
                                                 l1_ratio=None, max_iter=100,
                                                 multi_class='warn',
                                                 n_jobs=None, penalty='l2',
                                                 random_state=0, solver='warn',
                                                 tol=0.0001, verbose=0,
                                                 warm_start=False)),
                             ('Logistic Regression RS42',
                              LogisticRegression(C=1.0, class_weight...
                             ('Decision Tree RS42',
                              DecisionTreeClassifier(class_weight=None,
                   

In [16]:
# Score of the soft-voting ensemble on the held-out test split.
soft_voting_score = voting_clf_soft.score(X=X_test, y=y_test)
soft_voting_score

0.787996416840848