# Voting Classifiers

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
credit = pd.read_csv("CreditCardDefault.csv")
credit[:5]

Unnamed: 0,ID,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,next_month_payment
0,1,20000,2,2,1,24,2,2,-1,-1,...,0,0,0,0,689,0,0,0,0,1
1,2,120000,2,2,2,26,-1,2,0,0,...,3272,3455,3261,0,1000,1000,1000,0,2000,1
2,3,90000,2,2,2,34,0,0,0,0,...,14331,14948,15549,1518,1500,1000,1000,1000,5000,0
3,4,50000,2,2,1,37,0,0,0,0,...,28314,28959,29547,2000,2019,1200,1100,1069,1000,0
4,5,50000,1,2,1,57,-1,0,-1,0,...,20940,19146,19131,2000,36681,10000,9000,689,679,0


In [3]:
credit.drop(["ID"], axis = 1, inplace = True)

In [4]:
credit[:5]

Unnamed: 0,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,PAY_5,...,BILL_AMT4,BILL_AMT5,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,next_month_payment
0,20000,2,2,1,24,2,2,-1,-1,-2,...,0,0,0,0,689,0,0,0,0,1
1,120000,2,2,2,26,-1,2,0,0,0,...,3272,3455,3261,0,1000,1000,1000,0,2000,1
2,90000,2,2,2,34,0,0,0,0,0,...,14331,14948,15549,1518,1500,1000,1000,1000,5000,0
3,50000,2,2,1,37,0,0,0,0,0,...,28314,28959,29547,2000,2019,1200,1100,1069,1000,0
4,50000,1,2,1,57,-1,0,-1,0,0,...,20940,19146,19131,2000,36681,10000,9000,689,679,0


In [5]:
credit.shape

(30000, 24)

In [6]:
X = credit.iloc[:,0:23]
y = credit.iloc[:,-1]

In [7]:
from sklearn.model_selection import train_test_split

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size = .3,
                                                    stratify = y)

In [9]:
from sklearn import preprocessing

In [10]:
scaler = preprocessing.StandardScaler().fit(X_train)

In [11]:
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [12]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

In [13]:
model1 = LogisticRegression()
model2 = DecisionTreeClassifier()
model3 = SVC()

In [14]:
model1.fit(X_train, y_train)
model2.fit(X_train, y_train)
model3.fit(X_train, y_train)

SVC()

In [15]:
model1.fit(X_train, y_train)

LogisticRegression()

In [16]:
model2.fit(X_train, y_train)

DecisionTreeClassifier()

In [17]:
model3.fit(X_train, y_train)

SVC()

In [18]:
model1_pred = model1.predict(X_test)
model2_pred = model1.predict(X_test)
model3_pred = model1.predict(X_test)

In [19]:
#checking the type of model1...

model1.__class__.__name__

'LogisticRegression'

In [20]:
#checking the type of model2...

model2.__class__.__name__

'DecisionTreeClassifier'

In [21]:
#checking the type of model3...

model3.__class__.__name__

'SVC'

In [22]:
from sklearn.metrics import accuracy_score

In [23]:
print(model1.__class__.__name__,accuracy_score(y_test, model1_pred))
print(model2.__class__.__name__,accuracy_score(y_test, model2_pred))
print(model3.__class__.__name__,accuracy_score(y_test, model3_pred))

LogisticRegression 0.8105555555555556
DecisionTreeClassifier 0.8105555555555556
SVC 0.8105555555555556


# Hard Voting

In [24]:
from sklearn.ensemble import VotingClassifier

In [25]:
help(VotingClassifier)

Help on class VotingClassifier in module sklearn.ensemble._voting:

class VotingClassifier(sklearn.base.ClassifierMixin, _BaseVoting)
 |  VotingClassifier(estimators, *, voting='hard', weights=None, n_jobs=None, flatten_transform=True, verbose=False)
 |  
 |  Soft Voting/Majority Rule classifier for unfitted estimators.
 |  
 |  Read more in the :ref:`User Guide <voting_classifier>`.
 |  
 |  .. versionadded:: 0.17
 |  
 |  Parameters
 |  ----------
 |  estimators : list of (str, estimator) tuples
 |      Invoking the ``fit`` method on the ``VotingClassifier`` will fit clones
 |      of those original estimators that will be stored in the class attribute
 |      ``self.estimators_``. An estimator can be set to ``'drop'``
 |      using ``set_params``.
 |  
 |      .. versionchanged:: 0.21
 |          ``'drop'`` is accepted. Using None was deprecated in 0.22 and
 |          support was removed in 0.24.
 |  
 |  voting : {'hard', 'soft'}, default='hard'
 |      If 'hard', uses predicted c

In [26]:
estimators = [("lr", model1),("dt",model2),("svm",model3)]
hardvoting = VotingClassifier(estimators, voting = "hard")

In [27]:
hardvoting.fit(X_train, y_train)

VotingClassifier(estimators=[('lr', LogisticRegression()),
                             ('dt', DecisionTreeClassifier()), ('svm', SVC())])

In [28]:
hardvoting_pred = hardvoting.predict(X_test)

In [29]:
hardvoting_pred

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [30]:
accuracy_score(y_test, hardvoting_pred)

0.8158888888888889

# Soft Voting

In [31]:
estimators = [("lr", model1),("dt",model2),("svm",model3)]
softvoting = VotingClassifier(estimators, voting = "hard")

In [32]:
softvoting.fit(X_train, y_train)

VotingClassifier(estimators=[('lr', LogisticRegression()),
                             ('dt', DecisionTreeClassifier()), ('svm', SVC())])

In [33]:
softvoting_pred = softvoting.predict(X_test)

In [34]:
softvoting_pred

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [35]:
#softvoting_pred = softvoting.predict_proba(X_test)

In [36]:
model4 = SVC(probability = True)
estimators = [("lr", model1),("dt",model2),("svm",model4)]
softvoting = VotingClassifier(estimators, voting = "hard")

In [37]:
softvoting.fit(X_train, y_train)

VotingClassifier(estimators=[('lr', LogisticRegression()),
                             ('dt', DecisionTreeClassifier()),
                             ('svm', SVC(probability=True))])

In [38]:
softvoting_pred = softvoting.predict(X_test)

In [39]:
#softvoting_pred = softvoting.predict_proba(X_test)

In [40]:
softvoting_pred

array([0, 0, 0, ..., 0, 0, 0], dtype=int64)

In [41]:
accuracy_score(y_test, softvoting_pred)

0.8164444444444444