## Voting for Classification 

### We will be using the IRIS dataset 


In [2]:
#Lets import the libraries and datasets 
import numpy as np

from sklearn import datasets
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

from sklearn.ensemble import VotingClassifier

import statistics as stat

In [3]:
#Lets load the dataset
iris = datasets.load_iris()
iris.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [4]:
X = iris.data[:, [0, 2]]
y = iris.target

In [5]:
#Lets check the shape of our data 
print(X.shape)
print(y.shape)

(150, 2)
(150,)


In [6]:
#Output classes
list(iris.target_names)

['setosa', 'versicolor', 'virginica']

In [7]:
#Lets split our data in train and test 
X_train,X_test,y_train,y_test=train_test_split(X,y,random_state=1)

In [8]:
#Lets check the shape 
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(112, 2)
(112,)
(38, 2)
(38,)


In [9]:
#We initialize the models
model1 = DecisionTreeClassifier(max_depth=4)
model2 = KNeighborsClassifier(n_neighbors=7)
model3 = SVC(kernel='rbf', probability=True)

In [10]:
#We then fit the models 
model1.fit(X_train, y_train)
model2.fit(X_train, y_train)
model3.fit(X_train, y_train)

SVC(probability=True)

### Lets look at the accuracy for each model 

In [11]:
#The accuracy score for the first model is
score1=model1.score(X_test, y_test)
print(score1)

0.9736842105263158


In [12]:
#The accuracy score for the second model is
score2=model2.score(X_test, y_test)
print(score2)

0.9736842105263158


In [13]:
#The accuracy score for the third model is
score3=model3.score(X_test, y_test)
print(score3)

0.9736842105263158


### Lets combine the models using voting in python 

In [14]:
#Lets find out what each model predicts 
pred1=model1.predict(X_test)
pred2=model2.predict(X_test)
pred3=model3.predict(X_test)

In [15]:
#Lets take the vote using the mode function 
final_pred = np.array([])
for i in range(0,len(X_test)):
    final_pred = np.append(final_pred, stat.mode([pred1[i],
                                                  pred2[i], 
                                                  pred3[i]]))
final_pred

array([0., 1., 1., 0., 2., 1., 2., 0., 0., 2., 1., 0., 2., 1., 1., 0., 1.,
       1., 0., 0., 1., 1., 2., 0., 2., 1., 0., 0., 1., 2., 1., 2., 1., 2.,
       2., 0., 1., 0.])

### This is also called as a hard voting since we are using the model predictions to get the final combined prediction 

In [20]:
total=np.sum(y_test==final_pred)

print("Accuracy:",total,"/",len(final_pred),"* 100 ="," {0:.3f}".format(total/len(final_pred)*100),"%")

Accuracy: 37 / 38 * 100 =  97.368 %


### Soft voting in python

In [17]:
spred1=model1.predict_proba(X_test)
spred2=model2.predict_proba(X_test)
spred3=model3.predict_proba(X_test)

finalpred=(spred1+spred2+spred3)/3
finalpred

array([[0.9884574 , 0.00589594, 0.00564665],
       [0.05621893, 0.93409007, 0.009691  ],
       [0.00519164, 0.98465785, 0.01015051],
       [0.99030561, 0.00552872, 0.00416568],
       [0.00807645, 0.01468448, 0.97723907],
       [0.00424243, 0.75570437, 0.24005321],
       [0.00643323, 0.26039311, 0.73317367],
       [0.97405879, 0.02080457, 0.00513664],
       [0.98343223, 0.01184161, 0.00472615],
       [0.00399216, 0.00678745, 0.9892204 ],
       [0.00286603, 0.9750194 , 0.02211458],
       [0.98324697, 0.01218192, 0.00457111],
       [0.00431274, 0.00553233, 0.99015492],
       [0.00406214, 0.97002753, 0.02591033],
       [0.00321729, 0.89451435, 0.10226836],
       [0.98494445, 0.00909933, 0.00595622],
       [0.00363904, 0.99143624, 0.00492472],
       [0.00342979, 0.84146031, 0.1551099 ],
       [0.98810165, 0.00772726, 0.00417109],
       [0.99073784, 0.00512885, 0.00413331],
       [0.00344255, 0.91790337, 0.07865409],
       [0.0037071 , 0.76108499, 0.2352079 ],
       [0.

In [21]:
final_classes = finalpred.argmax(axis=-1) 
final_classes

array([0, 1, 1, 0, 2, 1, 2, 0, 0, 2, 1, 0, 2, 1, 1, 0, 1, 1, 0, 0, 1, 1,
       2, 0, 2, 1, 0, 0, 1, 2, 1, 2, 1, 2, 2, 0, 1, 0], dtype=int64)

In [22]:
total=np.sum([y_test[i]==final_classes[i] for i in range(len(final_classes))])

print("Accuracy:",total,"/",len(final_classes),"* 100 =","{0:.3f}".format(total/len(final_classes)*100),"%")

Accuracy: 37 / 38 * 100 = 97.368 %


### We can also use VotingClassifier from sklearn to combine the models

In [23]:
emodel = VotingClassifier(estimators=[('dt', model1), ('knn', model2),
                                    ('svc', model3)],
                        voting='soft')
emodel.fit(X_train, y_train)

VotingClassifier(estimators=[('dt', DecisionTreeClassifier(max_depth=4)),
                             ('knn', KNeighborsClassifier(n_neighbors=7)),
                             ('svc', SVC(probability=True))],
                 voting='soft')

In [24]:
#The accuracy score for the ensemble model is
escore=emodel.score(X_test, y_test)
print(escore)

0.9736842105263158


In [25]:
emodel.classes_

array([0, 1, 2])