In [13]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('iris.data.csv', names = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species'], 
                index_col = False)

In [3]:
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
from sklearn.preprocessing import LabelEncoder

In [5]:
encoder = LabelEncoder()
df['species'] = encoder.fit_transform(df.species)

In [6]:
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [7]:
df_new = df[df.species != 0].iloc[:, [0, 1, 4]]

In [8]:
df_new

Unnamed: 0,sepal_length,sepal_width,species
50,7.0,3.2,1
51,6.4,3.2,1
52,6.9,3.1,1
53,5.5,2.3,1
54,6.5,2.8,1
...,...,...,...
145,6.7,3.0,2
146,6.3,2.5,2
147,6.5,3.0,2
148,6.2,3.4,2


In [9]:
X = df.iloc[:, 0:2]
y = df.iloc[:, -1]

In [10]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

In [11]:
clf1 = LogisticRegression()
clf2 = RandomForestClassifier()
clf3 = KNeighborsClassifier()

In [12]:
estimators = [('lr', clf1), ('rf', clf2), ('knn', clf3)]

In [14]:
for estimator in estimators:
    x = cross_val_score(estimator[1], X, y, cv = 10, scoring = 'accuracy')
    print(f"for {estimator[0]} accuracy is: {np.round(np.mean(x), 2)}")

for lr accuracy is: 0.81
for rf accuracy is: 0.71
for knn accuracy is: 0.76


In [15]:
from sklearn.ensemble import VotingClassifier

### Hard Voting

In [16]:
vc = VotingClassifier(estimators=estimators)
x = cross_val_score(vc, X, y, cv=10, scoring='accuracy')
print(np.round(np.mean(x), 2))

0.77


### Soft Voting

In [17]:
vc = VotingClassifier(estimators=estimators, voting='soft')
x = cross_val_score(vc, X, y, cv=10, scoring='accuracy')
print(np.round(np.mean(x), 2))

0.77


### Using Weights

In [21]:
for i in range(1, 4):
    for j in range(1, 4):
        for k in range(1, 4):
            vc = VotingClassifier(estimators=estimators, voting='soft', weights=[i, j, k])
            x = cross_val_score(vc, X, y, cv=10, scoring='accuracy')
            print(f"for i={i}, j={j}, k={k} accuracy: {np.round(np.mean(x), 2)}")

for i=1, j=1, k=1 accuracy: 0.77
for i=1, j=1, k=2 accuracy: 0.77
for i=1, j=1, k=3 accuracy: 0.76
for i=1, j=2, k=1 accuracy: 0.74
for i=1, j=2, k=2 accuracy: 0.77
for i=1, j=2, k=3 accuracy: 0.75
for i=1, j=3, k=1 accuracy: 0.73
for i=1, j=3, k=2 accuracy: 0.75
for i=1, j=3, k=3 accuracy: 0.76
for i=2, j=1, k=1 accuracy: 0.77
for i=2, j=1, k=2 accuracy: 0.77
for i=2, j=1, k=3 accuracy: 0.77
for i=2, j=2, k=1 accuracy: 0.75
for i=2, j=2, k=2 accuracy: 0.77
for i=2, j=2, k=3 accuracy: 0.77
for i=2, j=3, k=1 accuracy: 0.76
for i=2, j=3, k=2 accuracy: 0.76
for i=2, j=3, k=3 accuracy: 0.77
for i=3, j=1, k=1 accuracy: 0.79
for i=3, j=1, k=2 accuracy: 0.78
for i=3, j=1, k=3 accuracy: 0.79
for i=3, j=2, k=1 accuracy: 0.77
for i=3, j=2, k=2 accuracy: 0.77
for i=3, j=2, k=3 accuracy: 0.77
for i=3, j=3, k=1 accuracy: 0.77
for i=3, j=3, k=2 accuracy: 0.76
for i=3, j=3, k=3 accuracy: 0.77


### Interesting use of VotingClassifier

In [23]:
from sklearn.svm import SVC
from sklearn.datasets import make_classification

In [24]:
X, y = make_classification(n_samples=2000, n_features=20, n_informative=15, n_redundant=5, random_state=2)

svm1 = SVC(probability=True, kernel="poly", degree=1)
svm2 = SVC(probability=True, kernel="poly", degree=2)
svm3 = SVC(probability=True, kernel="poly", degree=3)
svm4 = SVC(probability=True, kernel="poly", degree=4)
svm5 = SVC(probability=True, kernel="poly", degree=5)

estimators = [('svm1', svm1), ('svm2', svm2), ('svm3', svm3), ('svm4', svm4), ('svm5', svm5)]

for estimator in estimators:
    x = cross_val_score(estimator[1], X, y, cv=10, scoring='accuracy')
    print(f"{estimator[0]}: {np.round(np.mean(x), 2)}")

svm1: 0.82
svm2: 0.88
svm3: 0.91
svm4: 0.87
svm5: 0.9


In [26]:
vc1 = VotingClassifier(estimators=estimators, voting='soft')
x = cross_val_score(vc1, X, y, cv=10, scoring='accuracy')
print(np.round(np.mean(x), 2))

0.94
