In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter

from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score

# base models
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
# majority vote
from sklearn.ensemble import VotingClassifier
# bagging
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier
# boosting
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import GradientBoostingClassifier

# Get the data

In [14]:
iris = load_iris()
feature_names = iris.feature_names
target_names = iris.target_names
X = pd.DataFrame(iris.data, columns=feature_names)
y = iris.target

In [15]:
ss = StandardScaler()
X = pd.DataFrame(ss.fit_transform(X), columns=X.columns)

In [16]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3,stratify=y,random_state=23)

# Majority Vote

In [17]:
knn = KNeighborsClassifier()
knn.fit(X_train,y_train)
print(cross_val_score(knn, X_test, y_test))
cross_val_score(knn, X_test, y_test).mean()

[0.88888889 0.88888889 1.         0.88888889 1.        ]


0.9333333333333332

In [18]:
dt = DecisionTreeClassifier()
dt.fit(X_train,y_train)
print(cross_val_score(dt, X_test, y_test))
cross_val_score(dt, X_test, y_test).mean()

[0.88888889 0.77777778 1.         0.88888889 0.77777778]


0.8666666666666666

In [19]:
gnb = GaussianNB()
gnb.fit(X_train,y_train)
print(cross_val_score(gnb, X_test, y_test))
cross_val_score(gnb, X_test, y_test).mean()

[0.88888889 1.         1.         0.77777778 1.        ]


0.9333333333333332

In [20]:
voter = VotingClassifier(estimators=[('dt',dt),('bayes',gnb),('neighbors',knn)],voting='soft')
voter.fit(X_train,y_train)
print(cross_val_score(voter, X_test, y_test))
cross_val_score(voter, X_test, y_test).mean()

[1.         0.88888889 1.         0.88888889 1.        ]


0.9555555555555555

# Bagging


In [21]:
rfc = RandomForestClassifier()
rfc.fit(X_train,y_train)
print(cross_val_score(rfc, X_test, y_test))
cross_val_score(rfc, X_test, y_test).mean()

[0.88888889 0.77777778 1.         0.88888889 1.        ]


0.9111111111111111

In [22]:
bc = BaggingClassifier(base_estimator=dt)
bc.fit(X_train,y_train)
print(cross_val_score(bc, X_test, y_test))
cross_val_score(bc, X_test, y_test).mean()

[0.88888889 0.77777778 1.         0.88888889 1.        ]


0.9111111111111111

# Boosting

In [23]:
gbc = GradientBoostingClassifier()
gbc.fit(X_train,y_train)
print(cross_val_score(gbc, X_test, y_test))
cross_val_score(gbc, X_test, y_test).mean()

[0.88888889 0.77777778 1.         0.88888889 1.        ]


0.9111111111111111

In [24]:
abc = AdaBoostClassifier()
abc.fit(X_train,y_train)
print(cross_val_score(abc, X_test, y_test))
cross_val_score(abc, X_test, y_test).mean()

[0.88888889 0.77777778 1.         0.88888889 0.88888889]


0.9333333333333332