In [68]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_iris


In [69]:
# Load the iris dataset via scikit-learn's load_iris; the returned object is
# dict-like (its keys are listed in the next cell).
iris = load_iris()

In [70]:
# Show which fields the loaded dataset object exposes.
iris.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'DESCR', 'feature_names', 'filename', 'data_module'])

In [71]:
# Wrap the feature matrix in a DataFrame with named columns and attach the
# integer class codes from iris.target as a "species" column.
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names).assign(
    species=iris.target
)
iris_df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),species
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [72]:
# Split the frame into the feature columns and the "species" label column.
y = iris_df["species"]
X = iris_df.drop(columns=["species"])

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=17
)
print(f"the dimension of X_train is {X_train.shape}")
print(f"the dimension of X_test is {X_test.shape}")

the dimension of X_train is (105, 4)
the dimension of X_test is (45, 4)


### Ensemble Technique

In [73]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier           #to take majority voting

from sklearn.metrics import accuracy_score,mean_squared_error


In [74]:
# Base learners that are later combined by the voting ensemble.
#
# The tree and the forest are randomized, so they get a fixed seed (the same
# one used for the train/test split); without it the reported scores change
# from run to run.
#
# NOTE(review): warnings are silenced at the top of the notebook, so a solver
# that stops at the default iteration cap would go unnoticed. The cap is raised
# here as a precaution; confirm convergence with warnings enabled.
log = LogisticRegression(max_iter=1000)
dt = DecisionTreeClassifier(random_state=17)
rnf = RandomForestClassifier(random_state=17)

In [75]:
# Combine the three base learners; with voting="hard" the ensemble predicts
# the class chosen by the majority of them.
voting = VotingClassifier(
    estimators=[
        ("logistic_regression", log),
        ("decision_tree", dt),
        ("random_forest", rnf),
    ],
    voting="hard",
)

In [76]:
# Train the voting ensemble on the training split.
voting.fit(X_train,y_train)

In [77]:
# Predict class codes for the held-out test rows; the array is the cell output.
voting.predict(X_test)

array([0, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 0, 1, 0, 2, 0, 0, 2, 2, 2, 1, 0,
       2, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 2, 1, 0, 1, 1, 0, 1,
       2])

In [78]:
# Fit each base learner and the voting ensemble on the same split and report
# train/test accuracy, so the ensemble can be compared against its parts.
for clf in (log, dt, rnf, voting):
    clf.fit(X_train, y_train)
    model_name = clf.__class__.__name__

    # Predict the test rows once and derive every test metric from y_pred,
    # rather than letting clf.score() predict the same rows a second time.
    y_pred = clf.predict(X_test)
    trainscore = accuracy_score(y_train, clf.predict(X_train))
    testscore = accuracy_score(y_test, y_pred)
    print(f"Train Accuracy for {model_name} : {trainscore}")
    print(f"Test Accuracy for {model_name}  : {testscore}")

    # NOTE(review): the targets are integer class codes (0/1/2), so squared
    # error depends on how the classes happen to be numbered. Treat MSE/RMSE
    # as a rough indicator only; accuracy above is the classification metric.
    mse = mean_squared_error(y_test, y_pred)
    print("mse:", mse)
    print("RMSE:", mse ** 0.5)
    print("_ _ _ _ _ _ _ _")

Train Accuarcy for LogisticRegression : 0.9809523809523809
Test Accuarcy for LogisticRegression  : 1.0
mse: 0.0
RMSE: 0.0
_ _ _ _ _ _ _ _
Train Accuarcy for DecisionTreeClassifier : 1.0
Test Accuarcy for DecisionTreeClassifier  : 1.0
mse: 0.0
RMSE: 0.0
_ _ _ _ _ _ _ _
Train Accuarcy for RandomForestClassifier : 1.0
Test Accuarcy for RandomForestClassifier  : 0.9777777777777777
mse: 0.022222222222222223
RMSE: 0.14907119849998599
_ _ _ _ _ _ _ _
Train Accuarcy for VotingClassifier : 1.0
Test Accuarcy for VotingClassifier  : 1.0
mse: 0.0
RMSE: 0.0
_ _ _ _ _ _ _ _
