<div style="width: 100%; background-color: #222; text-align: center">
<br><br>

<h1 style="color: white; font-weight: bold;">
    Project
</h1>
    
<h3 style="color: #ef7d22; font-weight: normal;">
    Build an ensemble voting classifier
</h3>

<br><br> 
</div>

![orange-divider](https://user-images.githubusercontent.com/7065401/98619088-44ab6000-22e1-11eb-8f6d-5532e68ab274.png)

In [1]:
# Import packages
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import StackingClassifier,VotingClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import Normalizer,StandardScaler

import warnings

# Silence every warning category for the rest of the session so the cell
# outputs stay uncluttered.
# NOTE(review): a blanket filter presumably also hides any convergence
# warnings raised while fitting the MLP models - confirm that is intended.
warnings.simplefilter('ignore')

In [6]:
# Load the iris features and labels as plain arrays
X, y = load_iris(return_X_y=True)

# Hold out 20% of the samples for testing; stratifying on y keeps the class
# proportions the same in the train and test splits
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Preprocessing: normalize first, then standardize.
# Both transformers are fitted on the training split only and are merely
# re-applied to the test split, so no test information leaks into the fit.
norm = Normalizer()
stsc = StandardScaler()

X_train_norm = norm.fit_transform(X_train)
X_test_norm = norm.transform(X_test)

X_train_scaled = stsc.fit_transform(X_train_norm)
X_test_scaled = stsc.transform(X_test_norm)

# Build and fit the 3 base classifiers on the preprocessed train data
# (the solutions use GaussianNB, MLPClassifier and DecisionTreeClassifier,
# but you may use any you wish). fit() returns the estimator itself, so each
# name is bound to the fitted model.
gaus = GaussianNB().fit(X_train_scaled, y_train)
mlpc = MLPClassifier(random_state=42).fit(X_train_scaled, y_train)
dtc = DecisionTreeClassifier(random_state=42).fit(X_train_scaled, y_train)

# Show the last fitted estimator as the cell output
dtc

DecisionTreeClassifier(random_state=42)

In [7]:
# Combine the 3 base classifiers into a soft-voting ensemble;
# n_jobs=-1 lets the fitting use all available cores
vt = VotingClassifier(
    estimators=[
        ('gaus', gaus),
        ('mlpc', mlpc),
        ('dtc', dtc),
    ],
    voting='soft',
    n_jobs=-1,
)

# Fit the ensemble on the train data (the fitted ensemble is the cell output)
vt.fit(X_train_scaled, y_train)

VotingClassifier(estimators=[('gaus', GaussianNB()),
                             ('mlpc', MLPClassifier(random_state=42)),
                             ('dtc', DecisionTreeClassifier(random_state=42))],
                 n_jobs=-1, voting='soft')

In [8]:
# Training accuracy of each base estimator, in the order (gaus, mlpc, dtc)
tuple(clf.score(X_train_scaled, y_train) for clf in (gaus, mlpc, dtc))

(0.975, 0.9583333333333334, 1.0)

In [9]:
# Test-set scores of each base estimator, in the order (gaus, mlpc, dtc)
tuple(clf.score(X_test_scaled, y_test) for clf in (gaus, mlpc, dtc))

(0.9666666666666667, 0.9666666666666667, 0.9666666666666667)

In [11]:
# Accuracy of the voting ensemble as a (train, test) pair
tuple(
    vt.score(features, labels)
    for features, labels in (
        (X_train_scaled, y_train),
        (X_test_scaled, y_test),
    )
)

(0.9833333333333333, 0.9666666666666667)

In [15]:
## Use the StackingClassifier on this data to see if there is an improvement in performance
# Fresh, unfitted copies of the 3 base estimators as (name, estimator) tuples
estimators = [
    ('nbc', GaussianNB()),
    ('mlpc', MLPClassifier(random_state=42)),
    ('dtc', DecisionTreeClassifier(random_state=42)),
]

# Instantiate the stacking classifier: an MLP acts as the final estimator on
# top of the base estimators, with cv=3 passed as the cross-validation setting
stack = StackingClassifier(
    estimators=estimators,
    final_estimator=MLPClassifier(random_state=42),
    cv=3,
)

# Fit the stacking classifier to the train data
stack.fit(X_train_scaled, y_train)

# Scores of the stacked model as a (train, test) pair
tuple(
    stack.score(features, labels)
    for features, labels in (
        (X_train_scaled, y_train),
        (X_test_scaled, y_test),
    )
)

(0.9833333333333333, 0.9666666666666667)