# Application 2: Iris Flower Species Identification 

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn import datasets, preprocessing 
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import *

import warnings
warnings.filterwarnings('ignore')

In [2]:
import ipywidgets as widgets
from IPython.display import clear_output

In [3]:
# Read the Iris measurements from disk. The file is parsed with header=None,
# so its first line is treated as data and the column names come from here.
features = [
    'Sepal Length (cm)',
    'Sepal Width (cm)',
    'Petal Length (cm)',
    'Petal Width (cm)',
]
target = ['Class']
columns = features + target  # the label column comes last
data = pd.read_csv('iris.csv', names=columns, header=None)
# data

In [4]:
# look for NaN values in each column
# data.isnull().sum()

In [5]:
# data['Class'].value_counts() 

In [6]:
# PREPROCESSING
# Replace the species name in `Class` with an integer code.
CLASS_CODES = {'Iris-versicolor': 1, 'Iris-virginica': 2, 'Iris-setosa': 3}

# Only encode while the column still holds species names: mapping an already
# encoded column would turn every value into NaN, so this guard makes the cell
# safe to re-run.
if not pd.api.types.is_numeric_dtype(data['Class']):
    data['Class'] = data['Class'].map(CLASS_CODES)

# `map` yields NaN for any label missing from CLASS_CODES; fail here with a
# clear message instead of letting the NaN surface later during model fitting.
if data['Class'].isna().any():
    raise ValueError("'Class' contains labels that are not in CLASS_CODES")

In [7]:
# data.head()

In [8]:
SIZE_TEST = 0.3  # fraction of the rows held out for testing

# Every column except the last is a feature; the last column is the class code.
X_raw = data.iloc[:, :-1].values
Y = data.iloc[:, -1].values

# Split BEFORE scaling so that the scaler never sees the held-out rows.
# Fitting it on the full dataset leaks the test set's min/max into training.
X_train_raw, X_test_raw, Y_train, Y_test = train_test_split(
    X_raw, Y, test_size=SIZE_TEST, random_state=0
)

# Learn the per-feature min/max from the training rows only, then apply the
# same transformation to every other set of rows.
min_max_scaler = preprocessing.MinMaxScaler()
X_train = min_max_scaler.fit_transform(X_train_raw)
X_test = min_max_scaler.transform(X_test_raw)

# `X` keeps its meaning of "normalized feature matrix for all rows".
X = min_max_scaler.transform(X_raw)

In [9]:
### Logistic Regression

In [10]:
# Logistic regression with an L2 (ridge) penalty. Every setting is spelled out
# so the configuration can be read without consulting the library defaults.
lr_classifier = LogisticRegression(
    penalty='l2',
    fit_intercept=True,
    tol=1e-4,
    max_iter=100,
    random_state=None,
)

In [11]:
# Train the logistic-regression model on the training split. The result is
# bound to `_` so the cell ends in an assignment and echoes nothing as output.
_ = lr_classifier.fit(X_train, Y_train)

In [12]:
# print(f'No. of classes: {lr_classifier.classes_}')
# print(f'Coefficients: {lr_classifier.coef_}')
# print(f'Intercept: {lr_classifier.intercept_}')
# print(f'No. of iterations: {lr_classifier.n_iter_}')

In [13]:
# y_pred = lr_classifier.predict(X_test)

In [14]:
# print(confusion_matrix(Y_test, y_pred))

In [15]:
# print(classification_report(Y_test, y_pred))

In [16]:
### Linear Discriminant Analysis

In [17]:
# Linear Discriminant Analysis using the SVD solver, with no shrinkage,
# no user-supplied priors and no custom covariance estimator.
lda_classifier = LinearDiscriminantAnalysis(
    solver='svd',
    tol=1e-4,
    n_components=None,
    priors=None,
    shrinkage=None,
    covariance_estimator=None,
    store_covariance=False,
)
# How many dimensions / features were used for the classification?
# If a subset was used, how was it decided?
# Can we see the mean and scatter of each feature used?

In [18]:
# Fit LDA on the training split and score the held-out split in one chain;
# `y_pred` is the shared name each model section overwrites with its predictions.
y_pred = lda_classifier.fit(X_train, Y_train).predict(X_test)

In [19]:
# print(confusion_matrix(Y_test, y_pred))

In [20]:
# print(classification_report(Y_test, y_pred))

In [21]:
# lda_classifier.classes_

In [22]:
# lda_classifier.means_

In [23]:
### Support Vector Machine

In [24]:
# Support vector classifier with an RBF kernel. `degree` and `coef0` are
# listed for completeness alongside the kernel settings.
svc_classifier = SVC(
    # kernel configuration
    kernel='rbf',
    gamma='scale',
    degree=3,
    coef0=0.0,
    # regularisation and class handling
    C=1.0,
    class_weight=None,
    decision_function_shape='ovr',
    break_ties=False,
    probability=False,
    # solver behaviour
    tol=1e-3,
    max_iter=-1,
    shrinking=True,
    cache_size=200,
    verbose=False,
    random_state=None,
)

In [25]:
# Fit the SVM on the training split, then predict the held-out split;
# this replaces the previous model's `y_pred`.
y_pred = svc_classifier.fit(X_train, Y_train).predict(X_test)

In [26]:
# print(confusion_matrix(Y_test, y_pred))

In [27]:
# print(classification_report(Y_test, y_pred))

In [28]:
### KNN Classifier

In [29]:
# k-nearest-neighbours classifier: 5 neighbours, each with an equal vote.
kn_classifier = KNeighborsClassifier(
    n_neighbors=5,
    weights='uniform',
    algorithm='auto',  # strategy used to identify the nearest neighbours
    metric='minkowski',
    p=2,  # Minkowski distance with p=2 is the Euclidean distance
)

In [30]:
# Train the KNN model on the training split. The result is bound to `_` so
# the cell ends in an assignment and echoes nothing as output.
_ = kn_classifier.fit(X_train, Y_train)

In [31]:
# print(f'Effective Metric = {kn_classifier.effective_metric_}')

In [32]:
# Predict the held-out split with the fitted KNN model. `y_pred` is reused by
# every model section, so this overwrites the previous model's predictions.
y_pred = kn_classifier.predict(X_test)

In [33]:
# print(confusion_matrix(Y_test, y_pred))

In [34]:
# print(classification_report(Y_test, y_pred))

In [35]:
### Gaussian Naive Bayes Classifier

In [36]:
# Gaussian Naive Bayes classifier, constructed with the library defaults
# (no arguments are passed).
gnb_classifier = GaussianNB()

In [37]:
# Fit Gaussian Naive Bayes on the training split and predict the held-out
# split; this replaces the previous model's `y_pred`.
y_pred = gnb_classifier.fit(X_train, Y_train).predict(X_test)

In [38]:
# print(confusion_matrix(Y_test, y_pred))

In [39]:
# print(classification_report(Y_test, y_pred))

In [40]:
### Decision Tree

In [41]:
# Decision tree using Gini impurity and best-split selection, with no limit
# on depth or leaf count and no pruning (ccp_alpha=0.0). The seed is fixed.
dt_classifier = DecisionTreeClassifier(
    # split selection
    criterion='gini',
    splitter='best',
    max_features=None,
    random_state=43,
    # growth limits
    max_depth=None,
    max_leaf_nodes=None,
    min_samples_split=2,
    min_samples_leaf=1,
    min_weight_fraction_leaf=0.0,
    min_impurity_decrease=0.0,
    # weighting and pruning
    class_weight=None,
    ccp_alpha=0.0,
)

In [42]:
# Train the decision tree on the training split. The result is bound to `_`
# so the cell ends in an assignment and echoes nothing as output.
_ = dt_classifier.fit(X_train, Y_train)

In [43]:
# Predict the held-out split with the fitted decision tree; this overwrites
# the `y_pred` left behind by the previous model section.
y_pred = dt_classifier.predict(X_test)

In [44]:
# print(confusion_matrix(Y_test, y_pred))

In [45]:
# print(classification_report(Y_test, y_pred))

In [46]:
# dt_classifier.feature_importances_

In [47]:
# fig = plt.figure(figsize=(25,20))
# plot_tree(dt_classifier, 
#               feature_names = data.columns[0:-1],
#               filled=True)
# plt.show()

In [48]:
#fig.savefig("decision_tree.png")

In [63]:
### Random Forest

In [49]:
# Random forest of 100 Gini trees with bootstrap sampling and a fixed seed.
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    criterion='gini',
    max_depth=None,
    min_samples_split=2,
    min_samples_leaf=1,
    min_weight_fraction_leaf=0.0,
    # 'sqrt' is what 'auto' meant for classifiers. The 'auto' alias has been
    # removed from current scikit-learn releases, while 'sqrt' is accepted by
    # both old and new versions.
    max_features='sqrt',
    max_leaf_nodes=None,
    min_impurity_decrease=0.0,
    # `min_impurity_split` is intentionally not passed: the parameter no longer
    # exists in current scikit-learn and passing it raises a TypeError. It was
    # None here, so dropping it does not change the model.
    bootstrap=True,
    oob_score=False,
    n_jobs=None,
    random_state=43,
    verbose=0,
    warm_start=False,
    class_weight=None,
    ccp_alpha=0.0,
    max_samples=None,
)

In [50]:
# Train the random forest on the training split. The result is bound to `_`
# so the cell ends in an assignment and echoes nothing as output.
_ = rf_classifier.fit(X_train, Y_train)

In [51]:
# Predict the held-out split with the random forest. This section previously
# called dt_classifier here, so the Random Forest evaluation cells were
# reporting the decision tree's predictions instead.
y_pred = rf_classifier.predict(X_test)

In [52]:
# print(confusion_matrix(Y_test, y_pred))

In [53]:
# print(classification_report(Y_test, y_pred))

In [54]:
# Extract single tree
# estimator = rf_classifier.estimators_[0]

In [55]:
# fig = plt.figure(figsize=(25,10))
# plot_tree(rf_classifier.estimators_[0], 
#                   max_depth = 5,
#                   feature_names = data.columns[0:-1],
#                   rounded = True, 
#                   precision = 2,
#                   filled = True,
#                   )
# plt.show()

In [56]:
# fig = plt.figure(figsize=(25,10))

# plot_tree(rf_classifier.estimators_[1], 
#                   max_depth = 5,
#                   feature_names = data.columns[0:-1],
#                   rounded = True, 
#                   precision = 2,
#                   filled = True,
#                   )
# # plt.show()

In [57]:
# data.head()

In [58]:
# One free-text input per flower measurement, created in feature order.
sepal_length, sepal_width, petal_length, petal_width = (
    widgets.Text(description=label)
    for label in ("Sepal Length", "Sepal Width", "Petal Length", "Petal Width")
)

In [59]:
# Prompt, then render the four measurement inputs in feature order.
print('Please enter the details:')
display(sepal_length, sepal_width, petal_length, petal_width)

Please enter the details:


Text(value='', description='Sepal Length')

Text(value='', description='Sepal Width')

Text(value='', description='Petal Length')

Text(value='', description='Petal Width')

In [60]:
# Model selector: each option is a (label shown to the user, short code) pair.
# The short code is what `algorithm.value` returns.
algorithm = widgets.Dropdown(
    disabled=False,
    options=[
        ('Logistic Regression', 'LR'),
        ('Linear Discriminant Analysis ', 'LDA'),
        ('Support Vector Machines', 'SVM'),
        ('K-Nearest Neighbors', 'KN'),
        ('Naive Bayes', 'NB'),
        ('Decision Trees', 'DT'),
        ('Random Forest', 'RF'),
    ],
)

print('Select Algorithm:')
display(algorithm)

Select Algorithm


Dropdown(options=(('Logistic Regression', 'LR'), ('Linear Discriminant Analysis ', 'LDA'), ('Support Vector Ma…

In [61]:
prediction = widgets.Output()

button_predict = widgets.Button(description="Predict")


def on_button_predict_clicked(b):
    """Classify the flower described by the four measurement inputs.

    Reads the text widgets, applies the same min-max scaling that was applied
    to the training features, runs the classifier chosen in the `algorithm`
    dropdown and prints the predicted class code into the `prediction` output
    area. The printed value is the integer code assigned to the species during
    preprocessing, not the species name.

    Parameters
    ----------
    b : the clicked button, as passed by `Button.on_click`. It is not used.
    """
    # Built on each click so the handler always sees the current estimators,
    # even if a model cell was re-run after this cell.
    classifiers = {
        'LR': lr_classifier,
        'LDA': lda_classifier,
        'SVM': svc_classifier,
        'KN': kn_classifier,
        'NB': gnb_classifier,
        'DT': dt_classifier,
        'RF': rf_classifier,
    }
    selected_algorithm = algorithm.value

    with prediction:
        clear_output(True)

        # Empty or non-numeric text makes float() raise ValueError; report it
        # in the output area instead of failing inside the widget callback.
        try:
            measurements = [
                float(field.value)
                for field in (sepal_length, sepal_width, petal_length, petal_width)
            ]
        except ValueError:
            print('Please enter a numeric value for every measurement.')
            return

        classifier = classifiers.get(selected_algorithm)
        if classifier is None:
            print(f'Unknown algorithm: {selected_algorithm}')
            return

        # The models were trained on min-max scaled features, so the raw
        # measurements must go through the same fitted scaler before predicting.
        user_input = min_max_scaler.transform(np.array([measurements]))

        print(f'Selected Algorithm = {selected_algorithm}')
        print(classifier.predict(user_input)[0])


button_predict.on_click(on_button_predict_clicked)

In [62]:
# Render the Predict button, followed by the area its results are printed into.
display(button_predict, prediction)

Button(description='Predict', style=ButtonStyle())

Output()