In [5]:
###########################################################################
#        3 Machine Learning algorithms evaluation with iris dataset
#
# France
# November 2019
#
# Oriented by: Dr. R. Possamai and Ms. M. Trindade
# Author: LiÃ©ge Maldaner
# E-mail: liege.malda@gmail.com
#
#
# Results:
###########################################################################

# Packages
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import _pickle as plk
import os
from io import StringIO
import pylab as plt
from glob import glob
import argparse
#import progressbar
from numpy.lib import stride_tricks
from skimage import feature
from sklearn import metrics
from sklearn.model_selection import train_test_split
import time
import mahotas as mt
import random
from sklearn.datasets import load_iris
from sklearn.metrics import classification_report, confusion_matrix




In [31]:
def check_args(args):

    if args.classifier != "SVM" and args.classifier != "RF" and args.classifier != "GBC":
        raise ValueError("Classifier must be either SVM, RF or GBC")

    return args

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--classifier", help="Classification model to use", required = True)
    parser.add_argument("-o", "--output_model", help="Path to save model. Must end in .p", required = True)
    args = parser.parse_args()
    return check_args(args)

def read_data():

    print ('[INFO] Reading image data.')

    iris = load_iris()
    #feature_names = iris.feature_names
    classes = iris.target
    features = iris.data
    
    return features, classes

def dataset(features, classes):   
    print ('[INFO] Creating training dataset.')

    X_train = []
    X_test = []
    
    y_train = []
    y_test = []
    
    TEST_PERC = 0.25
    VAL_PERC = 0.10
    TRAIN_PERC = 0.75
    
    totalNum = len(features)
    testNum = int(TEST_PERC * totalNum)
    valNum = int(VAL_PERC * totalNum)
    trainNum = int(TRAIN_PERC * totalNum)
    
    index = np.arange(len(features))
    random_train =  np.random.choice(index,trainNum)
    random_test = np.random.choice(index,testNum)
    random_val = np.random.choice(index,valNum)
    
    X_train = features[random_train]
    X_test = features[random_test]
    X_val = features[random_val]
    
    y_train = classes[random_train]
    y_test = classes[random_test]
    y_val = classes[random_val]
    #for i in enumerate(features):
    #    .append(features)
        
    return X_train, X_test, X_val, y_train, y_test, y_val


In [25]:
def train_model(X, y, classifier):

    if classifier == "SVM":
        from sklearn.svm import SVC
        print ('[INFO] Training Support Vector Machine model.')
        model = SVC()
        model.fit(X, y)
    elif classifier == "RF":
        from sklearn.ensemble import RandomForestClassifier
        print ('[INFO] Training Random Forest model.')
        model = RandomForestClassifier(n_estimators=20, max_depth=12, random_state=0)
        # n_estimators = The number of trees in the forest;
        # max_depth = The maximum depth of the tree (tree size).
        # random_state = If int, random_state is the seed used by the random number generator; 
        model.fit(X, y)
    elif classifier == "GBC":
        from sklearn.ensemble import GradientBoostingClassifier
        model = GradientBoostingClassifier(n_estimators=20, learning_rate=1.0, max_depth=1, random_state=0)
        # learning_rate = learning rate shrinks the contribution of each tree by learning_rate. 
        #There is a trade-off between learning_rate and n_estimators;
        model.fit(X, y)

    print ('[INFO] Model training complete.')
    print ('[INFO] Training Accuracy: %.2f' %model.score(X, y))
    return model

In [35]:
def test_model(X, y, model):

    pred = model.predict(X)
    precision = metrics.precision_score(y, pred, average='weighted', labels=np.unique(pred))
    recall = metrics.recall_score(y, pred, average='weighted', labels=np.unique(pred))
    f1 = metrics.f1_score(y, pred, average='weighted', labels=np.unique(pred))
    accuracy = metrics.accuracy_score(y, pred)

    print ('--------------------------------')
    print('Confusion Matrix')
    print(confusion_matrix(y, pred))
    
    print ('--------------------------------')
    print('clssification report')
    print(classification_report(y, pred))
    
    print ('--------------------------------')
    print ('[RESULTS] Accuracy: %.2f' %accuracy)
    print ('[RESULTS] Precision: %.2f' %precision)
    print ('[RESULTS] Recall: %.2f' %recall)
    print ('[RESULTS] F1: %.2f' %f1)
    print ('--------------------------------')    
    

In [27]:
def main(classifier):

    start = time.time()

    features, classes = read_data()
    X_train, X_test, X_val, y_train, y_test, y_val = dataset(features, classes)
    model = train_model(X_train, y_train, classifier)
    test_model(X_test, y_test, model)
    #pkl.dump(model, open(output_model, "wb"))
    print ('Processing time:',time.time()-start)
    

In [45]:
print(iris.target_names)
print(iris.feature_names)

['setosa' 'versicolor' 'virginica']
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


In [36]:
main('GBC')

[INFO] Reading image data.
[INFO] Creating training dataset.
[INFO] Model training complete.
[INFO] Training Accuracy: 1.00
--------------------------------
Confusion Matrix
[[14  0  0]
 [ 0  8  1]
 [ 0  0 14]]
--------------------------------
clssification report
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        14
           1       1.00      0.89      0.94         9
           2       0.93      1.00      0.97        14

    accuracy                           0.97        37
   macro avg       0.98      0.96      0.97        37
weighted avg       0.97      0.97      0.97        37

--------------------------------
[RESULTS] Accuracy: 0.97
[RESULTS] Precision: 0.97
[RESULTS] Recall: 0.97
[RESULTS] F1: 0.97
--------------------------------
Processing time: 0.04927515983581543


In [47]:
main('RF')

[INFO] Reading image data.
[INFO] Creating training dataset.
[INFO] Training Random Forest model.
[INFO] Model training complete.
[INFO] Training Accuracy: 1.00
--------------------------------
Confusion Matrix
[[18  0  0]
 [ 0  8  0]
 [ 0  0 11]]
--------------------------------
clssification report
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        18
           1       1.00      1.00      1.00         8
           2       1.00      1.00      1.00        11

    accuracy                           1.00        37
   macro avg       1.00      1.00      1.00        37
weighted avg       1.00      1.00      1.00        37

--------------------------------
[RESULTS] Accuracy: 1.00
[RESULTS] Precision: 1.00
[RESULTS] Recall: 1.00
[RESULTS] F1: 1.00
--------------------------------
Processing time: 0.0469813346862793


In [43]:
iris = load_iris()
iris.classes_name

AttributeError: classes_name

In [46]:
main('SVM')

[INFO] Reading image data.
[INFO] Creating training dataset.
[INFO] Training Support Vector Machine model.
[INFO] Model training complete.
[INFO] Training Accuracy: 0.98
--------------------------------
Confusion Matrix
[[18  0  0]
 [ 0  8  0]
 [ 0  0 11]]
--------------------------------
clssification report
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        18
           1       1.00      1.00      1.00         8
           2       1.00      1.00      1.00        11

    accuracy                           1.00        37
   macro avg       1.00      1.00      1.00        37
weighted avg       1.00      1.00      1.00        37

--------------------------------
[RESULTS] Accuracy: 1.00
[RESULTS] Precision: 1.00
[RESULTS] Recall: 1.00
[RESULTS] F1: 1.00
--------------------------------
Processing time: 0.025807857513427734


