In [None]:
##########################################################
# 1. IMPORT ALL PACKAGES
##########################################################
import pandas as pd
import numpy as np
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder
import math
from collections import Counter
import seaborn as sns
import matplotlib.pyplot as plt #for plotting
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import multilabel_confusion_matrix
# importing mean()
from statistics import mean

In [None]:
##########################################################
# 2. LOAD DATASET
##########################################################
# Location of the Iris CSV (path relative to the notebook's working directory).
csv_path = "../input/iris-flower-dataset/IRIS.csv"

# header=0: the first row of the file supplies the column names.
data = pd.read_csv(csv_path, header=0)

# Replace the 'species' column with the integer codes produced by LabelEncoder;
# the fitted encoder is kept in `labelencoder`.
labelencoder = LabelEncoder()
species_codes = labelencoder.fit_transform(data['species'])
data['species'] = species_codes

# Show the first ten rows as the cell output.
data.head(10)



In [None]:
# Overview of the target column: which labels exist and how often each occurs.

# Check all classes/labels in the data
all_label = set(data['species'].tolist())
print("All labels: {0}".format(all_label))

# Data distribution for each class
dst_data = Counter(data['species'])
print(dst_data)

# Plot distribution.
# The column is passed through the `x` keyword: recent seaborn releases accept
# only `data` as a positional argument, so a positional Series is no longer
# interpreted as the variable to count.
sns.countplot(x=data['species'])



In [None]:
##########################################################
# 3. SPLIT INTO TEST AND TRAIN DATA
##########################################################
# Features are every column except the encoded target. The previous positional
# slice (`iloc[:, 1:]`) kept the 'species' column inside x, which handed the
# label to the classifier as an input feature (target leakage) and discarded
# the first column.
# NOTE(review): assumes all non-'species' columns are measurements; if the CSV
# carries an id column, drop it here as well.
x = data.drop(columns=['species'])
y = data['species'].tolist()

# Hold out 25% of the rows for testing; random_state fixes the shuffle.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)

# Preview the first feature row exactly as the model will receive it.
x.iloc[0:1].values

In [None]:
##########################################################
# 5. TRAIN RANDOM FOREST ALGORITHM
##########################################################
# Settings handed to RandomForestClassifier:
#   n_estimators (default=100)  - the number of trees in the forest
#   max_depth    (default=None) - the maximum depth of the tree
#   random_state                - seed value passed to the estimator
rf_params = {"n_estimators": 500, "max_depth": 2, "random_state": 0}
model_clf = RandomForestClassifier(**rf_params)

# Fit the forest on the training split; the call is left as the final
# expression so its return value is shown as the cell output.
model_clf.fit(x_train, y_train)

In [None]:
##########################################################
# 6. APPLY THE TRAINED LEARNER TO TEST NEW DATA
##########################################################
# Apply the trained random forest (model_clf) to predict labels for the test features
y_pred = model_clf.predict(x_test)

In [None]:
##########################################################
# 7. MULTI-CLASS CONFUSION MATRIX FOR EACH CLASS
##########################################################
def _safe_div(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is zero.

    A class that is never predicted (tp + fp == 0) or has no true samples
    would otherwise produce NaN, which then turns every averaged score
    computed from the stored lists into NaN.
    """
    return numerator / denominator if denominator else 0.0


# Actual and predicted classes
lst_actual_class = y_test
lst_predicted_class = y_pred

lst_classes = [0, 1, 2]  # Must be in order

# One 2x2 one-vs-rest matrix per class; indexed below as [[TN, FP], [FN, TP]]
arr_out_matrix = multilabel_confusion_matrix(lst_actual_class, lst_predicted_class, labels=lst_classes)

# Per-class results, read later when the overall scores are averaged
store_sens = []
store_spec = []
store_acc = []
store_bal_acc = []
store_prec = []
store_fscore = []
store_mcc = []

# Loop over each target label
for no_class in range(len(lst_classes)):
    arr_data = arr_out_matrix[no_class]
    # Report the label itself rather than its position in lst_classes
    print("Predicted Performance of Digit Label/Class: {0}".format(lst_classes[no_class]))

    # Convert to plain Python ints so the four-factor MCC product below
    # cannot overflow a fixed-width integer type.
    tp = int(arr_data[1][1])
    fp = int(arr_data[0][1])
    tn = int(arr_data[0][0])
    fn = int(arr_data[1][0])

    # Unrounded rates: derived scores are computed from these and rounded
    # once, instead of being built from already-rounded inputs.
    raw_sens = _safe_div(tp, tp + fn)
    raw_spec = _safe_div(tn, tn + fp)
    raw_prec = _safe_div(tp, tp + fp)

    sensitivity = round(raw_sens, 3)
    specificity = round(raw_spec, 3)
    accuracy = round(_safe_div(tp + tn, tp + fp + tn + fn), 3)
    balanced_accuracy = round((raw_sens + raw_spec) / 2, 3)
    precision = round(raw_prec, 3)
    fscore = round(_safe_div(2 * raw_prec * raw_sens, raw_prec + raw_sens), 3)
    mcc_denominator = math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
    mcc = round(_safe_div((tp * tn) - (fp * fn), mcc_denominator), 3)

    store_sens.append(sensitivity)
    store_spec.append(specificity)
    store_acc.append(accuracy)
    store_bal_acc.append(balanced_accuracy)
    store_prec.append(precision)
    store_fscore.append(fscore)
    store_mcc.append(mcc)

    print("TP={0}, FP={1}, TN={2}, FN={3}".format(tp, fp, tn, fn))
    print("Sensitivity: {0}".format(sensitivity))
    print("Specificity: {0}".format(specificity))
    print("Accuracy: {0}".format(accuracy))
    print("Balanced Accuracy: {0}".format(balanced_accuracy))
    print("Precision: {0}".format(precision))
    print("F1-Score: {0}".format(fscore))
    print("MCC: {0}\n".format(mcc))

In [None]:
##########################################################
# 8. OVERALL - FINAL PREDICTION PERFORMANCE
##########################################################
# Each score is the plain average of the per-class values collected in the
# store_* lists. The first six are printed as percentages, MCC on its own scale.
print("Overall Performance Prediction:")

percent_reports = (
    ("Sensitivity: {0}%", store_sens),
    ("Specificity: {0}%", store_spec),
    ("Accuracy: {0}%", store_acc),
    ("Balanced Accuracy: {0}%", store_bal_acc),
    ("Precision: {0}%", store_prec),
    ("F1-Score: {0}%", store_fscore),
)
for template, per_class_scores in percent_reports:
    averaged_pct = round(mean(per_class_scores) * 100, 4)
    print(template.format(averaged_pct))

averaged_mcc = round(mean(store_mcc), 4)
print("MCC: {0}\n".format(averaged_mcc))