## random forest

In [1]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_validate
from sklearn.model_selection import LeaveOneOut
from sklearn import metrics    # contains all the effectiveness indexes
import pandas as pd    # for the dataset
from matplotlib import pyplot as plt
from statistics import mean
import numpy as np

#### getting the dataset

In [2]:
# Column names assigned to the CSV, which is read without a header row:
# 'class' (used below as the target), 'spec_num' (presumably the specimen
# number -- it is excluded from the features) and the 14 feature columns.
labels = [
    'class', 'spec_num', 'eccentr', 'asp_ratio', 'elong', 'solidity',
    'stoch_conv', 'iso_factor', 'max_ind_depth', 'lobedness',
    'av_intensity', 'av_contr', 'smooth', 'third_mom', 'unif', 'entropy',
]

# note that the path is relative, not absolute
df = pd.read_csv(r'./leaf/leaf.csv', header=None, names=labels)

# Shuffle the rows. drop=True discards the old index instead of re-inserting
# it as an extra first column that would then have to be sliced away by
# position.
df = df.sample(frac=1).reset_index(drop=True)

# Features: columns 2..15, i.e. everything after 'class' and 'spec_num'.
X = df.iloc[:, 2:16]
# Target: the first column ('class').
y = df.iloc[:, 0]

#### evaluation of the technique using cross validation

In [4]:
# Random forest with 500 estimators, the Gini split criterion and
# max_features='sqrt'; this same estimator object is handed to the
# cross-validation runs below.
clf = RandomForestClassifier(
    n_estimators=500,
    criterion='gini',
    max_features='sqrt',
)

In [None]:
# k-fold cross validation: evaluate clf on (X, y) with k folds, collecting
# three scores per fold (balanced accuracy, ROC AUC one-vs-one and
# ROC AUC one-vs-rest).

k = 5

effect_cv = cross_validate(
    clf,
    X,
    y,
    cv=k,
    scoring=('balanced_accuracy', 'roc_auc_ovo', 'roc_auc_ovr'),
)

In [None]:
# Print the mean over the folds of each score collected by the k-fold run.
for caption, key in (
    ("balanced accuracy: ", 'test_balanced_accuracy'),
    ("ROC AUC OVO: ", 'test_roc_auc_ovo'),
    ("ROC AUC OVR: ", 'test_roc_auc_ovr'),
):
    # !s applies str() to the mean, exactly as explicit concatenation would.
    print(f"{caption}{mean(effect_cv[key])!s}")

In [None]:
# leave-one-out cross validation: same estimator and data as above, with a
# LeaveOneOut splitter and plain accuracy as the single score.

effect_loocv = cross_validate(
    clf,
    X,
    y,
    cv=LeaveOneOut(),
    scoring='accuracy',
)

In [None]:
# Mean of the per-split accuracy scores from the leave-one-out run.
print(f"accuracy: {mean(effect_loocv['test_score'])!s}")