In [1]:
from data import get_data
from sklearn.model_selection import StratifiedKFold
from models import classifier
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
import numpy as np
from sklearn import tree

In [2]:
MODELS = ['CNN', 'SVM', 'Decision_Tree', 'Random_Forest', 'Ada_Boost']

PARMAS = {
    'SVM': [
        {'kernel': 'rbf'},
        {'kernel': 'linear'},
        {'kernel': 'sigmoid'}
    ],
    'Decision_Tree': [
        {'criterion': 'gini', 'max_depth': 10},
        {'criterion': 'gini', 'max_depth': 25},
        {'criterion': 'gini', 'max_depth': 50},
        {'criterion': 'gini', 'max_depth': None},
        {'criterion': 'entropy', 'max_depth': 10},
        {'criterion': 'entropy', 'max_depth': 25},
        {'criterion': 'entropy', 'max_depth': 50},
        {'criterion': 'entropy', 'max_depth': None},
    ],
    'Random_Forest': [
        {'n_estimators': 10},
        {'n_estimators': 50},
        {'n_estimators': 100},
        {'n_estimators': 300},
    ],
    'Ada_Boost': [
        {'n_estimators': 10},
        {'n_estimators': 50},
        {'n_estimators': 100},
        {'n_estimators': 300},
    ],
    'CNN': [
        {'epoch': 50, 'lr': 1e-1},
        {'epoch': 50, 'lr': 1e-2},
        {'epoch': 50, 'lr': 1e-3},
        {'epoch': 100, 'lr': 1e-1},
        {'epoch': 100, 'lr': 1e-2},
        {'epoch': 100, 'lr': 1e-3},
        {'epoch': 300, 'lr': 1e-1},
        {'epoch': 300, 'lr': 1e-2},
        {'epoch': 300, 'lr': 1e-3},
    ],
}


In [3]:
def evaluation(gt, prediction):
    return f1_score(gt, prediction), precision_score(gt, prediction), recall_score(gt, prediction), accuracy_score(gt, prediction)

In [15]:
data, label = get_data('datasets/ionosphere.data')
skf = StratifiedKFold(n_splits=10, shuffle=True)

for m in ['Random_Forest']:
    for p in PARMAS[m]:
        f1_scores, precisions, recalls, accuracies = [], [], [], []
        for i in range(10):
            for train_index, test_index in skf.split(data, label):
                train = [[data[idx] for idx in train_index],
                         [label[idx] for idx in train_index]]
                test = [[data[idx] for idx in test_index],
                        [label[idx] for idx in test_index]]
                # print(len(train_index))
                prediction, gt, model = classifier(train, test, m, p)

                f1, pre, rec, acc = evaluation(gt, prediction)
                f1_scores.append(f1)
                precisions.append(pre)
                recalls.append(rec)
                accuracies.append(acc)
        print(f'{m}, params: {p}', end='\t\t\t\t\t')
        print('acc: {} precision: {}, recall: {}, f1: {}'.format(
            round(np.mean(accuracies), 4), round(np.mean(precisions), 4),
            round(np.mean(recalls), 4), round(np.mean(f1_scores), 4)
        ))

Preparing Data
Path : datasets/ionosphere.data
Data is successfully loaded
Random_Forest, params: {'n_estimators': 10}					acc: 0.9288 precision: 0.9396, recall: 0.9526, f1: 0.945
Random_Forest, params: {'n_estimators': 50}					acc: 0.9316 precision: 0.9366, recall: 0.9607, f1: 0.9476
Random_Forest, params: {'n_estimators': 100}					acc: 0.9325 precision: 0.935, recall: 0.9639, f1: 0.9482
Random_Forest, params: {'n_estimators': 300}					acc: 0.9336 precision: 0.9356, recall: 0.965, f1: 0.9492


In [16]:
# tree.plot_tree(model)

In [18]:
model

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=300,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [17]:
import graphviz
dot_data = tree.export_graphviz(model, out_file=None)
graph = graphviz.Source(dot_data)
graph.render('iris')

AttributeError: 'RandomForestClassifier' object has no attribute 'tree_'

In [12]:
dot_data = tree.export_graphviz(model, out_file=None,
                              feature_names = None,
                              class_names = None,
                              filled=True, rounded=True,
                              special_characters=True)

In [14]:
graph = graphviz.Source(dot_data)
graph.render('DT')

'DT.pdf'