# Test: Bestimmung Precision / Recall mit einem DT
Wir haben bereits diverse Matrizen mit einer SVM getestet und Precision und Recall bestimmt. In diesem Notebook bestimmen wir Precision und Recall bei der Klassifikation mit einem Decision Tree und der Klassifikationsmatrix.

#### Test mit Klassifikationsmatrix
* #1: Precision 0.865206402696, Recall 0.849462365591
* #2: Precision 0.85737840066, Recall 0.860215053763


In [2]:
import time

import matplotlib.pyplot as plt
import numpy as np
from sklearn import tree
from sklearn.metrics import precision_recall_curve, precision_score, recall_score

from imports.matrix_helper import MatrixHelper

# Train a decision tree on the classification feature matrix and report
# precision and recall on a held-out test split, together with the time the
# fit/predict/score steps took.
matrix_helper = MatrixHelper()

# Read pickle; only the first element of the loaded result is used here
feature_matrix = matrix_helper.load_from_parse('data/feature_matrix_sparse_classification.pickle')[0]

# Create separate matrices for vulnerable and not vulnerable components
vulnerable, not_vulnerable = matrix_helper.split_vulnerable_notvulnerable(feature_matrix)

# Split each class on its own into a training set (2/3) and a test set (1/3)
not_vulnerable_training, not_vulnerable_test = matrix_helper.split_training_test(not_vulnerable, 2.0/3)
vulnerable_training, vulnerable_test = matrix_helper.split_training_test(vulnerable, 2.0/3)

# Concatenate vulnerable/not-vulnerable and split into data and target matrices
training_data, training_target = matrix_helper.create_data_target(not_vulnerable_training, vulnerable_training)
test_data, test_target = matrix_helper.create_data_target(not_vulnerable_test, vulnerable_test)

start = time.time()

# Create the decision tree classifier.
# NOTE(review): no random_state is set, so repeated runs may produce slightly
# different trees and therefore different scores.
clf = tree.DecisionTreeClassifier()

# Fit the classifier to the training data
clf.fit(training_data, training_target)

# Predict the held-out test data
target_prediction = clf.predict(test_data)

# Compute precision and recall directly from the predicted labels.
# clf.predict returns hard class labels rather than scores, so there is only
# one operating point. Reading it as element [1] of precision_recall_curve
# is only correct when the prediction contains both labels and the curve is
# not truncated; otherwise that index points at the (precision=1, recall=0)
# sentinel appended by the curve function.
# NOTE(review): precision_score/recall_score use pos_label=1 by default;
# this assumes vulnerable components are labeled 1 - confirm against
# MatrixHelper.create_data_target.
precision = precision_score(test_target, target_prediction)
recall = recall_score(test_target, target_prediction)

end = time.time()
# Elapsed wall-clock time in minutes (covers fit, predict and scoring)
elapsed = (end - start) / 60

# Print the class share in the test set and in the prediction, then the scores
print("Verwundbare Komponenten im Testset: {0:10.1f}%".format(matrix_helper.get_vulnerable_percentage(test_target)))
print("Verwundbare Komponenten in Vorhersage: {0:10.1f}%".format(matrix_helper.get_vulnerable_percentage(target_prediction)))
print('------------------------------')
print('Precision {}, Recall {}'.format(precision, recall))
print('time: {0:10.1f}min'.format(elapsed))

Verwundbare Komponenten im Testset:       18.6%
Verwundbare Komponenten in Vorhersage:       19.0%
------------------------------
Precision 0.699757869249, Recall 0.71652892562
time:        1.4min
