# Test: Bestimmung Precision / Recall mit einem DT
Wir haben bereits diverse Matrizen mit einer SVM getestet und Precision und Recall bestimmt. In diesem Notebook bestimmen wir Precision und Recall bei der Klassifikation mit einem Decision Tree (DT) und der Klassifikationsmatrix.

# Das folgende Skript ist für einen average test mit einer einzelnen Matrix

In [1]:
import matplotlib.pyplot as plt
import numpy as np

from imports.matrix_helper import MatrixHelper
from imports.prediction_helper import PredictionHelper
from sklearn.metrics import precision_recall_curve

matrix_helper = MatrixHelper()

# Load the pickled classification matrices once; every repetition reuses them.
matrices = matrix_helper.load_from_parse('data/matrices/matrix_cla_incl_history.pickle')

# One precision / recall sample is collected per repetition and averaged below.
precision_list = []
recall_list = []
for i in range(10):
    # A fresh PredictionHelper per repetition, validated with model_type='DT'
    # (decision tree, not an SVM) and a sampling factor of 2/3.
    prediction_helper = PredictionHelper()
    prediction_helper.calculate_validation_compare_matrix(
        matrices, sampling_factor=(2.0/3), model_type='DT')
    compare_matrix = prediction_helper.get_compare_matrix()

    # Column 2 is handed to precision_recall_curve as the ground truth,
    # column 1 as the predicted score; both are converted to float arrays.
    reference_labels = np.array(compare_matrix[:, 2], dtype='f')
    predicted_labels = np.array(compare_matrix[:, 1], dtype='f')
    precision, recall, thresholds = precision_recall_curve(reference_labels, predicted_labels)

    # NOTE(review): index 1 is used as the operating point; presumably the
    # predictions are hard 0/1 labels so this is the positive-class
    # threshold -- confirm against PredictionHelper.
    run_precision = precision[1]
    run_recall = recall[1]
    precision_list.append(run_precision)
    recall_list.append(run_recall)

    # One table row per repetition: precision, recall, and the helper's reported time.
    print('|{:^11.3f}|{:^8.3f}|{:5.1f}min|'.format(run_precision, run_recall, prediction_helper.time))

# Arithmetic mean over all repetitions (float casts keep true division on Python 2).
run_count = len(precision_list)
precision_average = sum(precision_list) / float(run_count)
recall_average = float(sum(recall_list)) / run_count
print('Average precision: {}'.format(precision_average))
print('Average recall: {}'.format(recall_average))

|   0.627   | 0.634  |  1.3min|
|   0.582   | 0.596  |  1.4min|
|   0.587   | 0.641  |  1.4min|
|   0.587   | 0.626  |  1.4min|
|   0.560   | 0.627  |  1.4min|
|   0.568   | 0.604  |  1.3min|
|   0.592   | 0.629  |  1.3min|
|   0.538   | 0.606  |  3.4min|
|   0.585   | 0.636  |  1.1min|
|   0.593   | 0.609  |  1.1min|
Average precision: 0.581788637051
Average recall: 0.620632279534


# Das folgende Skript ist für das Testen verschiedener features

In [1]:
import time
import matplotlib.pyplot as plt
import numpy as np

from imports.matrix_helper import MatrixHelper
from imports.prediction_helper import PredictionHelper
from sklearn.metrics import precision_recall_curve

matrix_helper = MatrixHelper()

# Evaluate one pickled classification matrix per feature set and print a table row for each.
for filename in ['calls_current', 'calls_history']:
    # Load the matrices belonging to this feature set.
    matrices = matrix_helper.load_from_parse('data/matrices/matrix_cla_' + filename + '.pickle')

    # Fresh helper per feature set, validated with model_type='DT' and a sampling factor of 2/3.
    prediction_helper = PredictionHelper()
    prediction_helper.calculate_validation_compare_matrix(
        matrices, sampling_factor=(2.0/3), model_type='DT')
    compare_matrix = prediction_helper.get_compare_matrix()

    # Column 2 is used as the test-set ground truth, column 1 as the prediction.
    testset_labels = np.array(compare_matrix[:, 2], dtype='f')
    predicted_labels = np.array(compare_matrix[:, 1], dtype='f')

    # Precision / recall curve of the predictions against the ground truth.
    precision, recall, thresholds = precision_recall_curve(testset_labels, predicted_labels)

    # Percentage of vulnerable entries in the test set and in the prediction.
    vulnerable_testset = matrix_helper.get_vulnerable_percentage(testset_labels)
    vulnerable_prediction = matrix_helper.get_vulnerable_percentage(predicted_labels)

    # NOTE(review): index 1 is used as the operating point; presumably the
    # predictions are hard 0/1 labels -- confirm against PredictionHelper.
    row_template = '|{:30}|{:^11.3f}|{:^8.3f}|{:16.1f}%|{:19.1f}%|{}|({:5.1f}min)'
    row_values = (
        filename,
        precision[1],
        recall[1],
        vulnerable_testset,
        vulnerable_prediction,
        prediction_helper.most_important_feature,
        prediction_helper.time,
    )
    print(row_template.format(*row_values))

|calls_current                 |   0.527   | 0.387  |             9.8%|                7.2%|MOZ_ASSERT|( 12.8min)
|calls_history                 |   0.697   | 0.703  |            14.0%|               14.1%|toObject|( 10.1min)


## Ergebnisse mit diversen Features
| Matrix (Classification)      | Precision | Recall | Vul. in Testset | Vul. in Prediction | Most important feature |
|------------------------------|-----------|--------|-----------------|--------------------|------------------------|
|incl_current                  |   0.467   | 0.348  |             9.8%|                7.3%|nsContentUtils.h|
|incl_history                  |   0.670   | 0.692  |            17.8%|               18.4%|jsapi.h|
|cond_current                  |   0.599   | 0.163  |             9.8%|                2.7%|DEBUG|
|cond_history                  |   0.756   | 0.679  |            17.4%|               15.6%|DEBUG|
|names_current                 |   0.543   | 0.125  |             9.8%|                2.3%|mozilla::dom|
|names_history                 |   0.691   | 0.489  |            13.5%|                9.5%|js|
|defs_current                  |   0.569   | 0.052  |             9.8%|                0.9%|FT_COMPONENT|
|defs_history                  |   0.505   | 0.365  |             5.0%|                3.6%|FORCE_PR_LOG|
|calls_current                 |   0.527   | 0.387  |             9.8%|                7.2%|MOZ_ASSERT|
|calls_history                 |   0.697   | 0.703  |            14.0%|               14.1%|toObject|