# The following script shows the runtime differences between decision trees (DTs) and SVMs

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import time
from sklearn import svm
from sklearn import tree

from imports.matrix_helper import MatrixHelper
from imports.prediction_helper import PredictionHelper
from sklearn.metrics import precision_recall_curve

# Shared helper instance; used below to load the pickled matrices, separate
# vulnerable from not-vulnerable entries, split them into training/test
# parts, and separate the data from the target.
matrix_helper = MatrixHelper()

def predict(training_data, training_target, test_data, model_type, pen=0.2):
    """Train a classifier, predict the test data and time the whole run.

    Parameters
    ----------
    training_data
        Feature data passed to the model's ``fit``.
    training_target
        Target values passed to the model's ``fit``.
    test_data
        Feature data passed to the model's ``predict``.
    model_type
        ``'svm'`` selects ``svm.SVC`` with a linear kernel and ``C=0.2``;
        ``'dt'`` selects ``tree.DecisionTreeClassifier``; any other value
        falls back to ``svm.LinearSVC``.
    pen
        Penalty parameter ``C`` for the ``svm.LinearSVC`` fallback.  The
        default mirrors the ``C`` used for the ``'svm'`` branch.

    Returns
    -------
    tuple
        ``(target_prediction, elapsed)`` where ``target_prediction`` is the
        output of the model's ``predict`` and ``elapsed`` is the wall-clock
        time for model creation, fitting and prediction, in minutes.
    """
    # The timer deliberately starts before the model is constructed, so the
    # measurement covers construction, fitting and prediction.
    start = time.time()

    # Create the SVM or DT
    if model_type == 'svm':
        m = svm.SVC(kernel='linear', C=0.2)
    elif model_type == 'dt':
        m = tree.DecisionTreeClassifier()
    else:
        # Previously this read an undefined name ``pen``; it is now a
        # keyword parameter so the fallback branch no longer raises.
        m = svm.LinearSVC(C=pen)

    # Fit the model to the training data
    m.fit(training_data, training_target)

    # Predict remaining data
    target_prediction = m.predict(test_data)

    # Seconds -> minutes
    elapsed = (time.time() - start) / 60

    # Only the values that are actually computed are returned; the former
    # third element (``target_score``) was never assigned.
    return target_prediction, elapsed
    
# Load the pickled matrices. The loaded object is indexed below as
# [0] = feature matrix, [1] = rows, [2] = columns.
matrices = matrix_helper.load_from_parse('data/matrices/matrix_cla_incl_history.pickle')

feature_matrix = matrices[0]
# Uncomment to restrict the run to the first 1000 rows (e.g. for quick experiments):
# feature_matrix = feature_matrix[:1000, :]

rows = matrices[1]
columns = matrices[2]
# Number of columns minus one -- presumably one column holds the target
# value; verify against MatrixHelper.create_data_target.
features_count = feature_matrix.shape[1] - 1

# Create separate matrices for vulnerable and not vulnerable entries
vulnerable_matrix, vulnerable_rows = matrix_helper.get_vulnerable_components(feature_matrix, rows)
not_vulnerable_matrix, not_vulnerable_rows = matrix_helper.get_not_vulnerable_components(feature_matrix, rows)


# Split into training sets (2/3) and test sets (1/3).
# Each class is split on its own before the parts are recombined below.
vulnerable_training, vulnerable_test = matrix_helper.split_training_test(vulnerable_matrix, (2.0/3), vulnerable_rows)
not_vulnerable_training, not_vulnerable_test = matrix_helper.split_training_test(not_vulnerable_matrix, (2.0/3), not_vulnerable_rows)


# Concatenate vulnerable/not-vulnerable.
# Element [0] of each split result is stacked row-wise as the matrix part;
# element [1] is joined with `+` as the matching row labels.
training_matrix = np.concatenate((not_vulnerable_training[0], vulnerable_training[0]), axis=0)
test_matrix = np.concatenate((not_vulnerable_test[0], vulnerable_test[0]), axis=0)
test_rows = not_vulnerable_test[1] + vulnerable_test[1]


# Split into data (features) and target matrices
training_data, training_target = matrix_helper.create_data_target(training_matrix)
test_data, test_target = matrix_helper.create_data_target(test_matrix)


# Train the classification model and predict vulnerabilities for test data.
# 'SVLinear' is not one of the names predict() checks for explicitly, so it
# selects predict()'s fallback branch (svm.LinearSVC).
for clf in ['dt', 'SVLinear']:
    target_prediction, elapsed_time = predict(training_data, training_target, test_data, clf)
    
    # NOTE(review): the predictions from m.predict() are passed where
    # precision_recall_curve expects scores; index 1 is presumably the
    # operating point of the positive prediction -- confirm, and consider
    # precision_score / recall_score for a more direct computation.
    precision, recall, thresholds = precision_recall_curve(test_target, target_prediction)

    # One Markdown table row per model: type, precision, recall, and the
    # elapsed time as reported by predict() (seconds divided by 60).
    print('|{}|{:^11.3f}|{:^8.3f}| {} |'.format(clf, precision[1], recall[1], elapsed_time))

    

| Type            | Precision | Recall | Time (min) |
|-------------------------------------|-----------|--------|-----------------|
|dt|   0.593   | 0.617  | 1.15047711531 |
|svm|   0.775   | 0.602  | 9.84284751415 |
|linsvm|   0.736   | 0.631  | 0.0606333851814 |