# Bestimmung Top10 durch Regression mit SVR
Wir lernen ein SVR-Modell mit einer feature matrix an, welche auf dem aktuellen Stand des Repos oder einer älteren Revision basiert. Anschliessend erstellen wir eine feature matrix mit allen samples, die zu diesem Zeitpunkt keine bekannten Schwachstellen aufweisen, und wenden das angelernte SVR-Modell darauf an.
Aus dem berechneten target extrahieren wir die 10 Komponenten mit den meisten vorhergesagten Verwundbarkeiten.

In [10]:
import numpy as np
from sklearn.svm import LinearSVR
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from imports.matrix_helper import MatrixHelper
import collections

# Rank the components that have no known vulnerabilities in the training
# revision by the number of vulnerabilities a linear SVR predicts for them,
# and print the top entries next to the value found in the validation revision.
matrix_helper = MatrixHelper()

# Variables
# If True, components that are missing from the validation revision are still
# listed (marked as 'Deleted'); if False they are dropped from the ranking.
list_deleted_components = True
# Maximum number of ranked components to print.
results_count = 10

# get feature matrices and row names (component names)
# NOTE(review): load_from_parse is assumed to return (dense 2-D ndarray,
# sequence of component names) - confirm against MatrixHelper.
matrices = matrix_helper.load_from_parse('data/feature_matrix_sparse_rev150000.pickle')
validation_matrices = matrix_helper.load_from_parse('data/feature_matrix_sparse_v3.pickle')

feature_matrix = matrices[0]
validation_feature_matrix = validation_matrices[0]
rows = matrices[1]
validation_rows = validation_matrices[1]

# count number of samples and features in the feature matrix
# (the last column is used as the regression target, hence the "- 1")
samples_count = feature_matrix.shape[0]  # not used below; kept for later cells
features_count = feature_matrix.shape[1] - 1

# Create list (vulnerable_rows) with the names of all vulnerable components,
# i.e. the rows whose last column is greater than zero
vulnerable_indices = np.where(feature_matrix[:, -1] > 0)
vulnerable_rows = [rows[i] for i in vulnerable_indices[0]]

# Select the NOT vulnerable samples/components: their names and their rows.
# Filtering is done by *name* (as before), so a row is excluded whenever any
# row with the same name is vulnerable. A set makes each test O(1).
vulnerable_names = set(vulnerable_rows)
not_vulnerable_indices = [
    i for i, name in enumerate(rows) if name not in vulnerable_names
]
not_vulnerable_rows = [rows[i] for i in not_vulnerable_indices]
not_vulnerable_matrix = np.asarray(
    [feature_matrix[i, :] for i in not_vulnerable_indices]
)

# Split feature matrix into data and target
training_data = feature_matrix[:, :features_count]
training_target = feature_matrix[:, features_count]

# Create support vector regression
# NOTE(review): no random_state is set, so repeated runs may not reproduce
# exactly the same predictions - consider fixing a seed.
svr = LinearSVR(C=0.2)

# Fit model
svr.fit(training_data, training_target)

# Predict target for all components without any known vulnerabilities.
# With no candidates the stacked matrix is 1-D and cannot be sliced/predicted,
# so fall back to an empty prediction instead of raising.
if not_vulnerable_rows:
    target_prediction = svr.predict(not_vulnerable_matrix[:, :features_count])
else:
    target_prediction = np.array([])

# Map each validation component name to the index of its first occurrence
# (same result as list.index, without a linear scan per component).
validation_index_by_name = {}
for validation_index, name in enumerate(validation_rows):
    validation_index_by_name.setdefault(name, validation_index)

# Create matrix with component names, predicted vulnerabilities and actual
# number of vulnerabilities in validation revision
compare_matrix = []
for name, prediction in zip(not_vulnerable_rows, target_prediction):
    validation_index = validation_index_by_name.get(name)
    if validation_index is not None:
        compare_matrix.append(
            [name, prediction, validation_feature_matrix[validation_index, -1]]
        )
    elif list_deleted_components:
        compare_matrix.append([name, prediction, 'Deleted'])

# Mixed str/number rows become a string array; reshape(-1, 3) is a no-op for
# non-empty input and keeps the array 2-D (0 x 3) when there are no rows.
compare_matrix = np.array(compare_matrix).reshape(-1, 3)
# Sort descending by the predicted value (column 1, parsed back to float32).
sorted_indeces = np.array(compare_matrix[:, 1], dtype='f').argsort()[::-1]
compare_matrix_sorted = compare_matrix[sorted_indeces]

# Print at most results_count entries; slicing avoids an IndexError when
# fewer components are available.
for rank, component in enumerate(compare_matrix_sorted[:results_count], start=1):
    print("{:>2}. {:25} Predicted: {:>3} \t Actual in new Revision: {:>3}".format(rank, component[0], round(float(component[1])), component[2]))


 1. Interpreter               Predicted: 9.0 	 Actual in new Revision:   6
 2. Runtime                   Predicted: 8.0 	 Actual in new Revision:  11
 3. OldDebugAPI               Predicted: 7.0 	 Actual in new Revision: Deleted
 4. Sandbox                   Predicted: 6.0 	 Actual in new Revision:   1
 5. nsEventListenerService    Predicted: 4.0 	 Actual in new Revision: Deleted
 6. nsImageFrame              Predicted: 4.0 	 Actual in new Revision:   0
 7. RootMarking               Predicted: 4.0 	 Actual in new Revision:   1
 8. nsJSConfigTriggers        Predicted: 4.0 	 Actual in new Revision:   0
 9. jsd_xpc                   Predicted: 4.0 	 Actual in new Revision: Deleted
10. Element                   Predicted: 4.0 	 Actual in new Revision:   4
