# Bestimmung Top10 durch Regression mit SVR
Wir lernen ein SVR-Modell mit einer Feature-Matrix an, welche auf dem aktuellen Stand des Repos oder einer älteren Revision basiert. Anschliessend erstellen wir eine Feature-Matrix mit allen Samples, die zu diesem Zeitpunkt keine bekannten Schwachstellen aufweisen, und wenden das angelernte SVR-Modell darauf an. Aus dem berechneten Target extrahieren wir die 10 Komponenten mit den meisten vorhergesagten Verwundbarkeiten.

In [5]:
import numpy as np
from sklearn.svm import LinearSVR
import matplotlib.pyplot as plt
from imports.sparse_reader import get_matrices

# Number of top-ranked components to report
result_count = 10

# Load the feature matrix and its row names (component names).
# NOTE(review): the indexing below assumes get_matrices returns a dense 2-D
# array whose last column is the vulnerability count, plus a list of hashable
# row names (strings) -- confirm against imports.sparse_reader.
matrices = get_matrices('data/feature_matrix_sparse_rev150000.pickle')
#matrices = get_matrices('data/feature_matrix_sparse_v3.pickle')
feature_matrix = matrices[0]
rows = matrices[1]

# Count samples and features; the last column is the target, not a feature
samples_count = feature_matrix.shape[0]
features_count = feature_matrix.shape[1] - 1

# Names of all components with at least one known vulnerability
vulnerable_indices = np.where(feature_matrix[:,-1] > 0)
vulnerable_rows = [rows[i] for i in vulnerable_indices[0]]

# Set copy of the vulnerable names: membership tests in the filter below are
# O(1) instead of scanning the whole list once per row.
vulnerable_names = set(vulnerable_rows)

# Indices of every row whose name does not appear among the vulnerable names
not_vulnerable_indices = [
    i for i, name in enumerate(rows) if name not in vulnerable_names
]

# Two parallel collections for the NOT vulnerable components:
# their names and their rows of the feature matrix (same order as `rows`)
not_vulnerable_rows = [rows[i] for i in not_vulnerable_indices]
not_vulnerable_matrix = np.asarray(
    [feature_matrix[i, :] for i in not_vulnerable_indices]
)


# Separate the feature columns (all but the last) from the target column
training_data = feature_matrix[:, :features_count]
training_target = feature_matrix[:, features_count]

# Build the linear support vector regressor and fit it on all samples
svr = LinearSVR(C=0.2).fit(training_data, training_target)

# Predict the target for every component without known vulnerabilities
candidate_features = not_vulnerable_matrix[:, :features_count]
target = svr.predict(candidate_features)

# The last `result_count` entries of the ascending argsort are the positions
# of the highest predictions; map them back to component names
ascending_order = np.argsort(target)
indices_highest_target = ascending_order[-result_count:]
components = [not_vulnerable_rows[position] for position in indices_highest_target]

# Flag and parameters for comparison: when enabled, the predicted components
# are looked up in a second feature matrix to show their count in that revision
compare = True
if compare:
    comp_matrices = get_matrices('data/feature_matrix_sparse_v3.pickle')
    comp_feature_matrix = comp_matrices[0]
    comp_rows = comp_matrices[1]

# Print result, highest prediction first.
# Iterating the index array itself (reversed, since argsort is ascending)
# avoids an IndexError when fewer than `result_count` candidates exist and
# avoids re-deriving each position through a name lookup, which would pick
# the wrong entry if a component name occurred more than once.
for rank, filtered_index in enumerate(indices_highest_target[::-1], start=1):
    component = not_vulnerable_rows[filtered_index]
    predicted_vulnerable_count = target[filtered_index]

    # Known vulnerability count of this component in the training revision
    component_index = rows.index(component)
    current_vulnerable_count = feature_matrix[component_index, -1]

    if not compare:
        print("{:>2}. {:25} Currently: {:>2}, Predicted: {:>3}".format(rank, component, current_vulnerable_count, round(predicted_vulnerable_count)))
    else:
        # Components missing from the comparison revision are reported as removed
        comparison_result = 'Removed'
        if component in comp_rows:
            comparison_result = comp_feature_matrix[comp_rows.index(component), -1]
        print("{:>2}. {:25} Currently: {:>2} \t Predicted: {:>3} \t in new Revision: {:>3}".format(rank, component, current_vulnerable_count, round(predicted_vulnerable_count), comparison_result))



 1. Interpreter               Currently:  0 	 Predicted: 9.0 	 in new Revision:   6
 2. Runtime                   Currently:  0 	 Predicted: 8.0 	 in new Revision:  11
 3. OldDebugAPI               Currently:  0 	 Predicted: 7.0 	 in new Revision: Removed
 4. Sandbox                   Currently:  0 	 Predicted: 6.0 	 in new Revision:   1
 5. nsEventListenerService    Currently:  0 	 Predicted: 4.0 	 in new Revision: Removed
 6. nsImageFrame              Currently:  0 	 Predicted: 4.0 	 in new Revision:   0
 7. RootMarking               Currently:  0 	 Predicted: 4.0 	 in new Revision:   1
 8. nsJSConfigTriggers        Currently:  0 	 Predicted: 4.0 	 in new Revision:   0
 9. jsd_xpc                   Currently:  0 	 Predicted: 4.0 	 in new Revision: Removed
10. Element                   Currently:  0 	 Predicted: 4.0 	 in new Revision:   4
