In [2]:
import numpy as np
import matplotlib.pyplot as plt
import copy
import collections

from sklearn.svm import SVC
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import precision_recall_curve

from imports.matrix_helper import MatrixHelper

# Loader used for both the training and the validation revision.
matrix_helper = MatrixHelper()

# Each load result is indexable: element 0 is the feature matrix,
# element 1 the row names (component names).
matrices = matrix_helper.load_from_parse(
    'data/semiannual/matrix_classification_2014-03-17.pickle')
validation_matrices = matrix_helper.load_from_parse(
    'data/semiannual/matrix_classification_2017-03-09.pickle')

# Training revision
feature_matrix, rows = matrices[0], matrices[1]

# Validation revision
validation_feature_matrix, validation_rows = validation_matrices[0], validation_matrices[1]

# count number of samples and features in the feature matrix
# (the last column is used as the target, so it is not counted as a feature)
samples_count = feature_matrix.shape[0]
features_count = feature_matrix.shape[1] - 1

# Create Array (vulnerable_rows) with the names of all vulnerable components,
# i.e. every row whose last column is greater than zero
vulnerable_indices = np.where(feature_matrix[:, -1] > 0)
vulnerable_rows = [rows[i] for i in vulnerable_indices[0]]

# Create 2 matrices: One with the NOT vulnerable samples/components and one with their names.
# Membership is tested against a set so the scan over `rows` stays linear
# instead of re-walking the vulnerable list for every component
# (assumes component names are hashable, e.g. strings).
vulnerable_names = set(vulnerable_rows)
not_vulnerable_indices = [
    i for i, name in enumerate(rows) if name not in vulnerable_names
]
not_vulnerable_rows = [rows[i] for i in not_vulnerable_indices]

# Selecting by index array (rather than stacking a Python list of rows) keeps
# the result two-dimensional even when no component qualifies, so the column
# slicing done on this matrix afterwards cannot fail on an empty selection.
not_vulnerable_matrix = feature_matrix[np.asarray(not_vulnerable_indices, dtype=np.intp), :]

# Column indices of the features; the column right after them is the target
feature_columns = np.arange(features_count)

# Split feature matrix into data and target
training_data = feature_matrix[:, feature_columns]
training_target = feature_matrix[:, features_count]

# Create the linear SVM classifier and fit it on the training revision
clf = SVC(kernel='linear', C=0.2).fit(training_data, training_target)

# Predict labels for the components that are not vulnerable in the training revision
target_prediction = clf.predict(not_vulnerable_matrix[:, feature_columns])

# Create matrix with component names, predicted vulnerabilities and actual vulnerabilities in validation revision

# Map every validation component name to the position of its FIRST occurrence
# (the same position list.index() would return). One dictionary lookup per
# component replaces two linear scans of validation_rows.
# Assumes component names are hashable, e.g. strings.
validation_index_by_name = {}
for index, name in enumerate(validation_rows):
    validation_index_by_name.setdefault(name, index)

compare_matrix = []
for name, prediction in zip(not_vulnerable_rows, target_prediction):
    validation_index = validation_index_by_name.get(name)
    if validation_index is None:
        # Component no longer exists in the validation revision: nothing to compare
        continue
    compare_matrix.append([name, prediction, validation_feature_matrix[validation_index, -1]])

# Columns: 0 = component name, 1 = predicted label, 2 = actual label.
# NOTE: with string names NumPy stores all three columns as text, which is
# why consumers cast columns 1 and 2 back to float before using them.
compare_matrix = np.array(compare_matrix)

# Calculate precision and recall
actual_labels = np.array(compare_matrix[:, 2], dtype='f')
predicted_labels = np.array(compare_matrix[:, 1], dtype='f')

# The full curve is still computed so `precision`, `recall` and `thresholds`
# remain available, e.g. for plotting.
precision, recall, thresholds = precision_recall_curve(actual_labels, predicted_labels)

# Score the hard predictions directly instead of reading position 1 of the
# curve: that position only matches the classifier's operating point when both
# classes were predicted and the curve was not truncated. Otherwise it is the
# curve's appended end point (precision 1, recall 0) and the report is wrong.
# A label greater than zero counts as vulnerable.
actual_positive = actual_labels > 0
predicted_positive = predicted_labels > 0

true_positive_count = np.count_nonzero(actual_positive & predicted_positive)
predicted_positive_count = np.count_nonzero(predicted_positive)
actual_positive_count = np.count_nonzero(actual_positive)

# float() keeps the division exact under Python 2 as well; an undefined ratio
# (empty denominator) is reported as 0.0.
classifier_precision = (float(true_positive_count) / predicted_positive_count
                        if predicted_positive_count else 0.0)
classifier_recall = (float(true_positive_count) / actual_positive_count
                     if actual_positive_count else 0.0)

print('Precision: {}, Recall: {}'.format(classifier_precision, classifier_recall))

#print('--------------')
#for component in compare_matrix:
#    print("{:25} Predicted: {:>3} \t Actual in new Revision: {:>3}".format(component[0], round(float(component[1])), component[2]))

Precision: 0.2, Recall: 0.00218818380744


In [None]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error
from sklearn.metrics import precision_recall_curve
from scipy.stats import rankdata
from scipy.stats import spearmanr

from imports.matrix_helper import MatrixHelper
from imports.prediction_helper import PredictionHelper

# get feature matrices and row names (component names)
matrix_helper = MatrixHelper()
matrices = matrix_helper.load_from_parse('data/semiannual/matrix_classification_2014-03-17.pickle')
validation_matrices = matrix_helper.load_from_parse('data/semiannual/matrix_classification_2017-03-09.pickle')

# instantiate Prediction Helper Class and let it build the compare matrix
# using the 'SVM' method
prediction_helper = PredictionHelper()
prediction_helper.calculate_semiannual_compare_matrix(matrices, validation_matrices, 'SVM')

compare_matrix = prediction_helper.get_compare_matrix()

# Column 2 is passed as the ground truth and column 1 as the prediction
actual_labels = np.array(compare_matrix[:, 2], dtype='f')
predicted_labels = np.array(compare_matrix[:, 1], dtype='f')

# The full curve is still computed so `precision`, `recall` and `thresholds`
# remain available, e.g. for plotting.
precision, recall, thresholds = precision_recall_curve(actual_labels, predicted_labels)

# Score the predictions directly instead of reading position 1 of the curve:
# that position is only the classifier's operating point when both classes
# were predicted and the curve was not truncated; otherwise it is the curve's
# appended end point (precision 1, recall 0).
# NOTE(review): assumes column 1 holds hard class labels where a value
# greater than zero means "vulnerable" - confirm against PredictionHelper.
actual_positive = actual_labels > 0
predicted_positive = predicted_labels > 0

true_positive_count = np.count_nonzero(actual_positive & predicted_positive)
predicted_positive_count = np.count_nonzero(predicted_positive)
actual_positive_count = np.count_nonzero(actual_positive)

# float() keeps the division exact under Python 2 as well; an undefined ratio
# (empty denominator) is reported as 0.0.
classifier_precision = (float(true_positive_count) / predicted_positive_count
                        if predicted_positive_count else 0.0)
classifier_recall = (float(true_positive_count) / actual_positive_count
                     if actual_positive_count else 0.0)

print('Precision: {}, Recall: {}'.format(classifier_precision, classifier_recall))