In [1]:
import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import mean_squared_error
from sklearn.metrics import precision_recall_curve
from scipy.stats import spearmanr

from imports.matrix_helper import MatrixHelper
from imports.svr_helper import SVRHelper

# Load the training and validation feature matrices (with their row names,
# i.e. the component names) from the pickled semiannual dumps.
matrix_helper = MatrixHelper()
matrices = matrix_helper.load_from_parse(
    'data/semiannual/matrix_regression_2007-06-21.pickle'
)
validation_matrices = matrix_helper.load_from_parse(
    'data/semiannual/matrix_regression_2017-03-09.pickle'
)

# Let the SVR helper build the compare matrix from both sets of matrices.
svr_helper = SVRHelper()
svr_helper.calculate_semiannual_compare_matrix(matrices, validation_matrices)

# Mean square error over the whole sorted compare matrix.
# Column 2 is passed as y_true and column 1 as y_pred -- presumably the actual
# and the predicted vulnerability counts; confirm against SVRHelper.
compare_matrix_sorted = svr_helper.get_compare_matrix_sorted()
sorted_col2 = np.array(compare_matrix_sorted[:, 2], dtype='f')
sorted_col1 = np.array(compare_matrix_sorted[:, 1], dtype='f')
mse = mean_squared_error(sorted_col2, sorted_col1)
print('Mean square error: {}'.format(mse))

# Same metric again, restricted to the top 1% slice of the compare matrix.
compare_matrix_top = svr_helper.get_compare_matrix_top()
top_col2 = np.array(compare_matrix_top[:, 2], dtype='f')
top_col1 = np.array(compare_matrix_top[:, 1], dtype='f')
mse_top = mean_squared_error(top_col2, top_col1)
print('Mean square error with top 1%: {}'.format(mse_top))

# Spearman rank correlation between columns 1 and 2 of the top 1% slice;
# element 0 of the result is the correlation coefficient.
coefficient = spearmanr(compare_matrix_top[:, 1], compare_matrix_top[:, 2])
print('Spearman correlation coefficient: {}'.format(coefficient[0]))


Mean square error: 6.13137578964
Mean square error with top 1%: 91.2247924805
Spearman correlation coefficient: 0.356678733676




## Regression with multiple matrices

In [8]:
import numpy as np
import matplotlib.pyplot as plt
import os

from sklearn.metrics import mean_squared_error
from sklearn.metrics import precision_recall_curve
from scipy.stats import spearmanr

from imports.matrix_helper import MatrixHelper
from imports.svr_helper import SVRHelper

matrix_helper = MatrixHelper()

def validate(matrices, validation_matrices):
    """Score an SVR prediction against a validation set.

    Builds the compare matrix through ``SVRHelper`` and derives three metrics
    from it.

    Args:
        matrices: Training matrices, handed unchanged to
            ``SVRHelper.calculate_semiannual_compare_matrix``.
        validation_matrices: Validation matrices, handed to the same call.

    Returns:
        A ``(mse, mse_top, spearman)`` tuple: the mean square error over the
        sorted compare matrix, the mean square error over the top 1% slice,
        and the Spearman correlation coefficient of that top slice.
    """
    helper = SVRHelper()
    helper.calculate_semiannual_compare_matrix(matrices, validation_matrices)

    def _column_mse(compare_matrix):
        # Column 2 goes in as y_true and column 1 as y_pred -- presumably the
        # actual and the predicted vulnerability counts; confirm in SVRHelper.
        y_true = np.array(compare_matrix[:, 2], dtype='f')
        y_pred = np.array(compare_matrix[:, 1], dtype='f')
        return mean_squared_error(y_true, y_pred)

    # Error over the full, sorted compare matrix.
    mse = _column_mse(helper.get_compare_matrix_sorted())

    # Error and rank correlation over the top 1% slice only.
    top_rows = helper.get_compare_matrix_top()
    mse_top = _column_mse(top_rows)
    spearman, _p_value = spearmanr(top_rows[:, 1], top_rows[:, 2])

    return mse, mse_top, spearman

validation_date = '2017-03-09'
counter = 0

# Directory holding the pickled matrices and the number of training matrices
# to validate in this run.
data_dir = 'data/semiannual'
max_matrices = 3
validation_path = os.path.join(data_dir, 'matrix_regression_' + validation_date + '.pickle')

# os.listdir() returns entries in arbitrary order; sorting makes both the
# selected files and the order of the printed results reproducible.
for file in sorted(os.listdir(data_dir)):
    if counter >= max_matrices:
        # Limit reached -- no need to scan the rest of the directory.
        break

    # Expected name layout: <prefix>_regression_<date>.<extension>.
    # Entries that do not split into three parts (e.g. '.DS_Store', 'README')
    # are skipped instead of raising an IndexError.
    parts = file.split('_', 2)
    if len(parts) != 3 or parts[1] != 'regression':
        continue

    date = parts[2].split('.', 1)[0]
    if date == validation_date:
        # Never validate the validation matrix against itself.
        continue

    counter += 1
    matrices = matrix_helper.load_from_parse(os.path.join(data_dir, file))
    # Reloaded for every run because it is not visible here whether the SVR
    # helper mutates its inputs -- TODO confirm, then hoist out of the loop.
    validation_matrices = matrix_helper.load_from_parse(validation_path)
    mse, mse_top, spearman = validate(matrices, validation_matrices)
    print('{} - MSE: {}    MSE Top 1%: {}    Spearman coefficient: {}'.format(date, mse, mse_top, spearman))



2007-03-22 - MSE: 16.6169605255    MSE Top 1%: 0.571428596973    Spearman coefficient: nan
2007-06-21 - MSE: 6.13131999969    MSE Top 1%: 91.2246704102    Spearman coefficient: 0.356678733676
2007-09-23 - MSE: 4.47656202316    MSE Top 1%: 51.9271125793    Spearman coefficient: 0.607613826231
