Recreating and Testing the Alternative Criticality Score Algorithm \\
Author: Blaise Swartwood

In [45]:
#importing required packages
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import math


Recreating only the alternative criticality score algorithm


In [46]:
#accessing data file
df = pd.read_csv('CSDataAsCSV.csv', usecols=[i for i in range(6, 18) if i != 16])
df.head()

Unnamed: 0,created_since,updated_since,contributor_count,org_count,commit_frequency,recent_release_count,updated_issues_count,closed_issues_count,issue_comment_frequency,github_mention_count,criticality_score
0,131,0,5000,10,1555.33,69,552,266,3.18,57193965,0.9252
1,209,0,2044,5,70.83,49,896,806,13.15,36120246,0.89559
2,162,0,3472,9,48.37,67,36122,34479,2.0,1168604,0.88907
3,97,0,4367,5,138.08,65,86352,84020,2.0,529999,0.884
4,151,0,3579,13,104.13,69,27449,26723,2.0,88134,0.87876


In [47]:
#loading in weights, max threshold, and min threshold
#0 is being used as the default minimum threshold value
data = [[1, 120, 0], [1, 120, 0], [2,5000,0], [1, 10,0],[1, 1000,0],[0.5, 26,0],[0.5, 5000,0],[0.5, 5000,0],[1, 15,0],[2, 500000,0]]
df2 = pd.DataFrame(data, columns=['Weight','Max','Min'])

#an updated change to remove the negative weight of updated_since
df['updated_since'] = 120 - np.minimum(df['updated_since'], 120)
df2

Unnamed: 0,Weight,Max,Min
0,1.0,120,0
1,1.0,120,0
2,2.0,5000,0
3,1.0,10,0
4,1.0,1000,0
5,0.5,26,0
6,0.5,5000,0
7,0.5,5000,0
8,1.0,15,0
9,2.0,500000,0


In [48]:
#summing the weights of all the features
weight = df2['Weight'].sum()
n = df2.shape[0]
df['alt_crit'] = 0

In [49]:
#implementation of the alternative criticality score calculation
for i in range(df.shape[0]):
    x = 1
    for index in range(10):
        w = df2['Weight'][index]
        u = df2['Max'][index]
        l = df2['Min'][index]
        s = df.iloc[i, index]
        r = u-l
        val = np.log(1+(np.minimum(np.maximum(s-l,0),r)))/np.log(1+r)
        val *= w/weight
        x *= (1 - val)
    df.loc[i, 'alt_crit'] = (1 - (np.power(x, 1/n))) * 10

In [50]:
#percent change between original and alternative criticality score
df['alt_percent_change'] = 100 * (df['alt_crit']-df['criticality_score'])/df['criticality_score']

In [51]:
df.head()

Unnamed: 0,created_since,updated_since,contributor_count,org_count,commit_frequency,recent_release_count,updated_issues_count,closed_issues_count,issue_comment_frequency,github_mention_count,criticality_score,alt_crit,alt_percent_change
0,131,120,5000,10,1555.33,69,552,266,3.18,57193965,0.9252,0.942394,1.858458
1,209,120,2044,5,70.83,49,896,806,13.15,36120246,0.89559,0.909949,1.603332
2,162,120,3472,9,48.37,67,36122,34479,2.0,1168604,0.88907,0.905144,1.807929
3,97,120,4367,5,138.08,65,86352,84020,2.0,529999,0.884,0.900406,1.855889
4,151,120,3579,13,104.13,69,27449,26723,2.0,88134,0.87876,0.891882,1.493272



Recreating the alternative algorithm while only using the recommended signals from the statistical analysis


In [52]:
#creating a new datafrome with the weight, max threshold, and min threshold
data = [[0.5, 120, 0], [2, 120, 0], [2,5000, 0], [2, 10, 0],[0.5, 26, 0],[2, 5000, 0],[2, 15, 0]]
df3 = pd.DataFrame(data, columns=['Weight','Max', 'Min'])

In [53]:
#recalculating weight
weight = df3['Weight'].sum()
df['new_alt_crit'] = 0

In [54]:
#implementation of alternative algorithm algorithm
for i in range(df.shape[0]):
    x = 1
    for index in range(7):
        w = df3['Weight'][index]
        u = df3['Max'][index]
        l = df3['Min'][index]
        s = df.iloc[i, index]
        r = u-l
        val = np.log(1+(np.minimum(np.maximum(s-l,0),r)))/np.log(1+r)
        val *= w/weight
        x *= (1 - val)
    df.loc[i, 'new_alt_crit'] = (1 - (np.power(x, 1/n))) * 10

In [55]:
#percent change from original criticality score to alt score with specific signals
df['new_alt_percent_change'] = 100 * (df['new_alt_crit']-df['criticality_score'])/df['criticality_score']

In [57]:
df.head()

Unnamed: 0,created_since,updated_since,contributor_count,org_count,commit_frequency,recent_release_count,updated_issues_count,closed_issues_count,issue_comment_frequency,github_mention_count,criticality_score,alt_crit,alt_percent_change,new_alt_crit,new_alt_percent_change
0,131,120,5000,10,1555.33,69,552,266,3.18,57193965,0.9252,0.942394,1.858458,0.943292,1.955424
1,209,120,2044,5,70.83,49,896,806,13.15,36120246,0.89559,0.909949,1.603332,0.865449,-3.365491
2,162,120,3472,9,48.37,67,36122,34479,2.0,1168604,0.88907,0.905144,1.807929,0.92612,4.167252
3,97,120,4367,5,138.08,65,86352,84020,2.0,529999,0.884,0.900406,1.855889,0.887286,0.371743
4,151,120,3579,13,104.13,69,27449,26723,2.0,88134,0.87876,0.891882,1.493272,0.935424,6.448145


In [56]:
#saving results to csv
df.to_csv("altCriticalityScore.csv", index=False)