# Severis
We have reproduced SEVERIS (SEVERity ISsue assessment), a tool introduced in the paper _Automated Severity Assessment of Software Defect Reports_, designed to automatically predict the severity of a bug.

In [1]:
# start weka
# Boot the JVM first so that the weka.* wrappers used in the later cells have a
# Java runtime to run in.
import weka.core.jvm as jvm
# Called without arguments, so the wrapper's defaults apply; the debug log
# printed by this cell reports MaxHeapSize=default and package support disabled.
jvm.start()

DEBUG:weka.core.jvm:Adding bundled jars
DEBUG:weka.core.jvm:Classpath=['/home/jude/thesis-env/lib/python3.6/site-packages/javabridge/jars/rhino-1.7R4.jar', '/home/jude/thesis-env/lib/python3.6/site-packages/javabridge/jars/runnablequeue.jar', '/home/jude/thesis-env/lib/python3.6/site-packages/javabridge/jars/cpython.jar', '/home/jude/thesis-env/lib/python3.6/site-packages/weka/lib/python-weka-wrapper.jar', '/home/jude/thesis-env/lib/python3.6/site-packages/weka/lib/weka.jar']
DEBUG:weka.core.jvm:MaxHeapSize=default
DEBUG:weka.core.jvm:Package support disabled


In [2]:
# dataset file locations: one raw CSV per pits data set, suffixes A through F
pits_train = [
    '../../dataset/raw/pits{}.csv'.format(suffix)
    for suffix in 'ABCDEF'
]

In [3]:
# load in dataset
from weka.core.converters import Loader

# The ARFF file to read, kept as directory + file name so either part can be
# changed on its own.
input_dir = '../../results/severis/f/'
input_file = 'input_reports.arff'
arff_path = input_dir + input_file

# Read the ARFF into a Weka data set and declare its last attribute as the class.
loader = Loader(classname="weka.core.converters.ArffLoader")
data = loader.load_file(arff_path)
data.class_is_last()

In [4]:
# preprocess data
# Turn the free-text attribute into numeric word features with Weka's
# StringToWordVector filter.
import os

from weka.filters import Filter

# Stop-word list from the NLTK corpora. Resolved against the current user's home
# directory instead of hard-coding one machine's absolute path, so the notebook
# is not tied to a single account.
# NOTE(review): the path is spliced unquoted into a Weka option string, so a
# home directory containing spaces would need quoting here.
stopwords_file = os.path.expanduser('~/nltk_data/corpora/stopwords/english')

# Option meanings below follow Weka's StringToWordVector documentation.
preprocess = Filter(
    classname='weka.filters.unsupervised.attribute.StringToWordVector',
    options=[
        '-R', '1',                # attribute range to convert: the first attribute
        '-W', '100',              # number of words to (try to) keep
        '-prune-rate', '-1.0',    # -1 = no periodic pruning of the dictionary
        '-C', '-T', '-I',         # word counts, TF transform, IDF transform
        '-N', '2',                # normalisation mode 2 (see Weka docs)
        '-L',                     # lower-case all tokens
        '-stemmer', 'weka.core.stemmers.SnowballStemmer',
        '-M', '1',                # minimum term frequency
        '-stopwords-handler', 'weka.core.stopwords.WordsFromFile -stopwords ' + stopwords_file,
        '-tokenizer', 'weka.core.tokenizers.WordTokenizer -delimiters " \\r\\n\\t.,;:-_\\\'\\"()?!0123456789"'
    ]
)
preprocess.inputformat(data)
filtered = preprocess.filter(data)
print(filtered)

@relation 'nasa-weka.filters.unsupervised.attribute.StringToWordVector-R1-W100-prune-rate-1.0-C-T-I-N2-L-stemmerweka.core.stemmers.SnowballStemmer-stopwords-handlerweka.core.stopwords.WordsFromFile -stopwords /home/jude/nltk_data/corpora/stopwords/english-M1-tokenizerweka.core.tokenizers.WordTokenizer -delimiters \" \\r\\n\\t.,;:-_\\\'\\\"()?!0123456789\"'

@attribute class-att {1,2,3,4,5}
@attribute destination numeric
@attribute memcpy numeric
@attribute pointer numeric
@attribute uninitalized numeric
@attribute use numeric
@attribute ac numeric
@attribute acs numeric
@attribute additional numeric
@attribute cdh numeric
@attribute command numeric
@attribute critical numeric
@attribute deleted numeric
@attribute detail numeric
@attribute discussed numeric
@attribute equality numeric
@attribute ffp numeric
@attribute floating numeric
@attribute fsw numeric
@attribute functions numeric
@attribute general numeric
@attribute generation numeric
@attribute incorrectly numeric
@attribute ins

In [5]:
# select attributes (features)
# Rank the word features by information gain, keep the top-ranked ones and drop
# every other non-class attribute from the data set.
import numpy as np
from weka.attribute_selection import ASSearch, ASEvaluation, AttributeSelection


def _to_weka_ranges(indices):
    """Collapse sorted 0-based attribute indices into a 1-based Weka range string.

    Runs of consecutive indices are merged into ``first-last`` spans and single
    indices are emitted on their own, e.g. ``[6, 7, 8, 11, 16]`` becomes
    ``'7-9,12,17'``.

    :param indices: ascending 0-based attribute indices (list or array)
    :return: comma-separated range specification, ``''`` when ``indices`` is empty
    """
    indices = np.asarray(indices, dtype=int)
    if indices.size == 0:
        return ''
    # a new run starts wherever the step between neighbours is not exactly 1
    runs = np.split(indices, np.flatnonzero(np.diff(indices) != 1) + 1)
    return ','.join(
        str(run[0] + 1) if len(run) == 1
        else '{}-{}'.format(run[0] + 1, run[-1] + 1)
        for run in runs
    )


# Ranker: -T is the score threshold (here the most negative double, i.e. no
# cut-off) and -N the number of attributes to retain (per Weka's Ranker docs).
search = ASSearch(classname="weka.attributeSelection.Ranker", options=['-T', '-1.7976931348623157E308', '-N', '100'])
evaluator = ASEvaluation(classname="weka.attributeSelection.InfoGainAttributeEval")
attsel = AttributeSelection()
attsel.search(search)
attsel.evaluator(evaluator)
attsel.select_attributes(filtered)

ranked_attributes = attsel.selected_attributes
num_attributes = filtered.num_attributes

# Everything that was not selected gets removed. Index 0 is skipped on purpose:
# in the filtered data printed above it is the class attribute.
kept = set(int(index) for index in ranked_attributes)
remove_attributes = np.array(
    [index for index in range(1, num_attributes) if index not in kept],
    dtype=int,
)

# remove attributes
att = _to_weka_ranges(remove_attributes)
pruned = filtered
if att:
    # Only run the Remove filter when there is something to drop; an empty
    # selection would otherwise be passed to Weka as a meaningless range.
    remove = Filter(classname='weka.filters.unsupervised.attribute.Remove', options=['-R', att])
    remove.inputformat(pruned)
    pruned = remove.filter(pruned)
print(pruned)


@relation 'nasa-weka.filters.unsupervised.attribute.StringToWordVector-R1-W100-prune-rate-1.0-C-T-I-N2-L-stemmerweka.core.stemmers.SnowballStemmer-stopwords-handlerweka.core.stopwords.WordsFromFile -stopwords /home/jude/nltk_data/corpora/stopwords/english-M1-tokenizerweka.core.tokenizers.WordTokenizer -delimiters \" \\r\\n\\t.,;:-_\\\'\\\"()?!0123456789\"-weka.filters.unsupervised.attribute.Remove-R7-9,12-15,17,20,23-24,48,50-53,55,57-62,64-66,68-69,71-79,81-82,133-139,141-151,153,155-163,165-168,170-171,173-180,182-268,270-275'

@attribute class-att {1,2,3,4,5}
@attribute destination numeric
@attribute memcpy numeric
@attribute pointer numeric
@attribute uninitalized numeric
@attribute use numeric
@attribute cdh numeric
@attribute command numeric
@attribute equality numeric
@attribute floating numeric
@attribute fsw numeric
@attribute general numeric
@attribute generation numeric
@attribute mini numeric
@attribute missing numeric
@attribute none numeric
@attribute pb numeric
@attribut

In [6]:
# run 10 fold cross validation through JRip
from weka.classifiers import Classifier, Evaluation
from weka.core.classes import Random

# JRip settings (per Weka's JRip documentation): -F pruning folds,
# -N minimum instance weight in a split, -O optimisation runs, -S seed.
jrip_options = ['-F', '3', '-N', '2.0', '-O', '2', '-S', '1']

# Fit once on the complete pruned data set so the learned rule list can be shown.
cls = Classifier(classname="weka.classifiers.rules.JRip", options=jrip_options)
cls.build_classifier(pruned)

print(cls)

# Estimate accuracy with 10-fold cross-validation, using a fixed seed for the folds.
# NOTE(review): the word vectors and the info-gain selection were computed on the
# full data set before these folds are drawn, so the figures may be optimistic --
# confirm this matches the protocol being reproduced.
evl = Evaluation(pruned)
evl.crossvalidate_model(cls, pruned, 10, Random(1))

print(evl.percent_correct)
print(evl.summary())
print(evl.class_details())
print(evl.matrix())

JRIP rules:

(equality >= 1.832125) => class-att=2 (4.0/0.0)
(trace >= 1.325988) and (trace <= 1.396702) => class-att=2 (2.0/0.0)
(fsw <= 0.244119) and (code >= 1.184691) and (code <= 2.5536) => class-att=5 (12.0/4.0)
(fsw <= 0.216087) and (typo >= 3.366485) => class-att=5 (6.0/1.0)
(pb >= 0.672822) and (trace >= 1.849525) => class-att=4 (19.0/0.0)
(fsw <= 0.294736) and (cdh >= 4.249904) => class-att=4 (27.0/7.0)
(fsw <= 0.294736) and (fsw >= 0.289166) => class-att=4 (11.0/1.0)
(fsw <= 0.210757) and (design >= 1.642301) and (tp <= 0) => class-att=4 (15.0/1.0)
(fsw <= 0.210757) and (proc >= 0.710588) and (proc <= 0.871786) => class-att=4 (12.0/0.0)
(requirement <= 0) and (fsw >= 0.472328) and (fsw <= 0.584688) and (ambiguous <= 0) => class-att=4 (26.0/7.0)
 => class-att=3 (610.0/152.0)

Number of Rules : 11

70.43010752688173

Correctly Classified Instances         524               70.4301 %
Incorrectly Classified Instances       220               29.5699 %
Kappa statistic             