In [4]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import precision_recall_fscore_support
from collections import Counter
from IPython.display import clear_output

from pyexplainer.pyexplainer_pyexplainer import PyExplainer
from pyexplainer.pyexplainer_pyexplainer import AutoSpearman

In [1]:
def read_lines(path):
    """Return the lines of the text file at ``path`` without line terminators.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the content has been read.  ``splitlines`` keeps the final record even when
    the file does not end with a newline (the previous ``split('\\n')[0:-1]``
    silently dropped it in that case).
    """
    with open(path) as handle:
        return handle.read().splitlines()


# Feature names, one per line; later cells keep only the first `best_features` entries.
ranked_tp_features = read_lines("ranked_feature_names/ranked_qt_tp_JITLine.txt")
#ranked_tp_features = read_lines("mydataset/100_correctly_predicted_buggy_fetature_names.txt")

# Commit ids and their labels; line i of a *_label file is used as the label of
# line i of the matching *_commit file.
train_commit = read_lines("mydataset/train_commit.txt")
test_commit = read_lines("mydataset/test_commit.txt")
train_label = read_lines("mydataset/train_label.txt")
test_label = read_lines("mydataset/test_label.txt")

print(ranked_tp_features[0:10])
print(len(train_commit), len(test_commit), len(train_label), len(test_label))

# Commits and labels are paired by position, so the lists must have equal length.
assert len(train_commit) == len(train_label), "train commits/labels differ in length"
assert len(test_commit) == len(test_label), "test commits/labels differ in length"

['expr-name', 'expr-call-argument-expr-name', 'decl-name', 'expr-operator', 'expr-call-name', 'if-condition-expr-operator', 'decl-type-name', 'function_decl-name', 'decl-argument-expr-name', 'expr-call-name-name']
1709 734 1709 734


In [2]:
def findCorrectBuggy(yTest, yPredict, commits=None):
    """Collect the commit ids whose predicted label matches the true label.

    Parameters
    ----------
    yTest : sequence of int
        True labels (1 = buggy, 0 = non-buggy), indexable by position.
    yPredict : sequence of int
        Predicted labels, in the same order as ``yTest``.
    commits : sequence, optional
        Commit ids in the same order as ``yTest``.  When omitted, the
        notebook-level ``test_commit`` list is used, as before.

    Returns
    -------
    (list, list)
        ``(CorrectBuggyCommits, CorrectNonBuggyCommits)``: commit ids where
        both labels are 1, and commit ids where both labels are 0.  The length
        of each list is the number of correct predictions of that class.
    """
    if commits is None:
        # Backward-compatible fallback to the global defined in the loading cell.
        commits = test_commit

    CorrectBuggyCommits = []
    CorrectNonBuggyCommits = []

    for index, actual in enumerate(yTest):
        predicted = yPredict[index]
        if actual == 1 and predicted == 1:
            CorrectBuggyCommits.append(commits[index])
        elif actual == 0 and predicted == 0:
            CorrectNonBuggyCommits.append(commits[index])

    return CorrectBuggyCommits, CorrectNonBuggyCommits

In [11]:
file_path = "mydataset/"
best_features = 6  # number of leading entries of ranked_tp_features to keep

# Load the token-pattern (TP) dataset, keeping the commit id plus the selected features.
# Alternative kept for reference: inner-join with the commit metrics on 'commit_id'.
# df_tp = pd.merge(pd.read_csv(file_path+ "qtbase_tp_dataset.csv"), #TP: Token Pattern Features
#                  pd.read_csv(file_path+ "qt_metrics.csv"),
#                  on='commit_id')[['commit_id']+ranked_tp_features[0:]]
df_tp = pd.read_csv(file_path + "qtbase_tp_dataset.csv")[['commit_id'] + ranked_tp_features[0:best_features]]

# Replace infinities and missing values with 0.
df_tp = df_tp.replace([np.inf, -np.inf], np.nan).fillna(0)


def select_commits_in_order(frame, commits):
    """Return the rows of ``frame`` for ``commits``, in the order of ``commits``.

    A boolean ``isin`` mask would keep the CSV's row order, which is not
    guaranteed to match the order of the commit/label text files.  The labels
    assigned below and ``findCorrectBuggy`` both pair rows with those lists by
    position, so the rows are selected by label in list order instead.
    Raises ``KeyError`` if a commit id is missing from ``frame``.
    """
    return frame.set_index("commit_id").loc[commits].reset_index()


df_train = select_commits_in_order(df_tp, train_commit)
df_test = select_commits_in_order(df_tp, test_commit)

# Prefix every column with 'col_' and replace '-' with '_' in the column names.
df_train.columns = ['col_' + str(col).replace('-', '_') for col in df_train.columns]
df_test.columns = ['col_' + str(col).replace('-', '_') for col in df_test.columns]

print(df_train.shape, df_test.shape)

# Row i now corresponds to train_commit[i] / test_commit[i], so positional assignment is aligned.
df_train["bugcount"] = train_label
df_test["bugcount"] = test_label

print(df_train.shape, df_test.shape)

(1709, 7) (734, 7)
(1709, 8) (734, 8)


In [8]:
# Preview a single test row (positional row 1), skipping the first two columns.
df_test.iloc[1:2, 2:]

Unnamed: 0,col_expr_call_argument_expr_name,col_decl_name,col_expr_operator,col_expr_call_name,col_if_condition_expr_operator,bugcount
1,0,8,31,0,0,1


In [9]:
# Split each frame into the label vector and the feature matrix.
# 'col_commit_id' is an identifier, not a feature, so it is dropped with the label.
y_train = df_train["bugcount"]
X_train = df_train.drop(columns=['col_commit_id', 'bugcount'])

y_test = df_test["bugcount"]
X_test = df_test.drop(columns=['col_commit_id', 'bugcount'])

# NOTE: the classifier is created in the training cell below. The estimator that
# used to be built here (max_depth=100, n_estimators=100) was never fitted and was
# overwritten before use, so it only misrepresented the hyper-parameters in effect.


In [10]:
# Show the shapes of the training feature matrix and the training label vector.
train_shapes = (X_train.shape, y_train.shape)
print(*train_shapes)

(1709, 6) (1709,)


In [12]:
# Fit the black-box classifier on the selected features and predict the test set.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train.values.ravel())
y_preds = rf_model.predict(X_test)

# Cast both label sequences to int32 before passing them to findCorrectBuggy.
actual_as_int = list(y_test.astype('int32'))
predicted_as_int = list(y_preds.astype('int32'))
CorrectBuggyCommits, CorrectNonBuggyCommits = findCorrectBuggy(actual_as_int, predicted_as_int)

for caption, value in (
    ("Best Number of Features:", best_features),
    ("Number of Correct Buggy Commit Prediction: ", len(CorrectBuggyCommits)),
    ("Number of Correct Non-Buggy Commit Prediction: ", len(CorrectNonBuggyCommits)),
):
    print(caption, value)

Best Number of Features: 6
Number of Correct Buggy Commit Prediction:  112
Number of Correct Non-Buggy Commit Prediction:  267


In [13]:
# Wrap the predictions in a one-column frame that shares df_test's index.
# np.asarray(...).ravel() accepts both the raw prediction array and the
# one-column frame left behind by a previous run of this cell, so the cell can
# be re-executed without restarting the kernel.
y_preds = pd.DataFrame(data={'PredictedBug': np.asarray(y_preds).ravel()}, index=df_test.index)

# Features + true label ('bugcount') + predicted label ('PredictedBug'), joined on the index.
combined_testing_data = X_test.join(y_test.to_frame()).join(y_preds)

# total num of rows
total_rows = len(combined_testing_data)

In [14]:
# Keep only the rows where the predicted label equals the true label.
correct_mask = combined_testing_data['bugcount'] == combined_testing_data['PredictedBug']
correctly_predicted_data = combined_testing_data[correct_mask]
correctly_predicted_rows = len(correctly_predicted_data)

# Scale to a percentage *before* rounding: rounding first and then multiplying by
# 100 can print float artefacts (e.g. 0.57 * 100 == 56.99999999999999).
# An empty test set reports 0.0 instead of raising ZeroDivisionError.
accuracy_pct = round(100 * correctly_predicted_rows / total_rows, 1) if total_rows else 0.0

print("Total testing data:", total_rows)
print('The model correctly predicted ', accuracy_pct, '% of testing data')

Total testing data: 734
The model correctly predicted  51.6 % of testing data


In [15]:
# Count how many correctly predicted rows fall under each true-label value.
print(Counter(correctly_predicted_data['bugcount']))

Counter({'0': 267, '1': 112})


In [16]:
# Keep only the correctly predicted *buggy* rows.
# The label is cast to int32 (as in the training cell's findCorrectBuggy call)
# so the filter works whether 'bugcount' holds '1' strings or numeric 1.
is_buggy = correctly_predicted_data['bugcount'].astype('int32') == 1
correctly_predicted_bug = correctly_predicted_data[is_buggy]
print(correctly_predicted_bug.shape)

(112, 8)


In [17]:
# All rows, feature columns only: drop the two label columns by name so the
# selection does not depend on their position in the frame.
feature_cols = correctly_predicted_bug.drop(columns=['bugcount', 'PredictedBug'])
# All rows, one label column. 'bugcount' and 'PredictedBug' are equal for every
# row here (only correct predictions were kept), so either would do.
label_col = correctly_predicted_bug['bugcount']

In [18]:
# Configure the explainer with the training data, the feature (independent)
# column names, the label (dependent) column name and the fitted classifier.
explainer_settings = dict(
    X_train=X_train,
    y_train=y_train,
    indep=X_train.columns,
    dep='bugcount',
    blackbox_model=rf_model,
)
pyexp = PyExplainer(**explainer_settings)

In [19]:
# Generate one PyExplainer explanation per correctly predicted buggy row.
# list_of_rules[i] is the explanation of row i of feature_cols.
list_of_rules = []
n_rows_to_explain = len(feature_cols)
for selected_row in range(n_rows_to_explain):
    # Overwrite the previous progress line instead of flooding the output.
    clear_output(wait=True)
    print("Working for: ", selected_row, "/", n_rows_to_explain)

    # One-row frame with the feature values, re-indexed to start at 0.
    # Per the original author's note, .visualise needs the zero-based index
    # (the limitation is in the visualise function).  drop=True discards the
    # old index directly instead of materialising it as an 'index' column
    # and dropping that column afterwards.
    X_explain = feature_cols.iloc[[selected_row]].reset_index(drop=True)
    # Matching one-row label Series; its index is left untouched.
    y_explain = label_col.iloc[[selected_row]]

    created_rules = pyexp.explain(X_explain=X_explain,
                                  y_explain=y_explain,
                                  search_function='crossoverinterpolation',
                                  random_state=0,
                                  reuse_local_model=True)

    list_of_rules.append(created_rules)

# Please use the code below to visualise the generated PyExplainer explanation (What-If interactive visualisation).
#pyexp.visualise(created_rules, title="Why this file is defect-introducing ?")


Working for:  111 / 112
Random Perturbation only generated one class for the prediction column which means 
                     Random Perturbation is not compatible with the current data. 
                     The 'Crossover and Interpolation' approach is used as the alternative.


In [20]:
# Number of explanations generated by the loop above (one per explained row).
print(len(list_of_rules))

112


In [21]:
# Position in list_of_rules of the explanation to display.
rule_index = 4
# Show the interactive visualisation for that explanation.
pyexp.visualise(list_of_rules[rule_index], title="Why this file is defect-introducing ?")

HBox(children=(Label(value='Risk Score: '), FloatProgress(value=0.0, bar_style='info', layout=Layout(width='40…

Output(layout=Layout(border='3px solid black'))

FloatSlider(value=1.0, continuous_update=False, description='#1 The value of col_if_condition_expr_operator is…

FloatSlider(value=1.0, continuous_update=False, description='#2 The value of col_decl_name is less than 1', la…

In [22]:
# Print the 'rule' column of the 'top_k_positive_rules' table of the selected explanation.
# (Use list_of_rules[rule_index].keys() to list the other entries of an explanation.)
print(list_of_rules[rule_index]['top_k_positive_rules']['rule'])

0      col_expr_name <= 9.490000247955322 & col_expr_...
1      col_expr_name <= 6.789999961853027 & col_expr_...
2      col_decl_name <= 8.25 & col_expr_call_name <= ...
3      col_decl_name > 0.3349999934434891 & col_if_co...
4      col_decl_name > 1.2549999952316284 & col_expr_...
                             ...                        
115    col_decl_name > 1.2649999856948853 & col_decl_...
116    col_decl_name <= 8.934999942779541 & col_decl_...
117    col_if_condition_expr_operator <= 2.5750000476...
118    col_expr_call_argument_expr_name > 5.565000057...
119    col_if_condition_expr_operator > 13.0500001907...
Name: rule, Length: 120, dtype: object


In [23]:
def _strip_col_prefix(text, prefix='col_'):
    """Remove ``prefix`` from the start of ``text`` only.

    ``text.replace('col_', '')`` would also delete the substring inside a
    feature name (e.g. 'col_protocol_name' -> 'protoname').
    """
    return text[len(prefix):] if text.startswith(prefix) else text


# First row is the header; every following row is [commit_id, condition, occurrences].
BuggyRuleDataset = [['commit_id', 'rule', 'counter']]
# Distinct feature names (still 'col_'-prefixed) in order of first appearance.
buggy_rule_features = []

# A dedicated loop variable is used so the global `rule_index` chosen in the
# visualisation cell is not overwritten by this loop.
for explained_idx, explanation in enumerate(list_of_rules):
    # Break every positive rule ("cond_a & cond_b & ...") into its atomic conditions.
    single_rules = []
    for rule_text in explanation['top_k_positive_rules']['rule']:
        for condition in rule_text.split('&'):
            condition = condition.strip()
            if condition:  # skip empty fragments; cr.split()[0] below would fail on them
                single_rules.append(condition)

    # Occurrences of each atomic condition within this one explanation.
    counted_rules = Counter(single_rules)
    # list_of_rules[i] is paired with CorrectBuggyCommits[i] by position.
    commit_id = CorrectBuggyCommits[explained_idx]
    for cr in counted_rules:
        BuggyRuleDataset.append([commit_id, _strip_col_prefix(cr), counted_rules[cr]])
        feature_name = cr.split()[0]
        if feature_name not in buggy_rule_features:
            buggy_rule_features.append(feature_name)

In [24]:
# Print every row of BuggyRuleDataset (header row first), one row per line.
# NOTE(review): this prints the whole dataset, so the output can be very long.
for br in BuggyRuleDataset:
    print(br)

['commit_id', 'rule', 'counter']
['260bfaf056c08330ec2ed292e8d9def550e662ec', 'expr_name <= 9.490000247955322', 1]
['260bfaf056c08330ec2ed292e8d9def550e662ec', 'expr_call_argument_expr_name > 3.5149999856948853', 1]
['260bfaf056c08330ec2ed292e8d9def550e662ec', 'decl_name > 5.799999952316284', 1]
['260bfaf056c08330ec2ed292e8d9def550e662ec', 'decl_name <= 15.420000076293945', 1]
['260bfaf056c08330ec2ed292e8d9def550e662ec', 'if_condition_expr_operator > 7.664999961853027', 1]
['260bfaf056c08330ec2ed292e8d9def550e662ec', 'expr_name <= 6.789999961853027', 1]
['260bfaf056c08330ec2ed292e8d9def550e662ec', 'expr_call_name <= 9.945000171661377', 1]
['260bfaf056c08330ec2ed292e8d9def550e662ec', 'expr_operator <= 7.359999895095825', 1]
['260bfaf056c08330ec2ed292e8d9def550e662ec', 'expr_call_name > 0.5249999761581421', 1]
['260bfaf056c08330ec2ed292e8d9def550e662ec', 'decl_name > 1.2549999952316284', 1]
['260bfaf056c08330ec2ed292e8d9def550e662ec', 'expr_call_name > 0.6699999868869781', 1]
['260bfaf05

['0d308c4111f7cd322ed4a07e04b83c20e923e06a', 'decl_name <= -6.360000133514404', 1]
['0d308c4111f7cd322ed4a07e04b83c20e923e06a', 'expr_call_name > 11.970000267028809', 1]
['0d308c4111f7cd322ed4a07e04b83c20e923e06a', 'expr_call_argument_expr_name > 7.860000133514404', 1]
['0d308c4111f7cd322ed4a07e04b83c20e923e06a', 'if_condition_expr_operator <= 1.60999995470047', 1]
['0d308c4111f7cd322ed4a07e04b83c20e923e06a', 'expr_operator > 20.55500030517578', 1]
['0d308c4111f7cd322ed4a07e04b83c20e923e06a', 'if_condition_expr_operator <= 13.039999961853027', 1]
['0d308c4111f7cd322ed4a07e04b83c20e923e06a', 'if_condition_expr_operator <= 6.53000020980835', 1]
['0d308c4111f7cd322ed4a07e04b83c20e923e06a', 'expr_call_argument_expr_name <= 0.5850000083446503', 1]
['0d308c4111f7cd322ed4a07e04b83c20e923e06a', 'decl_name <= 14.690000057220459', 1]
['0d308c4111f7cd322ed4a07e04b83c20e923e06a', 'expr_name > 7.235000133514404', 1]
['0d308c4111f7cd322ed4a07e04b83c20e923e06a', 'decl_name > 6.4100000858306885', 1]
[

['438211ec627073817fcaf6d3a07b76f2aa5d90e0', 'if_condition_expr_operator > 14.375', 1]
['438211ec627073817fcaf6d3a07b76f2aa5d90e0', 'expr_call_name <= 13.925000190734863', 1]
['438211ec627073817fcaf6d3a07b76f2aa5d90e0', 'expr_call_argument_expr_name > 6.015000104904175', 1]
['438211ec627073817fcaf6d3a07b76f2aa5d90e0', 'expr_call_name > 12.429999828338623', 1]
['438211ec627073817fcaf6d3a07b76f2aa5d90e0', 'expr_call_argument_expr_name <= 7.8450000286102295', 1]
['438211ec627073817fcaf6d3a07b76f2aa5d90e0', 'expr_call_name <= 9.994999885559082', 1]
['438211ec627073817fcaf6d3a07b76f2aa5d90e0', 'expr_operator <= 0.23499999940395355', 1]
['438211ec627073817fcaf6d3a07b76f2aa5d90e0', 'expr_call_argument_expr_name > 6.629999876022339', 1]
['438211ec627073817fcaf6d3a07b76f2aa5d90e0', 'expr_name > -3.0', 1]
['438211ec627073817fcaf6d3a07b76f2aa5d90e0', 'decl_name > -0.3149999976158142', 1]
['438211ec627073817fcaf6d3a07b76f2aa5d90e0', 'if_condition_expr_operator > 13.065000057220459', 1]
['438211ec6

['61b56a89a1cf8a388ff925492700e5eef019c3aa', 'expr_name > 7.235000133514404', 1]
['61b56a89a1cf8a388ff925492700e5eef019c3aa', 'decl_name > 6.4100000858306885', 1]
['61b56a89a1cf8a388ff925492700e5eef019c3aa', 'decl_name > 14.300000190734863', 1]
['61b56a89a1cf8a388ff925492700e5eef019c3aa', 'decl_name > 1.4149999618530273', 1]
['61b56a89a1cf8a388ff925492700e5eef019c3aa', 'if_condition_expr_operator <= 17.354999542236328', 1]
['61b56a89a1cf8a388ff925492700e5eef019c3aa', 'expr_name > 3.5149999856948853', 1]
['61b56a89a1cf8a388ff925492700e5eef019c3aa', 'if_condition_expr_operator > 1.4649999737739563', 1]
['61b56a89a1cf8a388ff925492700e5eef019c3aa', 'decl_name <= -3.649999976158142', 1]
['61b56a89a1cf8a388ff925492700e5eef019c3aa', 'decl_name <= 17.55500030517578', 1]
['61b56a89a1cf8a388ff925492700e5eef019c3aa', 'if_condition_expr_operator > 18.015000343322754', 1]
['61b56a89a1cf8a388ff925492700e5eef019c3aa', 'expr_call_argument_expr_name > 0.574999988079071', 1]
['61b56a89a1cf8a388ff9254927

['fd619946be51784dc709363324897be6af144c52', 'decl_name <= 14.690000057220459', 1]
['fd619946be51784dc709363324897be6af144c52', 'expr_name > 7.235000133514404', 1]
['fd619946be51784dc709363324897be6af144c52', 'decl_name > 6.4100000858306885', 1]
['fd619946be51784dc709363324897be6af144c52', 'decl_name > 14.300000190734863', 1]
['fd619946be51784dc709363324897be6af144c52', 'decl_name > 1.4149999618530273', 1]
['fd619946be51784dc709363324897be6af144c52', 'if_condition_expr_operator <= 17.354999542236328', 1]
['fd619946be51784dc709363324897be6af144c52', 'expr_name > 3.5149999856948853', 1]
['fd619946be51784dc709363324897be6af144c52', 'if_condition_expr_operator > 1.4649999737739563', 1]
['fd619946be51784dc709363324897be6af144c52', 'decl_name <= -3.649999976158142', 1]
['fd619946be51784dc709363324897be6af144c52', 'decl_name <= 17.55500030517578', 1]
['fd619946be51784dc709363324897be6af144c52', 'if_condition_expr_operator > 18.015000343322754', 1]
['fd619946be51784dc709363324897be6af144c52', 

['b7b573ad6d9f50ebff87f5c6204225366a1675bd', 'expr_call_argument_expr_name > 0.5649999976158142', 1]
['b7b573ad6d9f50ebff87f5c6204225366a1675bd', 'expr_call_name <= 8.40500020980835', 1]
['b7b573ad6d9f50ebff87f5c6204225366a1675bd', 'expr_operator > 19.645000457763672', 1]
['b7b573ad6d9f50ebff87f5c6204225366a1675bd', 'if_condition_expr_operator <= 14.805000305175781', 1]
['b7b573ad6d9f50ebff87f5c6204225366a1675bd', 'decl_name <= -4.144999980926514', 1]
['b7b573ad6d9f50ebff87f5c6204225366a1675bd', 'expr_call_name > 11.619999885559082', 1]
['b7b573ad6d9f50ebff87f5c6204225366a1675bd', 'expr_call_argument_expr_name > 0.7150000035762787', 1]
['b7b573ad6d9f50ebff87f5c6204225366a1675bd', 'expr_call_argument_expr_name <= -3.4950000047683716', 1]
['b7b573ad6d9f50ebff87f5c6204225366a1675bd', 'if_condition_expr_operator <= 0.4400000050663948', 1]
['b7b573ad6d9f50ebff87f5c6204225366a1675bd', 'if_condition_expr_operator > 14.540000438690186', 1]
['b7b573ad6d9f50ebff87f5c6204225366a1675bd', 'decl_nam

['fc0f784e54d5dce72cc6a7e4b1fad243dadfcd76', 'expr_call_argument_expr_name <= 6.1000001430511475', 1]
['fc0f784e54d5dce72cc6a7e4b1fad243dadfcd76', 'decl_name > 1.8450000286102295', 1]
['fc0f784e54d5dce72cc6a7e4b1fad243dadfcd76', 'if_condition_expr_operator > 0.4050000011920929', 1]
['fc0f784e54d5dce72cc6a7e4b1fad243dadfcd76', 'expr_call_argument_expr_name > 0.4950000047683716', 2]
['fc0f784e54d5dce72cc6a7e4b1fad243dadfcd76', 'expr_operator > 0.3799999952316284', 1]
['fc0f784e54d5dce72cc6a7e4b1fad243dadfcd76', 'expr_call_name > -2.8100000619888306', 1]
['fc0f784e54d5dce72cc6a7e4b1fad243dadfcd76', 'expr_call_name <= 12.440000057220459', 1]
['fc0f784e54d5dce72cc6a7e4b1fad243dadfcd76', 'expr_call_name > 4.690000057220459', 1]
['fc0f784e54d5dce72cc6a7e4b1fad243dadfcd76', 'decl_name <= 14.365000247955322', 1]
['fc0f784e54d5dce72cc6a7e4b1fad243dadfcd76', 'decl_name > 10.37999963760376', 1]
['fc0f784e54d5dce72cc6a7e4b1fad243dadfcd76', 'if_condition_expr_operator > 6.305000066757202', 1]
['fc0f

['746f7a5b28d92d962ae261c52da4d750ea3b50f0', 'expr_name <= 0.004999999888241291', 1]
['746f7a5b28d92d962ae261c52da4d750ea3b50f0', 'expr_call_argument_expr_name <= 2.4250000715255737', 1]
['746f7a5b28d92d962ae261c52da4d750ea3b50f0', 'decl_name > 8.454999923706055', 1]
['746f7a5b28d92d962ae261c52da4d750ea3b50f0', 'expr_call_name <= 9.980000019073486', 1]
['746f7a5b28d92d962ae261c52da4d750ea3b50f0', 'if_condition_expr_operator > 14.475000381469727', 1]
['746f7a5b28d92d962ae261c52da4d750ea3b50f0', 'if_condition_expr_operator <= 8.950000286102295', 1]
['746f7a5b28d92d962ae261c52da4d750ea3b50f0', 'expr_name <= -0.11999999918043613', 1]
['746f7a5b28d92d962ae261c52da4d750ea3b50f0', 'expr_operator > -7.5950000286102295', 1]
['746f7a5b28d92d962ae261c52da4d750ea3b50f0', 'expr_call_name <= 0.8650000095367432', 1]
['746f7a5b28d92d962ae261c52da4d750ea3b50f0', 'expr_name > -9.639999866485596', 1]
['746f7a5b28d92d962ae261c52da4d750ea3b50f0', 'decl_name <= 1.5899999737739563', 1]
['746f7a5b28d92d962ae2

['cd91d8ad0281c984a01b8091696a6fdfdfa69514', 'expr_name > -3.0', 1]
['cd91d8ad0281c984a01b8091696a6fdfdfa69514', 'decl_name > -0.3149999976158142', 1]
['cd91d8ad0281c984a01b8091696a6fdfdfa69514', 'expr_call_argument_expr_name > -4.2749998569488525', 2]
['cd91d8ad0281c984a01b8091696a6fdfdfa69514', 'expr_operator > 0.29999999701976776', 1]
['cd91d8ad0281c984a01b8091696a6fdfdfa69514', 'expr_operator > 1.5699999928474426', 1]
['cd91d8ad0281c984a01b8091696a6fdfdfa69514', 'expr_name > 7.069999933242798', 1]
['cd91d8ad0281c984a01b8091696a6fdfdfa69514', 'if_condition_expr_operator <= 14.190000057220459', 1]
['cd91d8ad0281c984a01b8091696a6fdfdfa69514', 'if_condition_expr_operator > 6.289999961853027', 1]
['cd91d8ad0281c984a01b8091696a6fdfdfa69514', 'if_condition_expr_operator <= 6.9599997997283936', 1]
['cd91d8ad0281c984a01b8091696a6fdfdfa69514', 'expr_name <= 5.065000057220459', 1]
['cd91d8ad0281c984a01b8091696a6fdfdfa69514', 'if_condition_expr_operator <= 13.065000057220459', 1]
['cd91d8ad028

['b9362903b339e57362a7a3296904504521d0e26f', 'decl_name > 1.2649999856948853', 1]
['b9362903b339e57362a7a3296904504521d0e26f', 'decl_name > 6.0950000286102295', 1]
['b9362903b339e57362a7a3296904504521d0e26f', 'expr_call_argument_expr_name <= 0.9449999928474426', 1]
['b9362903b339e57362a7a3296904504521d0e26f', 'expr_call_argument_expr_name <= 0.7250000238418579', 1]
['b9362903b339e57362a7a3296904504521d0e26f', 'if_condition_expr_operator <= 14.46500015258789', 1]
['b9362903b339e57362a7a3296904504521d0e26f', 'decl_name > 6.069999933242798', 1]
['b9362903b339e57362a7a3296904504521d0e26f', 'decl_name <= 8.934999942779541', 1]
['b9362903b339e57362a7a3296904504521d0e26f', 'decl_name <= 15.065000057220459', 1]
['b9362903b339e57362a7a3296904504521d0e26f', 'decl_name > 10.875', 1]
['b9362903b339e57362a7a3296904504521d0e26f', 'if_condition_expr_operator <= 2.575000047683716', 1]
['b9362903b339e57362a7a3296904504521d0e26f', 'expr_call_argument_expr_name > 5.565000057220459', 1]
['b9362903b339e573

['4de3c5db238f45404feb6c6ce60810a3e11eae84', 'expr_operator <= 2.5999999046325684', 1]
['4de3c5db238f45404feb6c6ce60810a3e11eae84', 'expr_operator > 1.475000023841858', 1]
['4de3c5db238f45404feb6c6ce60810a3e11eae84', 'expr_operator <= 0.5299999862909317', 1]
['4de3c5db238f45404feb6c6ce60810a3e11eae84', 'expr_call_argument_expr_name <= 0.48000000417232513', 1]
['4de3c5db238f45404feb6c6ce60810a3e11eae84', 'expr_name <= 7.115000009536743', 1]
['4de3c5db238f45404feb6c6ce60810a3e11eae84', 'expr_call_argument_expr_name > -1.1299999952316284', 1]
['4de3c5db238f45404feb6c6ce60810a3e11eae84', 'if_condition_expr_operator > 5.490000009536743', 1]
['4de3c5db238f45404feb6c6ce60810a3e11eae84', 'expr_call_argument_expr_name > 5.680000066757202', 1]
['4de3c5db238f45404feb6c6ce60810a3e11eae84', 'expr_call_name <= 9.920000076293945', 2]
['4de3c5db238f45404feb6c6ce60810a3e11eae84', 'decl_name <= 7.950000047683716', 1]
['4de3c5db238f45404feb6c6ce60810a3e11eae84', 'if_condition_expr_operator <= 7.805000066

['eb88aaa3a2aed3235cf837e858f530f6a521349f', 'expr_call_argument_expr_name > -1.1299999952316284', 1]
['eb88aaa3a2aed3235cf837e858f530f6a521349f', 'if_condition_expr_operator <= 14.449999809265137', 1]
['eb88aaa3a2aed3235cf837e858f530f6a521349f', 'decl_name <= 14.269999980926514', 1]
['eb88aaa3a2aed3235cf837e858f530f6a521349f', 'expr_call_name > 4.8450000286102295', 1]
['eb88aaa3a2aed3235cf837e858f530f6a521349f', 'if_condition_expr_operator > 5.490000009536743', 1]
['eb88aaa3a2aed3235cf837e858f530f6a521349f', 'expr_call_argument_expr_name > 5.680000066757202', 1]
['eb88aaa3a2aed3235cf837e858f530f6a521349f', 'expr_call_name <= 9.920000076293945', 2]
['eb88aaa3a2aed3235cf837e858f530f6a521349f', 'decl_name <= 7.950000047683716', 1]
['eb88aaa3a2aed3235cf837e858f530f6a521349f', 'if_condition_expr_operator <= 7.805000066757202', 1]
['eb88aaa3a2aed3235cf837e858f530f6a521349f', 'expr_operator <= 2.415000081062317', 1]
['eb88aaa3a2aed3235cf837e858f530f6a521349f', 'expr_call_argument_expr_name >

['afd6313755399b24fb97e56f730f9898a0fa8f5a', 'expr_call_name <= 9.920000076293945', 2]
['afd6313755399b24fb97e56f730f9898a0fa8f5a', 'decl_name <= 7.950000047683716', 1]
['afd6313755399b24fb97e56f730f9898a0fa8f5a', 'if_condition_expr_operator <= 7.805000066757202', 1]
['afd6313755399b24fb97e56f730f9898a0fa8f5a', 'expr_operator <= 2.415000081062317', 1]
['afd6313755399b24fb97e56f730f9898a0fa8f5a', 'expr_call_name <= 8.625', 1]
['afd6313755399b24fb97e56f730f9898a0fa8f5a', 'expr_call_argument_expr_name > 0.5149999856948853', 1]
['afd6313755399b24fb97e56f730f9898a0fa8f5a', 'decl_name <= 14.699999809265137', 1]
['afd6313755399b24fb97e56f730f9898a0fa8f5a', 'expr_call_argument_expr_name > 5.265000104904175', 1]
['afd6313755399b24fb97e56f730f9898a0fa8f5a', 'expr_call_argument_expr_name > 1.1050000190734863', 1]
['afd6313755399b24fb97e56f730f9898a0fa8f5a', 'expr_name <= 8.235000133514404', 1]
['afd6313755399b24fb97e56f730f9898a0fa8f5a', 'expr_call_argument_expr_name > 0.510000005364418', 1]
['af

In [25]:
# Map each sanitised column name back to its original feature name.
# Reversing the sanitisation textually ('_' -> '-') is lossy for features whose
# real name contains an underscore (e.g. 'function_decl-name'), so the original
# names are looked up through the same transformation that built the columns.
original_feature_name = {
    'col_' + str(name).replace('-', '_'): name for name in ranked_tp_features
}

print(len(buggy_rule_features))
for brf in buggy_rule_features:
    # Fall back to the textual reversal for names missing from the ranked list.
    print(original_feature_name.get(brf, brf.replace('col_', '').replace('_', '-')))

6
expr-name
expr-call-argument-expr-name
decl-name
if-condition-expr-operator
expr-call-name
expr-operator


In [47]:
# Count, for every atomic condition, how many BuggyRuleDataset rows carry it.
# Each commit contributes a condition at most once, so this is the number of
# explained commits in which the condition appears.
# BuggyRuleDataset[0] is the header row and is skipped; otherwise the literal
# string 'rule' would be reported as a condition with count 1.
# A separate name is used so `list_of_rules` (the explanations needed by the
# visualisation and dataset cells) is not overwritten with plain strings.
rule_texts = [row[1] for row in BuggyRuleDataset[1:]]

counted_rule = Counter(rule_texts)
rule_report = [[rule, occurrences] for rule, occurrences in counted_rule.items()]

In [50]:
# Tabulate the (rule, count) pairs and order them from most to least frequent.
rule_table = pd.DataFrame(rule_report, columns=['rule', 'count'])
sorted_rules = rule_table.sort_values(by=['count'], ascending=False)

In [54]:
# Render the sorted rule table as text: a header line, then one "count: rule" line per row.
report_body = "".join(
    str(count) + ": " + rule + '\n'
    for count, rule in zip(sorted_rules['count'], sorted_rules['rule'])
)
final_text = "Count, Rule\n" + report_body

print(final_text)

Count, Rule
112: decl_name <= -0.3149999976158142
110: if_condition_expr_operator <= 7.805000066757202
110: expr_operator <= 2.415000081062317
110: expr_call_argument_expr_name > 6.015000104904175
109: decl_name <= 15.315000057220459
109: expr_operator > 25.09000015258789
109: expr_name <= 9.529999732971191
109: decl_name <= 9.684999942779541
109: expr_call_name > 12.440000057220459
109: if_condition_expr_operator > 14.375
108: if_condition_expr_operator <= 1.60999995470047
108: expr_call_argument_expr_name > 7.860000133514404
108: if_condition_expr_operator <= 6.53000020980835
108: decl_name <= 15.635000228881836
108: expr_call_name > 11.970000267028809
108: decl_name > 1.784999966621399
108: expr_name <= -3.4950000047683716
107: expr_call_argument_expr_name > -4.2749998569488525
106: decl_name > 1.1850000023841858
106: if_condition_expr_operator <= 15.315000057220459
106: expr_operator > 0.4699999988079071
106: expr_call_argument_expr_name <= 0.5049999952316284
106: expr_call_name <=