In [None]:
# Move one directory up so the relative paths used in later cells
# ('experiments/...', 'data/...') resolve; %pwd echoes the resulting directory.
# NOTE: not idempotent - re-running this cell moves up one more level each time.
%cd ..
%pwd

In [None]:
from experiments.modeleval_utils import *

In [None]:
# Experiment configuration.
# drop_columns switches feature selection on (True) or off (False). Each
# setting has its own results directory, so output_dir is derived from the
# flag rather than edited separately - the two can no longer get out of sync.
drop_columns = True
output_dir = 'experiments/results_FS' if drop_columns else 'experiments/results'

In [None]:
def fit_best_model(model, data, feature_type, target, scoring):
    """Fit the pipeline for one experiment and pick its decision threshold.

    Args:
        model: Model identifier forwarded to ``get_pipeline`` (e.g. ``'lr'``).
        data: Dataset key looked up in ``data_map[feature_type]``.
        feature_type: Feature family key in ``data_map`` (e.g. ``'traditional'``).
        target: Target name handed to the dataset loader.
        scoring: Scoring name forwarded to ``get_pipeline``.

    Returns:
        ``(pipeline, threshold)``: the pipeline fitted on the training split
        and the threshold reported by ``get_best_f1_threshold``.

    Notes:
        Reads the notebook-level ``drop_columns`` and ``output_dir`` settings.
        The split is unshuffled with ``test_size=0.1``; the threshold is chosen
        on the *training* split, not on the held-out part.
    """
    load_dataset = data_map[feature_type][data]
    X, y, _ = load_dataset(target, drop_columns)
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.1, shuffle=False)

    # NOTE(review): get_pipeline presumably restores tuned hyper-parameters
    # stored under output_dir - confirm in modeleval_utils.
    pipeline, _, _ = get_pipeline(output_dir, data, feature_type, target, scoring, model)
    pipeline.fit(X_train, y_train)

    threshold, _ = get_best_f1_threshold(pipeline, X_train, y_train)
    return pipeline, threshold

In [None]:
def predict_at_threshold(clf, X, t):
    """Turn the scores of ``clf`` on ``X`` into 0/1 predictions.

    Args:
        clf: Fitted estimator or pipeline understood by ``get_y_score``.
        X: Samples to score.
        t: Decision threshold; a score equal to ``t`` counts as positive.

    Returns:
        Integer array with 1 where the score is ``>= t`` and 0 elsewhere.
    """
    # NOTE(review): get_y_score presumably returns probabilities or decision
    # values as a numpy array - confirm in modeleval_utils.
    scores = get_y_score(clf, X)
    is_positive = scores >= t
    return is_positive.astype(int)

In [None]:
# Load every (X, y, features) triple used by the analysis cells.
# Each loader is called exactly the way fit_best_model calls it
# (target, drop_columns), so the data a model is evaluated on is loaded with
# the same settings as the data it was trained on. Previously the bow loaders
# were called without drop_columns here.
datas = {
    key: data_map[feature_type][dataset](target, drop_columns)
    for key, feature_type, dataset, target in (
        ('szz_traditional', 'traditional', 'fixed_defect_szz', 'performance'),
        ('szz_bow', 'bow', 'fixed_defect_szz', 'performance'),
        ('bugbug_traditional', 'traditional', 'bugbug_buglevel', 'performance'),
        ('bugbug_bow', 'bow', 'bugbug_buglevel', 'performance'),
        ('bugbug_reg_traditional', 'traditional', 'bugbug_buglevel', 'regression'),
        ('bugbug_reg_bow', 'bow', 'bugbug_buglevel', 'regression'),
    )
}

In [None]:
# Fit one logistic-regression ('lr') pipeline per experiment; every entry is
# the (pipeline, threshold) pair returned by fit_best_model.
# Spec columns: result key, dataset, feature type, target.
pipelines = {
    key: fit_best_model('lr', dataset, feature_type, target, 'average_precision')
    for key, dataset, feature_type, target in (
        ('szz_traditional', 'fixed_defect_szz', 'traditional', 'performance'),
        ('szz_bow', 'fixed_defect_szz', 'bow', 'performance'),
        ('bugbug_traditional', 'bugbug_buglevel', 'traditional', 'performance'),
        ('bugbug_bow', 'bugbug_buglevel', 'bow', 'performance'),
        ('bugbug_reg_traditional', 'bugbug_buglevel', 'traditional', 'regression'),
        ('bugbug_reg_bow', 'bugbug_buglevel', 'bow', 'regression'),
    )
}

In [None]:
from feature_name_map import feature_name_map

In [None]:
import shap
# SHAP summary plot for one experiment, computed on its held-out split.
kind = 'bugbug_traditional'

X, y, f = datas[kind]
pipeline, t = pipelines[kind]
# Same unshuffled 90/10 split as in fit_best_model, so X_test is the part the
# model was not fitted on.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, shuffle=False)

# Explain the bare model, so the inputs must be transformed the way the
# pipeline would transform them before they reach the model.
clf = pipeline['model']
if 'traditional' in kind:
    X_ = pipeline['scaler'].transform(X_test)
    f = f.drop('target', axis=1)
    # Human-readable labels for the plot.
    feature_names = [feature_name_map[c] for c in f.columns]
else:
    # NOTE(review): assumes the bow matrix is scipy-sparse and that f already
    # holds the feature names - confirm against the bow loader.
    X_ = X_test.toarray()
    feature_names = f

explainer = shap.Explainer(clf, X_, feature_names=feature_names)
print(explainer)
shap_values = explainer.shap_values(X_)

shap.summary_plot(shap_values, X_, max_display=15, feature_names=feature_names, show=False)
plt.tight_layout()
summary_path = os.path.join(output_dir, f'plots_interpretability/summary_{kind}.pdf')
# savefig does not create missing directories; make sure the target exists.
os.makedirs(os.path.dirname(summary_path), exist_ok=True)
plt.savefig(summary_path)

In [None]:
# SHAP waterfall plot for a single sample of the experiment selected in the
# summary cell (reads kind, clf, X_, X_train and feature_names from it).
# Row indices are positions in the full dataset.
commitlevel_ix = 90291
buglevel_ix = 61674
ix = buglevel_ix if 'bugbug' in kind else commitlevel_ix

# X_ only holds the held-out rows, so shift the dataset index into it.
# A sample from the training range would give a negative offset, which numpy
# silently wraps around to a different row - fail loudly instead.
test_row = ix - X_train.shape[0]
if not 0 <= test_row < X_.shape[0]:
    raise IndexError(
        f'sample {ix} is outside the held-out split '
        f'[{X_train.shape[0]}, {X_train.shape[0] + X_.shape[0]})'
    )
x = X_[[test_row], :]

explainer = shap.Explainer(clf, X_, feature_names=feature_names)
shap_values = explainer(x)
plt.ioff()
shap.waterfall_plot(shap_values[0], show=False)
plt.tight_layout()
waterfall_path = os.path.join(output_dir, f'plots_interpretability/waterfall_{kind}.pdf')
# savefig does not create missing directories; make sure the target exists.
os.makedirs(os.path.dirname(waterfall_path), exist_ok=True)
plt.savefig(waterfall_path)

#### Finding candidate commit for investigation

In [None]:
# Build lookups between revision ids and row positions in the two feature
# tables.
# NOTE(review): assumes these row positions line up with the rows returned by
# the data_map loaders - confirm.
F_buglevel = pd.read_csv('data/feature_extractor/features_buglevel.csv')
F_commitlevel = pd.read_csv('data/feature_extractor/features_commitlevel.csv')

# Commit level: one revision per row.
commitlevel_ix_to_rev = list(F_commitlevel['revision'])
rev_to_commitlevel_ix = {rev: i for i, rev in enumerate(commitlevel_ix_to_rev)}

# Bug level: each row stores a comma-separated group of revisions, so every
# member revision maps back to that row. A revision listed in several rows
# keeps the last one.
buglevel_ix_to_rev = list(F_buglevel['revisions'])
rev_to_buglevel_ix = {
    rev: i
    for i, revs in enumerate(buglevel_ix_to_rev)
    for rev in revs.split(',')
}

# The frames were only needed to build the lookups; release them.
del F_buglevel, F_commitlevel

In [None]:
# Sanity check: sizes of the four revision/row lookups built above.
len(rev_to_commitlevel_ix), len(commitlevel_ix_to_rev), len(rev_to_buglevel_ix), len(buglevel_ix_to_rev)

In [None]:
# NOTE(review): presumably the revisions SZZ correctly identified (true
# positives); later cells iterate it as revision ids - confirm the JSON schema.
# The path is fixed to 'experiments/results' and does not follow output_dir.
szz_tps = read_data_from_json('experiments/results/szz_regressed_by_tp.json')

In [None]:
# Translate each SZZ true-positive revision into its bug-level and
# commit-level row index. Revisions missing from a lookup are skipped for that
# level; sets remove duplicates before the final sort.
bug_rows = set()
commit_rows = set()

for rev in szz_tps:
    if rev in rev_to_buglevel_ix:
        bug_rows.add(rev_to_buglevel_ix[rev])
    if rev in rev_to_commitlevel_ix:
        commit_rows.add(rev_to_commitlevel_ix[rev])

szz_tps_buglevel_ix = sorted(bug_rows)
szz_tps_commitlevel_ix = sorted(commit_rows)
len(szz_tps_buglevel_ix), len(szz_tps_commitlevel_ix)

In [None]:
# Predict the SZZ true-positive rows with both feature sets of one dataset.
# NOTE: this cell rebinds kind, X, y, clf and t. The SHAP cells earlier in the
# notebook read kind and clf, so re-running them after this cell picks up
# these values instead of their own.
kind = 'szz'
# 'szz' data is indexed by commit-level rows, everything else by bug-level rows.
szz_tps_ixs = szz_tps_commitlevel_ix if kind == 'szz' else szz_tps_buglevel_ix
szz_tps_ixs = np.array(szz_tps_ixs)

X, y, _ = datas[kind + '_traditional']
# Here clf is the whole fitted pipeline, not just its 'model' step.
clf, t = pipelines[kind + '_traditional']
y_pred_traditional = predict_at_threshold(clf, X[szz_tps_ixs, :], t)

# Sum of the labels over the selected rows vs. the number of selected rows.
print(y[szz_tps_ixs].sum(), len(szz_tps_ixs))

X, y, _ = datas[kind + '_bow']
clf, t = pipelines[kind + '_bow']
y_pred_bow = predict_at_threshold(clf, X[szz_tps_ixs, :], t)

# Number of selected rows each model predicts as positive (predictions are 0/1).
y_pred_traditional.sum(), y_pred_bow.sum()

In [None]:
# Index of the first held-out row. It is taken from the same unshuffled split
# the models were fitted with, rather than approximated as 0.9 * n, so it
# matches scikit-learn's own rounding of the split sizes.
train_ixs, test_ixs = train_test_split(np.arange(X.shape[0]), test_size=0.1, shuffle=False)
test_threshold = len(train_ixs)

# Predictions are 0/1, so '<' keeps the rows where the traditional model says
# negative while the bow model says positive.
disagreements = szz_tps_ixs[(y_pred_traditional < y_pred_bow)]
# '>=' so the first held-out row is included; a strict '>' dropped it.
disagreements[disagreements >= test_threshold]

In [None]:
# Candidate commit selected for closer inspection, given as a commit-level row.
# NOTE(review): presumably picked from the disagreement list above - confirm.
commit_ix = 90291
rev = commitlevel_ix_to_rev[commit_ix]  # revision id stored at that row
bug_ix = rev_to_buglevel_ix[rev]  # bug-level row whose revision group lists it
# Show the revision, its bug-level row, and the full revision group of that row.
rev, bug_ix, buglevel_ix_to_rev[bug_ix]

In [None]:
# Predict the single candidate sample with both feature sets of one dataset.
kind = 'szz'
# bugbug data is indexed by bug-level rows, szz data by commit-level rows.
ix = commit_ix if 'bugbug' not in kind else bug_ix

candidate_preds = {}
for feature_type in ('traditional', 'bow'):
    X, y, _ = datas[kind + '_' + feature_type]
    clf, t = pipelines[kind + '_' + feature_type]
    # The [[ix], :] indexing keeps the sample two-dimensional.
    candidate_preds[feature_type] = predict_at_threshold(clf, X[[ix], :], t)

y_pred_traditional = candidate_preds['traditional']
y_pred_bow = candidate_preds['bow']

y_pred_traditional[0], y_pred_bow[0]