In [38]:
from models.model import *
from code2seq.code2seq import predict as predict_embeddings
import data_aggregation.get_features
from models.catboost_model import load_catboost_model
from embeddings.match_embeddings_with_methods import match_embeddings_with_methods_from_df
from data_aggregation.union_predictions_and_features import union_preds_features
import json
import base64

class BugLocalizationModelAPI:
    """Rank the frames of a stack trace by how likely each one contains the bug.

    The pipeline is: code2seq embeddings per frame -> LSTM model score per
    frame -> (optionally) a CatBoost model that combines the LSTM score with
    hand-crafted code features.

    ``methods_data`` is, throughout this class, a sequence of dicts with the
    keys ``'code'`` (base64-encoded UTF-8 source text; it is decoded in
    ``get_embeddings``) and ``'meta'`` (frame metadata handed to the project
    helpers unchanged).
    """

    # Length of the zero vectors used to pad the embedding matrix.
    # NOTE(review): presumably equals the code2seq embedding width — confirm.
    EMBEDDING_DIM = 384

    def __init__(self, lstm_model_path='', cb_model_path='', frames_limit=384):
        """Load whichever models have a path supplied.

        Args:
            lstm_model_path: path given to ``BugLocalizationModel.load_model``;
                when empty no LSTM model is loaded and ``self.model`` is None.
            cb_model_path: path given to ``load_catboost_model``; when empty
                ``self.cb_model`` is None.
            frames_limit: number of rows the embedding matrix is padded up to.
        """
        self.model = None
        if lstm_model_path:
            self.model = BugLocalizationModel()
            self.model.load_model(lstm_model_path)

        # Always define the attribute so a missing CatBoost model is detectable
        # instead of surfacing as an AttributeError later.
        self.cb_model = None
        if cb_model_path:
            self.cb_model = load_catboost_model(cb_model_path)

        # Created lazily on the first get_embeddings() call.
        self.code2seq_predictor = None
        # Honour the constructor argument (it used to be overwritten with 384).
        self.frames_limit = frames_limit

    def get_code_features(self, methods_data):
        """Run the project FeatureExtractor over every frame and return its DataFrame."""
        self.feature_extractor = data_aggregation.get_features.FeatureExtractor()
        for method in methods_data:
            # NOTE(review): 'code' is passed still base64-encoded here, whereas
            # get_embeddings decodes it first — confirm what the extractor expects.
            self.feature_extractor.get_feature_from_code(method['code'])
            # NOTE(review): the metadata dict goes through the same
            # get_feature_from_code call as the source — confirm this is intended.
            self.feature_extractor.get_feature_from_code(method['meta'])
        return self.feature_extractor.to_pandas()

    def collect_data_for_catboost(self, methods_data, lstm_prediction):
        """Join LSTM scores with code features and drop the non-feature columns."""
        code_features_df = self.get_code_features(methods_data)
        frames_len = len(methods_data)
        df_preds = self.model_prediction_to_df(lstm_prediction, frames_len)
        df_all = union_preds_features(df_preds, code_features_df)
        return df_all.drop(['label', 'method_name', 'report_id', 'indices'], axis=1)

    def predict_bug_lstm(self, embeddings, top_k=3):
        """Score frames with the LSTM model.

        Returns:
            ``(top_indices, scores)``: the ``top_k`` indices with the highest
            score, best first, and the flattened score vector.
        """
        # Index 1 on the last axis selects the second output channel.
        prediction = self.model.model(FloatTensor(embeddings))[:, :, 1]
        prediction = prediction.flatten()
        return (-prediction).argsort()[:top_k], prediction

    def predict_bug_cb(self, catboost_data, top_k=3):
        """Score frames with the CatBoost model.

        Returns:
            ``(top_indices, scores)`` with the same meaning as in
            ``predict_bug_lstm``.
        """
        probabilities = np.asarray(self.cb_model.predict_proba(catboost_data))
        # Select column 1 on the LAST axis. The previous ``[:, :, 1]`` raised
        # IndexError on a 2-D (n_samples, n_classes) result; ``[..., 1]`` gives
        # the same answer for 3-D input and also works for 2-D.
        prediction = probabilities[..., 1].flatten()
        return (-prediction).argsort()[:top_k], prediction

    def model_prediction_to_df(self, prediction, frames_len):
        """Wrap per-frame LSTM scores in the DataFrame layout used for the join."""
        return pd.DataFrame({
            'report_id': np.zeros(frames_len),
            'method_stack_position': np.arange(0, frames_len),
            'lstm_prediction': prediction,
        })

    def predict(self, methods_data, pred_type='lstm', top_k=3):
        """Rank the frames in ``methods_data``.

        Args:
            methods_data: frames to rank (see the class docstring).
            pred_type: ``'lstm'`` for the LSTM scores alone, ``'all'`` to
                refine them with the CatBoost model.
            top_k: how many frame indices to return.

        Returns:
            ``(top_indices, scores)`` from the selected model.

        Raises:
            ValueError: ``pred_type`` is neither ``'lstm'`` nor ``'all'``
                (this used to return None silently).
            RuntimeError: a model needed for ``pred_type`` was not loaded.
        """
        # Validate up front so a bad call fails before the embedding pass.
        if pred_type not in ('lstm', 'all'):
            raise ValueError(
                "pred_type must be 'lstm' or 'all', got {!r}".format(pred_type))
        if self.model is None:
            raise RuntimeError(
                'LSTM model is not loaded; pass lstm_model_path to the constructor')
        if pred_type == 'all' and self.cb_model is None:
            raise RuntimeError(
                'CatBoost model is not loaded; pass cb_model_path to the constructor')

        embeddings = self.get_embeddings(methods_data)
        top_k_pred, lstm_prediction = self.predict_bug_lstm(embeddings, top_k)
        if pred_type == 'lstm':
            return top_k_pred, lstm_prediction

        catboost_data = self.collect_data_for_catboost(methods_data, lstm_prediction)
        return self.predict_bug_cb(catboost_data, top_k)

    def get_embeddings(self, methods_data):
        """Build the embedding matrix for the frames, zero-padded to ``frames_limit`` rows.

        Inputs longer than ``frames_limit`` are not truncated.
        """
        if self.code2seq_predictor is None:
            self.code2seq_predictor = predict_embeddings()

        methods_embeddings = []
        for method in methods_data:
            # Frames with no source keep embeddings_df as None; the matching
            # helper receives None in that case.
            embeddings_df = None
            if method['code']:
                source_text = base64.b64decode(method['code']).decode("UTF-8")
                embeddings_df = self.code2seq_predictor.predict_by_code(source_text)
            embedding = match_embeddings_with_methods_from_df(embeddings_df, method['meta'])
            methods_embeddings.append(embedding)

        # Pad with zero vectors up to frames_limit rows.
        for _ in range(len(methods_data), self.frames_limit):
            methods_embeddings.append(np.zeros(self.EMBEDDING_DIM))

        return np.array(methods_embeddings)

    
# Build the API with its default arguments: no LSTM or CatBoost model path is
# passed, so __init__ leaves self.model as None and predict() has no model to call.
# NOTE(review): pass lstm_model_path (and cb_model_path for pred_type='all') here.
api = BugLocalizationModelAPI()
# `methods` is not defined in this cell; it is built by the cell labelled In [36],
# which therefore has to be executed before this one.
api.predict(methods)


ModuleNotFoundError: No module named 'config'

In [36]:
import base64
import json
import os

# Example crash report and the directory holding the source files of its frames.
# NOTE(review): absolute, machine-specific paths — presumably these should be
# made relative/configurable before the notebook is shared.
REPORT_JSON = '/Users/e.poslovskaya/bug_ml_copy_2/bug_ml_copy_2/ex_reports/4.json'
REPORT_SOURCES_DIR = '/Users/e.poslovskaya/bug_ml_copy_2/bug_ml_copy_2/ex_reports/4'

with open(REPORT_JSON, 'r') as report_file:
    f = json.load(report_file)

# `methods` is the payload consumed by BugLocalizationModelAPI.predict: one dict
# per frame with base64-encoded source ('code') and frame metadata ('meta').
methods = []
methods_meta = []
for i, frame in enumerate(f['frames']):
    print(frame['file_name'])
    method_meta = {
        'class': f['class'],
        'id': f['id'],
        'pos': i,
        'line_number': frame['line_number'],
        'label': 0,
        'method_name': frame['method_name'],
    }
    # Store this frame's dict (the list used to be appended to itself).
    methods_meta.append(method_meta)

    try:
        source_path = os.path.join(REPORT_SOURCES_DIR, frame['file_name'])
        print(source_path)
        with open(source_path, 'r', encoding='UTF-8') as source_file:
            source_text = source_file.read()
    except (OSError, UnicodeDecodeError, TypeError):
        # Best effort: a frame whose source is missing or unreadable is kept
        # with empty code so the frame positions stay aligned.
        print(frame['file_name'])
        source_text = ''

    encoded_source = base64.b64encode(source_text.encode("UTF-8")).decode("UTF-8")
    methods.append({'code': encoded_source, 'meta': method_meta})


Line.java
/Users/e.poslovskaya/bug_ml_copy_2/bug_ml_copy_2/ex_reports/4/Line.java
Line.java
/Users/e.poslovskaya/bug_ml_copy_2/bug_ml_copy_2/ex_reports/4/Line.java
SubLine.java
/Users/e.poslovskaya/bug_ml_copy_2/bug_ml_copy_2/ex_reports/4/SubLine.java
SubLineTest.java
/Users/e.poslovskaya/bug_ml_copy_2/bug_ml_copy_2/ex_reports/4/SubLineTest.java
Native
/Users/e.poslovskaya/bug_ml_copy_2/bug_ml_copy_2/ex_reports/4/Native
Native
NativeMethodAccessorImpl.java
/Users/e.poslovskaya/bug_ml_copy_2/bug_ml_copy_2/ex_reports/4/NativeMethodAccessorImpl.java
NativeMethodAccessorImpl.java
DelegatingMethodAccessorImpl.java
/Users/e.poslovskaya/bug_ml_copy_2/bug_ml_copy_2/ex_reports/4/DelegatingMethodAccessorImpl.java
DelegatingMethodAccessorImpl.java
Method.java
/Users/e.poslovskaya/bug_ml_copy_2/bug_ml_copy_2/ex_reports/4/Method.java
Method.java
FrameworkMethod.java
/Users/e.poslovskaya/bug_ml_copy_2/bug_ml_copy_2/ex_reports/4/FrameworkMethod.java
FrameworkMethod.java
ReflectiveCallable.java
/Users

In [37]:
# Persist the frame payload; the context manager guarantees the file is flushed
# and closed (the bare open() handle was never closed).
with open("ex_api_stacktrace.json", "w") as payload_file:
    json.dump(methods, payload_file)

In [1]:
#from code2seq.code2seq import predict
from embeddings import match_embeddings_with_methods
import numpy as np
import sys
import os

# Sibling project directories that must be importable from this notebook.
# Each one has its own name; the embeddings path used to overwrite
# PACKAGE_DATA_AGG.
PACKAGE_PARENT = '../code2seq'
PACKAGE_DATA_AGG = '../data_aggregation'
PACKAGE_EMBEDDINGS = '../embeddings'

# NOTE(review): these entries are added after the `from embeddings import ...`
# line earlier in this cell, so they cannot influence that import — confirm order.
for _package_dir in (PACKAGE_PARENT, PACKAGE_DATA_AGG, PACKAGE_EMBEDDINGS):
    _normalized_dir = os.path.normpath(_package_dir)
    # Skip directories already present so re-running the cell does not keep
    # growing sys.path with duplicates.
    if _normalized_dir not in sys.path:
        sys.path.append(_normalized_dir)

def embed_files(path_to_files, path_to_methods, embedding_dim=320):
    """Embed the methods of one report and shape them as a single batch.

    Args:
        path_to_files: forwarded to ``match_embeddings_with_methods.process_data``.
        path_to_methods: forwarded to ``match_embeddings_with_methods.process_data``.
        embedding_dim: length of one embedding vector; defaults to the
            previously hard-coded 320.

    Returns:
        numpy array of shape ``(1, n, embedding_dim)``, where ``n`` is inferred
        from the amount of data returned by ``process_data``.
    """
    embeddings = match_embeddings_with_methods.process_data(path_to_files, path_to_methods)
    # Leading axis of 1 = a single batch entry; -1 lets numpy infer the row count.
    return np.array(embeddings).reshape(1, -1, embedding_dim)


def predict_bug(model, embeddings, top_k=3):
    """Score frames with ``model`` and return the best-ranked indices.

    Args:
        model: object exposing a callable ``model`` attribute that is applied
            to ``FloatTensor(embeddings)``.
        embeddings: batch of frame embeddings.
        top_k: number of indices to return; defaults to the previously
            hard-coded 3, matching ``BugLocalizationModelAPI.predict_bug_lstm``.

    Returns:
        ``(top_indices, scores)``: the ``top_k`` indices with the highest
        score, best first, and the flattened score vector.
    """
    # NOTE(review): FloatTensor is not imported in this cell — presumably it
    # comes from `from models.model import *` in another cell; confirm.
    prediction = model.model(FloatTensor(embeddings))[:, :, 1]
    prediction = prediction.flatten()
    # Negate so that argsort's ascending order yields the highest scores first.
    return (-prediction).argsort()[:top_k], prediction


# Inputs for the example run: both values are handed to embed_files, which
# forwards them to match_embeddings_with_methods.process_data.
# NOTE(review): absolute, machine-specific paths — presumably these should be
# relative to a configurable data directory.
path_to_files = '/Users/e.poslovskaya/bug_ml/ex_reports/4'
path_to_methods = '/Users/e.poslovskaya/bug_ml/ex_reports/stacktrace_ex.json'

embeddings = embed_files(path_to_files, path_to_methods)
# NOTE(review): `model` is never assigned in this cell, so on a fresh kernel this
# call raises NameError; a loaded model has to be created before this line.
predict_bug(model, embeddings)

NameError: name 'model' is not defined