<center><h1>GlobalWEBugLocator</h1></center> 

- In this method we perform the localization directly and indirectly using the the TFIDF similarity score of bug reports to source codes in each project individually. Note that TF.IDF scores are calculated using the local data. That means that for each term we calculate the term frequency and inverse document frequency of that term using only the bug reports and source codes of that project. 
- Note that both TF.IDF and Doc2Vec scores are calculated using the __global data__. That means that for each term we calculate the term frequency and inverse document frequency of that term using all bug reports and source codes of all projects. 
- Refer to the third row of the following table (Method7 - Global Doc2Vec + TFIDF) to better understand the details of this method.

<img src="Methods.png">

### Libraries

In [10]:
from __future__ import division
import pandas as pd
import numpy as np

import os
from os import listdir
from os.path import isfile, join

import gensim
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
from gensim.test.test_doc2vec import ConcatenatedDoc2Vec
from gensim.test.utils import get_tmpfile
from gensim import similarities
from gensim import models

import traceback
import logging

import operator
import warnings
import glob
import math 
from scipy import spatial
import scipy.spatial.distance
import multiprocessing
from tqdm import tqdm_notebook
from time import gmtime, strftime

warnings.simplefilter(action='ignore', category=FutureWarning)

<center><h1>Evaluators</h1></center> 

In [11]:
def evaluate_helper(ranked_files,fixes):
    """
    @Receives: list of ranked files(which is predicted by algorithm) and the ground truth(fixes)
    @Process: This is a function aimed to help evaluate, and calculates the AP and first_pos 
    @Returns: A dictionary containing the AP(average precision) and first_pos (first retrieved fix file)
    """
    found=0
    first_pos=-1
    average_precision=0
    for i,predictionFix in enumerate(ranked_files):
        for actualFix in fixes:
            if actualFix.split('.')[-2] == predictionFix:
                if first_pos==-1:
                    first_pos=i+1
                found+=1
                average_precision+=found/(i+1)        

    AP=average_precision/found if found>0 else 0
    return {"AP":AP,"first_pos":first_pos}


def evaluate(all_bugs_df,source_codes_df):
    """
    @Receives: The main dataframe
    @Process: Evaluates the predicted files for each bugreport in all_bugs_df
    @Returns: MAP and MRR calculated from eligible bugreports(the ones with
    at least one fix file in this version of code) in the dataframe and number of eligible bug reports.
    """
    all_results=[]
    top_founds=[]
    average_precisions=[]
    for i,br in all_bugs_df.iterrows():
        if not source_codes_df.loc[source_codes_df.filename.apply(lambda filename: any(fix in filename for fix in br['fix']))].empty:
            predicted_files=br['total_score'].keys()
            result=evaluate_helper(predicted_files,br['fix'])
            top_founds.append(result['first_pos'])
            average_precisions.append(result['AP'])
            all_results.append(result)
        else:
            top_founds.append(-1)
            average_precisions.append(0.0)
    all_bugs_df["top_found"]=top_founds
    all_bugs_df["average_precision"]=average_precisions
    
    #Calculating the MAP and MRR
    MAP,MRR=(0,0)
    if len(all_results)>0:
        for result in all_results:
            MAP+=result['AP']
            MRR+=1/result['first_pos'] if result['first_pos']>0 else 0
        MAP/=len(all_results)
        MRR/=len(all_results)
        print("eligible_br_count: ",len(all_results))
    return (MAP,MRR,len(all_results))


<center><h1>TFIDF</h1></center> 

In [14]:
def getNormValue(x,maximum,minimum):
    return 6*((x - minimum)/(maximum - minimum))

def getLenScore(length):
    return (math.exp(length) / (1 + math.exp(length)))

def calulateLengthScore(source_codes_df):
    """
    Receives: a list of sizes of codes and the index
    Process: calculate a boost score for the specified index based on length of that code
    Returns: length boosting score 
    """
    average_size=source_codes_df['size'].mean()
    standard_deviation=source_codes_df['size'].std() 
    low=average_size-3*standard_deviation
    high= average_size+3*standard_deviation
    minimum=int(low) if low>0 else 0
        
    len_scores=[]
    for i,eachLen in source_codes_df['size'].items():
        score=0
        nor=getNormValue(eachLen,high,minimum)
        if eachLen!=0:
            if eachLen>=low and eachLen<=high:
                score=getLenScore(nor)
            elif eachLen<low:
                score=0.5
            elif eachLen>high:
                score = 1.0
        len_scores.append(score)
    source_codes_df['lengthScore']=len_scores

    return source_codes_df
    
def inverse_doc_freq(idf,D):
    return math.log(D/idf)

def term_freq(tf_list):
    return [(math.log(tf+1)) for tf in tf_list]

def np_normalizer(arr):
    """
    @Receives: a list of numbers
    @Process: normalizes all the values and map them to range of [0,1]
    @Returns: list of normalized numbers
    """
    if len(arr)>0:
        maximum=np.amax(arr)
        minimum=np.amin(arr)
        if maximum!=minimum:
            return (arr-minimum)/(maximum-minimum)
    return arr

def normalizer(Dict):
    """
    @Receives: a list of numbers
    @Process: normalizes all the values and map them to range of [0,1]
    @Returns: list of normalized numbers
    """
    if len(Dict)>0:
        maximum=max(Dict.items(), key=operator.itemgetter(1))[1]
        minimum=min(Dict.items(), key=operator.itemgetter(1))[1]
        for key,value in Dict.items():
            if maximum!=minimum:
                Dict[key]=(value-minimum)/(maximum-minimum)
            else:
                Dict[key]=1.0
            
    return Dict
    

def TFIDF_transform(all_bugs_df,source_codes_df):
    
    print("\tTransforming to TF.IDF ...")
    dictionary = gensim.corpora.Dictionary(list(source_codes_df['code']))
    corpus = [dictionary.doc2bow(doc) for doc in list(source_codes_df['code'])]
    tfidf_weights = models.TfidfModel(corpus,wlocal=term_freq,wglobal=inverse_doc_freq,normalize=False)
    source_codes_df['tfidf_vector']=tfidf_weights[corpus]
    all_bugs_df['tfidf_vector']=all_bugs_df.text.apply(lambda x: tfidf_weights[dictionary.doc2bow(x)])
    return (all_bugs_df,source_codes_df,len(dictionary))

def cos_matrix_multiplication(matrix, vector):
    """
    Calculating pairwise cosine distance using matrix vector multiplication.
    """
    dotted = matrix.dot(vector)
    matrix_norms = np.linalg.norm(matrix, axis=1)
    vector_norm = np.linalg.norm(vector)
    matrix_vector_norms = np.multiply(matrix_norms, vector_norm)
    neighbors = np.divide(dotted, matrix_vector_norms)
    return neighbors

<center><h1> Genral Global Doc2Vec Model </h1></center> 


In [15]:
def load_all_BRs(dataPath):
    print('\tLoading all bug reports ... ')
    all_bugs_df=pd.DataFrame([])
    all_groups=[folder for folder in listdir(dataPath)]
    for group in tqdm_notebook(all_groups):
        all_projects= [folder for folder in listdir(os.path.join(dataPath,group))]
        for project in all_projects:
            data_path=os.path.join(dataPath,group,project,"bugrepo","repository.xml")
            all_bugs_df=all_bugs_df.append(loadBugs2df(data_path,project))
            print(len(all_bugs_df))
    return all_bugs_df

def load_all_SCs(dataPath):
    print('\tLoading all source codes ... ')
    source_codes_df=pd.DataFrame([])
    all_groups=[folder for folder in listdir(dataPath)]
    for group in tqdm_notebook(all_groups):
        all_projects= [folder for folder in listdir(os.path.join(dataPath,group))]
        for project in all_projects:
            source_path=os.path.join(dataPath,group,project,"gitrepo")
            source_codes_df=source_codes_df.append(loadSourceFiles2df(source_path,group,project))
    return source_codes_df

In [16]:
def build_Doc2Vec_models(vec_size,alpha,window_size,all_bugs_df,source_codes_df):
    """
    Process: 1- Loads all the bug reports from all the group/projects in Data directory
             2- Makes a Doc2Vec model and trains it based on all the bugreports
    Returns: Trained model
    """
    print("\n\t Now building the Combined Doc2Vec model ... ")
    dmm_model_path=os.path.join(os.getcwd(),'Models','combined_doc2vec_model_dmm')
    dbow_model_path=os.path.join(os.getcwd(),'Models','combined_doc2vec_model_dbow')
    fname_dmm = get_tmpfile(dmm_model_path)
    fname_dbow = get_tmpfile(dbow_model_path)
    if os.path.isfile(dmm_model_path) and os.path.isfile(dbow_model_path):
        revectorize=False
        model_dmm = Doc2Vec.load(fname_dmm)
        model_dbow = Doc2Vec.load(fname_dbow)
        print("*** Combined Doc2Vec Model is Loaded. ***")            
    else:
        revectorize=True
        documents = [TaggedDocument(all_bugs_df.iloc[i].text, [i]) for i in range(len(all_bugs_df))]
        documents = documents + [TaggedDocument(source_codes_df.iloc[i].code, [len(all_bugs_df)+i]) for i in range(len(source_codes_df))]

        model_dmm = Doc2Vec(vector_size=vec_size, window=window_size, min_count=2,
                        workers=multiprocessing.cpu_count(),
                        alpha=alpha, min_alpha=alpha/2,dm=1)
        model_dmm.build_vocab(documents)
        model_dmm.train(documents,total_examples=model_dmm.corpus_count,epochs=20)
        model_dmm.delete_temporary_training_data(keep_doctags_vectors=True, keep_inference=True)
        model_dmm.save(fname_dmm)
        
        model_dbow = Doc2Vec(dm=0, vector_size=vec_size, negative=5,
                             hs=0,min_count=2, sample = 0, workers=multiprocessing.cpu_count(),
                             alpha=alpha, min_alpha=alpha/3)
        model_dbow.build_vocab(documents)
        model_dbow.train(documents,total_examples=model_dbow.corpus_count,epochs=20)
        model_dbow.delete_temporary_training_data(keep_doctags_vectors=True, keep_inference=True)
        model_dbow.save(fname_dbow)
        print("*** Combined Doc2Vec Model is Trained. ***")
    concatinated_model = ConcatenatedDoc2Vec([model_dbow, model_dmm])
    print(">> Size of Vocabulary is: {}".format(len(model_dmm.wv.vocab)))
    print(">> Number of whole Documents: {}".format(model_dmm.corpus_count))
    
    return (concatinated_model,revectorize)

In [17]:
def synthesize(sourceCodeScores,bugReportScores):
    sourceCodeScores=normalizer(sourceCodeScores)
    bugReportScores=normalizer(bugReportScores)
    for file in bugReportScores.keys():
        if file in sourceCodeScores.keys():
            sourceCodeScores[file]=sourceCodeScores[file]*0.8+bugReportScores[file]*0.2
    return sourceCodeScores


<center><h1>Main BugLocalization class</h1></center> 

In [18]:
class BugLocalizer:
    
    TFIDF_transformed=False
    dictionary_length=0
    all_projects_source_codes=pd.DataFrame([])
    all_projects_bugreports=pd.DataFrame([])
    dataFolder=""
    
    def __init__(self,group,project,result_path,dataPath):
        self.project=project
        self.resultPath=result_path
            
    def execute(self):
        print("\t ****** Localizing Bugs for project: {} ******".format(self.project))
        vectorize=self.loadEverything()
        revectorize=False
        
        if not BugLocalizer.TFIDF_transformed:
            (self.all_projects_bugreports,BugLocalizer.all_projects_source_codes,BugLocalizer.dictionary_length)=TFIDF_transform(all_bugs_df=BugLocalizer.all_projects_bugreports,
                                                                                                     source_codes_df=BugLocalizer.all_projects_source_codes)                                       
            BugLocalizer.TFIDF_transformed=True

        self.loadBugCurpus()
        self.loadSourceFiles()
        self.localize()
        self.evaluate()
        self.to_csv()
        self.write_result()
        
    def loadEverything():
        if BugLocalizer.all_projects_bugreports.empty:
            bugReportFile=os.path.join(BugLocalizer.dataFolder,'allBugReports.pickle')
            if not os.path.isfile(bugReportFile):
                print("The bug reports file (allBugReports.pickle) does not exist. please run the step0 first")
            else: 
                BugLocalizer.all_projects_bugreports=pd.read_pickle(bugReportFile)
                print("*** All Bug Reports are Loaded. ***")

        if BugLocalizer.all_projects_source_codes.empty:
            sourceCodeFile=os.path.join(BugLocalizer.dataFolder,'allSourceCodes.pickle')
            if not os.path.isfile(sourceCodeFile):
                print("The source codes file (allSourceCodes.pickle) does not exist. please run the step0 first")
            else:
                BugLocalizer.all_projects_source_codes=pd.read_pickle(sourceCodeFile)
                print("*** All Source Codes are Loaded. ***")
    
    def loadBugCurpus(self):
        self.all_bugs_df=BugLocalizer.all_projects_bugreports.loc[BugLocalizer.all_projects_bugreports['project']==self.project,:]
    
    def loadSourceFiles(self):
        self.source_codes_df=BugLocalizer.all_projects_source_codes.loc[BugLocalizer.all_projects_source_codes['project']==self.project,:]
    
    def vectorizeBugreports(self):
        BugLocalizer.all_projects_bugreports['doc2vec_vector']=np.array(BugLocalizer.all_projects_bugreports.text.apply(BugLocalizer.combined_Doc2vec.infer_vector))
        
    def vectorizeSourceCodes(self):
        BugLocalizer.all_projects_source_codes['doc2vec_vector']=np.array(BugLocalizer.all_projects_source_codes.code.apply(BugLocalizer.combined_Doc2vec.infer_vector))
     

    def localize(self):
        
        print("Localizing Now ...")
        self.source_codes_df=calulateLengthScore(self.source_codes_df)
        scores=[]
        direct_tfidf_index = similarities.SparseMatrixSimilarity(list(self.source_codes_df.tfidf_vector),num_features=BugLocalizer.dictionary_length)
        indirect_WE_index=np.array(list(self.all_bugs_df.doc2vec_vector))
        indirectScores=[]
        BRRanks=[]
        for i, br in tqdm_notebook(self.all_bugs_df.iterrows()):
            
            try:
                direct_tfidf_similarities=direct_tfidf_index[br.tfidf_vector]
                direct_tfidf_similarities=np_normalizer(direct_tfidf_similarities)
                sourceCodeScores={self.source_codes_df.iloc[j].filename.split('.')[-2]: (direct_tfidf_similarities[j])*self.source_codes_df.iloc[j].lengthScore 
                                                  for j in range(len(self.source_codes_df))
                                                  if len(self.source_codes_df.iloc[j].filename.split('.'))>1}                                  

                indirect_WE_similarities=cos_matrix_multiplication(indirect_WE_index, br.doc2vec_vector)
                bugReportScores=dict({})
                for j,(idx,other_br) in enumerate(self.all_bugs_df.iterrows()):
                    for fixFile in other_br.fix:
                        if idx != i:
                            if fixFile.split('.')[-2] in bugReportScores.keys():
                                if indirect_WE_similarities[j]>=bugReportScores[fixFile.split('.')[-2]]:
                                    bugReportScores[fixFile.split('.')[-2]]=indirect_WE_similarities[j]
                            else:
                                bugReportScores[fixFile.split('.')[-2]]=indirect_WE_similarities[j]
                indirectScores.append(bugReportScores)
                BRRanks.append({idx:indirect_WE_similarities[j] for j,(idx,other_br) in enumerate(self.all_bugs_df.iterrows())})
                ranking=synthesize(sourceCodeScores,bugReportScores)
                scores.append({file:score for file,score in sorted(ranking.items(),key=lambda tup: tup[1],reverse=True)})
            except Exception as e:
                logging.error(traceback.format_exc())
                scores.append({})
                
        self.all_bugs_df['Indirect_scores']=indirectScores
        self.all_bugs_df['BR_scores']=BRRanks                
        self.all_bugs_df['total_score']=scores

        
    def evaluate(self):
        self.result=evaluate(self.all_bugs_df,self.source_codes_df)
        print("Result/"+self.project+":\n\t",'*'*4," MAP: ",self.result[0],'*'*4,'\n\t','*'*4," MRR: ",self.result[1],'*'*4,"\n","-"*100)

    def to_csv(self):
        BugReports_path=os.path.join(self.resultPath,'BugReports') 
        SourceFiles_path=os.path.join(self.resultPath,'SourceFiles')
        if not os.path.exists(BugReports_path):
            os.makedirs(BugReports_path)
        if not os.path.exists(SourceFiles_path):
            os.makedirs(SourceFiles_path)
        result_Bug_file=os.path.join(BugReports_path,self.project+"_BugReports.csv")
        result_source_file=os.path.join(SourceFiles_path,self.project+"_SourceFiles.csv")
        self.all_bugs_df.to_csv(result_Bug_file)
        if len(self.all_bugs_df)<300:        
            self.source_codes_df.to_csv(result_source_file)
        
    def write_result(self):
        group_result=open(os.path.join(self.resultPath,"results_{}.csv".format(self.project)),'w')
        group_result.write("project , MAP , MRR , #ofBugReports\n")
        group_result.write(project+','+str(self.result[0])+','+str(self.result[1])+','+str(self.result[2])+"\n")
        group_result.close()
        

### MAIN

In [20]:
def folder_structure(run_name):
    result_path=os.path.join(os.getcwd(),"Result",run_name,strftime("%Y-%m-%d %H:%M:%S", gmtime()))
    if not os.path.exists(result_path):
        os.makedirs(result_path)
    return result_path
    
if __name__=="__main__":

    run_name='8-GlobalWEBugLocator'
    result_path=folder_structure(run_name)
    BugLocalizer.dataFolder=os.path.join(os.getcwd(),'Data')
    if BugLocalizer.all_projects_bugreports.empty or BugLocalizer.all_projects_source_codes.empty:
        BugLocalizer.loadEverything()
    
    all_projects=set(BugLocalizer.all_projects_bugreports.project)

    for project in all_projects:
        core=BugLocalizer(project=project,result_path=result_path)
        core.execute()

	 ****** Localizing Bugs for group: Spring , project: DATAREST ******
Localizing Now ...


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

eligible_br_count:  121
Result/Spring_DATAREST:
	 ****  MAP:  0.44786744588808763 **** 
	 ****  MRR:  0.6631087552849046 **** 
 ----------------------------------------------------------------------------------------------------
	 ****** Localizing Bugs for group: Spring , project: SECOAUTH ******
Localizing Now ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


eligible_br_count:  66
Result/Spring_SECOAUTH:
	 ****  MAP:  0.4176536570043631 **** 
	 ****  MRR:  0.49445861053586343 **** 
 ----------------------------------------------------------------------------------------------------
	 ****** Localizing Bugs for group: Spring , project: ROO ******
Localizing Now ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


eligible_br_count:  558
Result/Spring_ROO:
	 ****  MAP:  0.34986977956236903 **** 
	 ****  MRR:  0.44496014246082394 **** 
 ----------------------------------------------------------------------------------------------------
	 ****** Localizing Bugs for group: Spring , project: DATAGRAPH ******
Localizing Now ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


eligible_br_count:  12
Result/Spring_DATAGRAPH:
	 ****  MAP:  0.16595826716596188 **** 
	 ****  MRR:  0.23623327187860063 **** 
 ----------------------------------------------------------------------------------------------------
	 ****** Localizing Bugs for group: Spring , project: SHL ******
Localizing Now ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


eligible_br_count:  10
Result/Spring_SHL:
	 ****  MAP:  0.33752621171495306 **** 
	 ****  MRR:  0.4061633999235698 **** 
 ----------------------------------------------------------------------------------------------------
	 ****** Localizing Bugs for group: Spring , project: SOCIALLI ******
Localizing Now ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


eligible_br_count:  4
Result/Spring_SOCIALLI:
	 ****  MAP:  0.3998484650270364 **** 
	 ****  MRR:  0.65625 **** 
 ----------------------------------------------------------------------------------------------------
	 ****** Localizing Bugs for group: Spring , project: SGF ******
Localizing Now ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


eligible_br_count:  98
Result/Spring_SGF:
	 ****  MAP:  0.3821398990617936 **** 
	 ****  MRR:  0.6261971082388222 **** 
 ----------------------------------------------------------------------------------------------------
	 ****** Localizing Bugs for group: Spring , project: SEC ******
Localizing Now ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


eligible_br_count:  362
Result/Spring_SEC:
	 ****  MAP:  0.47845770799489235 **** 
	 ****  MRR:  0.5996794446639655 **** 
 ----------------------------------------------------------------------------------------------------
	 ****** Localizing Bugs for group: Spring , project: LDAP ******
Localizing Now ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


eligible_br_count:  52
Result/Spring_LDAP:
	 ****  MAP:  0.3801187241561234 **** 
	 ****  MRR:  0.4778642203698764 **** 
 ----------------------------------------------------------------------------------------------------
	 ****** Localizing Bugs for group: Spring , project: DATAREDIS ******
Localizing Now ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


eligible_br_count:  49
Result/Spring_DATAREDIS:
	 ****  MAP:  0.5313276072268275 **** 
	 ****  MRR:  0.7612657184085757 **** 
 ----------------------------------------------------------------------------------------------------
	 ****** Localizing Bugs for group: Spring , project: AMQP ******
Localizing Now ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


eligible_br_count:  97
Result/Spring_AMQP:
	 ****  MAP:  0.3774103471634547 **** 
	 ****  MRR:  0.5389638733299548 **** 
 ----------------------------------------------------------------------------------------------------
	 ****** Localizing Bugs for group: Spring , project: SOCIAL ******
Localizing Now ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


eligible_br_count:  13
Result/Spring_SOCIAL:
	 ****  MAP:  0.5998907006060389 **** 
	 ****  MRR:  0.6186384765332134 **** 
 ----------------------------------------------------------------------------------------------------
	 ****** Localizing Bugs for group: Spring , project: SPR ******
Localizing Now ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


eligible_br_count:  123
Result/Spring_SPR:
	 ****  MAP:  0.2733090670006579 **** 
	 ****  MRR:  0.361699803254431 **** 
 ----------------------------------------------------------------------------------------------------
	 ****** Localizing Bugs for group: Spring , project: SOCIALTW ******
Localizing Now ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


eligible_br_count:  8
Result/Spring_SOCIALTW:
	 ****  MAP:  0.80625 **** 
	 ****  MRR:  0.8375 **** 
 ----------------------------------------------------------------------------------------------------
	 ****** Localizing Bugs for group: Spring , project: DATACMNS ******
Localizing Now ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


eligible_br_count:  152
Result/Spring_DATACMNS:
	 ****  MAP:  0.5074509743640699 **** 
	 ****  MRR:  0.6725420568575324 **** 
 ----------------------------------------------------------------------------------------------------
	 ****** Localizing Bugs for group: Spring , project: BATCHADM ******
Localizing Now ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


eligible_br_count:  20
Result/Spring_BATCHADM:
	 ****  MAP:  0.46917767764120644 **** 
	 ****  MRR:  0.5845360195360195 **** 
 ----------------------------------------------------------------------------------------------------
	 ****** Localizing Bugs for group: Spring , project: MOBILE ******
Localizing Now ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


eligible_br_count:  11
Result/Spring_MOBILE:
	 ****  MAP:  0.6433744811854553 **** 
	 ****  MRR:  0.8363636363636363 **** 
 ----------------------------------------------------------------------------------------------------
	 ****** Localizing Bugs for group: Spring , project: BATCH ******
Localizing Now ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


eligible_br_count:  354
Result/Spring_BATCH:
	 ****  MAP:  0.4043184845027857 **** 
	 ****  MRR:  0.5903866465896869 **** 
 ----------------------------------------------------------------------------------------------------
	 ****** Localizing Bugs for group: Spring , project: SHDP ******
Localizing Now ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


eligible_br_count:  45
Result/Spring_SHDP:
	 ****  MAP:  0.4300863363259259 **** 
	 ****  MRR:  0.5800120979341532 **** 
 ----------------------------------------------------------------------------------------------------
	 ****** Localizing Bugs for group: Spring , project: ANDROID ******
Localizing Now ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


eligible_br_count:  9
Result/Spring_ANDROID:
	 ****  MAP:  0.3220963116423541 **** 
	 ****  MRR:  0.6462962962962963 **** 
 ----------------------------------------------------------------------------------------------------
	 ****** Localizing Bugs for group: Spring , project: SOCIALFB ******
Localizing Now ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


eligible_br_count:  15
Result/Spring_SOCIALFB:
	 ****  MAP:  0.505007651857174 **** 
	 ****  MRR:  0.5861375661375661 **** 
 ----------------------------------------------------------------------------------------------------
	 ****** Localizing Bugs for group: Spring , project: DATAMONGO ******
Localizing Now ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


eligible_br_count:  264
Result/Spring_DATAMONGO:
	 ****  MAP:  0.3519085164109085 **** 
	 ****  MRR:  0.5070100211174262 **** 
 ----------------------------------------------------------------------------------------------------
	 ****** Localizing Bugs for group: Spring , project: SWF ******
Localizing Now ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


eligible_br_count:  105
Result/Spring_SWF:
	 ****  MAP:  0.4398811329192849 **** 
	 ****  MRR:  0.5474363308248886 **** 
 ----------------------------------------------------------------------------------------------------
	 ****** Localizing Bugs for group: Spring , project: SWS ******
Localizing Now ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


eligible_br_count:  159
Result/Spring_SWS:
	 ****  MAP:  0.42147183496132107 **** 
	 ****  MRR:  0.5346497747531151 **** 
 ----------------------------------------------------------------------------------------------------
	 ****** Localizing Bugs for group: Spring , project: DATAJPA ******
Localizing Now ...


HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))


eligible_br_count:  144
Result/Spring_DATAJPA:
	 ****  MAP:  0.47949330184682837 **** 
	 ****  MRR:  0.6862801022681223 **** 
 ----------------------------------------------------------------------------------------------------



<center><h1>Result</h1></center> 

In [15]:

method='8-GlobalWEBugLocator'
runNumber="run"
all_results_csv=[os.path.join(os.getcwd(),"Result",method,runNumber,folder) 
                 for folder in listdir(os.path.join(os.getcwd(),"Result",method,runNumber)) if '.csv' in folder]
results_df=pd.DataFrame([])
for result_csv in all_results_csv:
    res=pd.read_csv(result_csv,index_col=[0],header=0)
    results_df=results_df.append(res)

project_size_df=pd.read_csv('project_size.csv',index_col=[0],header=0)
results_df
results_df=pd.merge(results_df, project_size_df,
                                      left_index=True,
                                      right_index=True)
results_df=results_df.reset_index()
results_df=results_df.set_index(' #ofBugReports')

results_df.to_csv(os.path.join(os.getcwd(),"Result",method,'result.csv'))
