# Put the ground truth in first place in the ranking

In [3]:
import json
import os


def re_rank_methods(data):
    """Renumber the covered methods so ``method_id`` equals list position.

    Each entry of ``data['covered_methods']`` is updated in place, and the
    same ``data`` object is returned.
    """
    next_id = 0
    for entry in data['covered_methods']:
        entry['method_id'] = next_id
        next_id += 1
    return data


def map_method_ids_to_signatures(processed_dir, bug_id, test_id):
    """Build a ``method_id -> method_signature`` lookup for one test.

    Reads ``<processed_dir>/<bug_id>/test_<test_id>.json`` and collects the
    entries of its ``covered_methods`` list. An empty dict is returned when
    that file does not exist.
    """
    json_path = os.path.join(processed_dir, f"{bug_id}", f"test_{test_id}.json")
    if not os.path.exists(json_path):
        return {}

    with open(json_path, 'r') as handle:
        payload = json.load(handle)

    lookup = {}
    for entry in payload.get('covered_methods', []):
        lookup[entry['method_id']] = entry['method_signature']
    return lookup


def load_ground_truth_methods(ground_truth_dir, bug_id):
    """Load the ground-truth method signatures recorded for one bug.

    Reads ``<ground_truth_dir>/<bug_id>.txt``, treating each line as one
    signature. Surrounding whitespace is stripped and blank lines are
    ignored, so a stray empty line never adds ``''`` to the result.

    Args:
        ground_truth_dir: Directory containing the ``<bug_id>.txt`` files.
        bug_id: Identifier used to build the file name.

    Returns:
        A set of signature strings (for fast membership tests); empty when
        the file does not exist.
    """
    ground_truth_file = os.path.join(ground_truth_dir, f"{bug_id}.txt")
    if not os.path.exists(ground_truth_file):
        return set()

    with open(ground_truth_file, 'r') as file:
        # Iterate the file lazily instead of materialising readlines().
        stripped_lines = (line.strip() for line in file)
        return {signature for signature in stripped_lines if signature}


def generate_perfect_ranking(ranked_data_dir, ground_truth_dir, perfect_ranking_dir, project_name):
    """Write an oracle ("perfect") ranking for every ranked test file.

    For each ``<ranked_data_dir>/<bug_id>/<test_file>`` the ``covered_methods``
    list is reordered so that methods whose ``method_signature`` is in the
    bug's ground-truth set come first (relative order is otherwise kept).
    ``method_id`` is then renumbered to match the new positions and the
    result is written to ``<perfect_ranking_dir>/<bug_id>/<test_file>``.

    Args:
        ranked_data_dir: Directory with one sub-directory per bug, each
            holding the ranked JSON files to process.
        ground_truth_dir: Directory passed to ``load_ground_truth_methods``.
        perfect_ranking_dir: Output root; created if it does not exist.
        project_name: Unused; kept so existing callers keep working.
    """
    os.makedirs(perfect_ranking_dir, exist_ok=True)

    for bug_id in os.listdir(ranked_data_dir):  # Loop over bugs in the project
        bug_input_dir = os.path.join(ranked_data_dir, bug_id)
        if not os.path.isdir(bug_input_dir):
            # A stray file next to the bug directories (e.g. an OS metadata
            # file) would otherwise make the inner os.listdir() raise.
            continue

        bug_ranking_dir = os.path.join(perfect_ranking_dir, bug_id)
        os.makedirs(bug_ranking_dir, exist_ok=True)

        ground_truth_methods = load_ground_truth_methods(ground_truth_dir, bug_id)

        for test_file in os.listdir(bug_input_dir):
            ranked_file_path = os.path.join(bug_input_dir, test_file)
            if not os.path.isfile(ranked_file_path):
                continue
            perfect_ranking_file_path = os.path.join(bug_ranking_dir, test_file)

            with open(ranked_file_path, 'r') as json_file:
                data = json.load(json_file)
            covered_methods = data.get('covered_methods', [])

            # Stable partition: ground-truth methods first, everything else
            # after, each group keeping its original relative order.
            ground_truth_covered = [
                method for method in covered_methods
                if method['method_signature'] in ground_truth_methods
            ]
            non_ground_truth_covered = [
                method for method in covered_methods
                if method['method_signature'] not in ground_truth_methods
            ]
            data['covered_methods'] = ground_truth_covered + non_ground_truth_covered

            # Ids are assigned only here; any earlier numbering would be
            # overwritten by this renumbering anyway.
            data = re_rank_methods(data)

            # Save the perfect ranking to the output file
            with open(perfect_ranking_file_path, 'w') as json_file:
                json.dump(data, json_file, indent=4)


# Driver: build the perfect ("oracle") ranking for every project/technique pair.
# The commented-out list below is the wider project set; only "Time" is active.
# projects = ["Cli", "Math", "Csv", "Codec", "Gson", "JacksonCore", "JacksonXml", "Mockito", "Compress", "Jsoup"]
projects = ["Time"]
techniques = ["callgraph"]

for project_name in projects:
    for technique in techniques:
        # Input rankings produced by the selected technique.
        ranked_data_dir = f'../data/RankedData/{project_name}/{technique}'
        # Directory holding the per-bug ground-truth files for this project.
        ground_truth_dir = f'../data/BuggyMethods/{project_name}'
        # NOTE(review): the output path is fixed to "perfect_callgraph" and does
        # not include `technique`; with more than one technique every pass
        # would write into the same directory.
        perfect_ranking_dir = f'../data/RankedData/{project_name}/perfect_callgraph'
        generate_perfect_ranking(ranked_data_dir, ground_truth_dir, perfect_ranking_dir, project_name)


# Randomize the ranking

In [10]:
import json
import os
import random


def re_rank_methods(data):
    """Give every covered method a ``method_id`` equal to its list index.

    The entries of ``data['covered_methods']`` are modified in place and the
    same ``data`` object is handed back to the caller.
    """
    covered = data['covered_methods']
    for position, entry in enumerate(covered):
        entry['method_id'] = position
    return data


def map_method_ids_to_signatures(processed_dir, bug_id, test_id):
    """Return a dict mapping each ``method_id`` to its ``method_signature``.

    The data comes from ``<processed_dir>/<bug_id>/test_<test_id>.json``
    (its ``covered_methods`` list). If that file is absent the mapping is
    empty.
    """
    signatures_by_id = {}
    target = os.path.join(processed_dir, f"{bug_id}", f"test_{test_id}.json")
    if os.path.exists(target):
        with open(target, 'r') as fh:
            covered = json.load(fh).get('covered_methods', [])
        for item in covered:
            signatures_by_id[item['method_id']] = item['method_signature']
    return signatures_by_id


def generate_random_ranking(ranked_data_dir, random_ranking_dir, project_name, seed=None):
    """Write a randomly ordered ranking for every ranked test file.

    For each ``<ranked_data_dir>/<bug_id>/<test_file>`` the ``covered_methods``
    list is shuffled, ``method_id`` is renumbered to match the new positions,
    and the result is written to ``<random_ranking_dir>/<bug_id>/<test_file>``.

    Args:
        ranked_data_dir: Directory with one sub-directory per bug, each
            holding the ranked JSON files to process.
        random_ranking_dir: Output root; created if it does not exist.
        project_name: Unused; kept so existing callers keep working.
        seed: Optional seed for a private ``random.Random`` instance so the
            baseline can be reproduced. With the default ``None`` the
            module-level ``random`` generator is used, as before.
    """
    rng = random if seed is None else random.Random(seed)

    os.makedirs(random_ranking_dir, exist_ok=True)

    # Listings are sorted so that, with a seed, every file always receives
    # the same shuffle regardless of the order the OS returns entries in.
    for bug_id in sorted(os.listdir(ranked_data_dir)):  # Loop over bugs in the project
        bug_input_dir = os.path.join(ranked_data_dir, bug_id)
        if not os.path.isdir(bug_input_dir):
            # A stray file next to the bug directories would otherwise make
            # the inner os.listdir() raise.
            continue

        bug_ranking_dir = os.path.join(random_ranking_dir, bug_id)
        os.makedirs(bug_ranking_dir, exist_ok=True)

        for test_file in sorted(os.listdir(bug_input_dir)):
            ranked_file_path = os.path.join(bug_input_dir, test_file)
            if not os.path.isfile(ranked_file_path):
                continue
            random_ranking_file_path = os.path.join(bug_ranking_dir, test_file)

            with open(ranked_file_path, 'r') as json_file:
                data = json.load(json_file)
            covered_methods = data.get('covered_methods', [])

            # Randomize the order of covered methods (in place)
            rng.shuffle(covered_methods)
            data['covered_methods'] = covered_methods

            # Reassign method IDs in sequence after randomizing
            data = re_rank_methods(data)

            # Save the random ranking to the output file
            with open(random_ranking_file_path, 'w') as json_file:
                json.dump(data, json_file, indent=4)


# Driver: build the random baseline ranking for every project/technique pair.
# The commented-out list below is the wider project set; only "Time" is active.
# projects = ["Cli", "Math", "Csv", "Codec", "Gson", "JacksonCore", "JacksonXml", "Mockito", "Compress", "Jsoup", "Lang"]
projects = ["Time"]
techniques = ["execution"]

for project_name in projects:
    for technique in techniques:
        # Input rankings produced by the selected technique.
        ranked_data_dir = f'../data/RankedData/{project_name}/{technique}'
        # NOTE(review): the output path does not include `technique`; with more
        # than one technique every pass would write into the same directory.
        random_ranking_dir = f'../data/RankedData/{project_name}/random'  # Save random rankings here
        generate_random_ranking(ranked_data_dir, random_ranking_dir, project_name)
