### 1. Basic Information of SynPTCEvo4j
- SynPTCEvo4j: `UnitTestUpdater/dataset/synPTCEvo4j/test.json`
- In total: **136**

### 2. Human Evaluation

**2.1 Show the difference between prediction and ground truth**

In [None]:
import json, os
from utils.helper import read_examples, get_diff
from utils.gitter import setup_repo, UpdateRepo
from utils.configs import REPO_BASE
from utils.formatter import formatted_java_code
from utils.parser import get_code_without_comments, has_parse_error

# Dataset: JSON file with the evaluation samples (relative path, resolved
# against the current working directory).
input_datafile = "dataset/synPTCEvo4j/test.json"

# Baselines: JSON prediction files of the approaches compared against.
output_datafile_ceprot = "outputs/CEPROT/test_ceprot.json"
output_datafile_noctx = "outputs/NaiveLLM/test_woctx.json"  # n1,n2,n3

# Our approach (SynBCIATR): JSON prediction file.
output_datafile_allctx_wot = "outputs/SynBCIATR/test_all_ctx_wot.json"  # n1,n2,n3


def read_data(datafile):
    """Load and return the parsed JSON content of ``datafile``.

    The file is decoded explicitly as UTF-8 so that the result does not
    depend on the platform's default locale encoding (predictions may
    contain non-ASCII characters).

    Args:
        datafile: Path of the JSON file to read.

    Returns:
        The deserialized JSON value (whatever ``json.load`` produces).
    """
    with open(datafile, "r", encoding="utf-8") as f:
        return json.load(f)


# Parsed prediction files, one entry per approach. The order here must stay
# aligned with `all_infos` below, because both lists are indexed together.
all_outputs = list(
    map(
        read_data,
        (
            output_datafile_ceprot,
            output_datafile_noctx,
            output_datafile_allctx_wot,
        ),
    )
)

# Display name of each approach, index-aligned with `all_outputs`.
all_infos = ["CEPROT", "NaiveLLM", "SynBCIATR"]


def substitute_code(repo: UpdateRepo, exp, pred, type_idx):
    """Replace the ground-truth test method on disk with a prediction.

    The test file content is obtained from ``repo.get_file_tgt`` using
    ``exp.test_db["rel_path"]``. Every occurrence of
    ``exp.test_db["method_tgt"]`` in it is replaced by ``pred`` and the
    result is written to that relative path under ``repo.working_tree_dir``.

    Args:
        repo: Repository wrapper providing ``get_file_tgt`` and
            ``working_tree_dir``.
        exp: Sample whose ``test_db`` holds ``rel_path`` and ``method_tgt``.
        pred: Source text that replaces the ground-truth method.
        type_idx: Index into ``all_infos``; only used in the status message.

    Returns:
        Always ``None``. If the ground-truth method text is not found in the
        file, a warning is printed and nothing is written.
    """
    original_source = repo.get_file_tgt(exp.test_db["rel_path"])
    if exp.test_db["method_tgt"] not in original_source:
        print("?? Method not found in test file ??")
        return None

    patched_source = original_source.replace(exp.test_db["method_tgt"], pred)
    target_path = os.path.join(repo.working_tree_dir, exp.test_db["rel_path"])
    with open(target_path, "w") as out:
        out.write(patched_source)

    print(
        f"!! Test: {target_path} \nhas been substitute with prediction with type: [{all_infos[type_idx]}], try running build !!"
    )


# show prediction, ground truth and their diff at the same time
def show_info_full(exp, idx):
    """Print the original test and, per approach, prediction vs. ground truth.

    For every entry of ``all_outputs`` (labelled by the matching entry of
    ``all_infos``), the prediction at position ``idx`` is stripped of
    comments, formatted, and diffed against the equally normalized
    ``exp.test_db["method_tgt"]``. When the diff is non-empty, the
    prediction, the ground truth, and the diff are all printed.

    Args:
        exp: Sample whose ``test_db`` holds ``method_src`` and ``method_tgt``.
        idx: Position of the sample in each approach's output list.
    """
    src_code = get_code_without_comments(exp.test_db["method_src"])
    src_fmt = formatted_java_code(src_code)
    print(f"--> Original Test:\n{src_fmt}")
    print()

    # The ground truth is the same for every approach, so normalize it once
    # instead of once per loop iteration.
    ref_code = get_code_without_comments(exp.test_db["method_tgt"])
    ref_fmt = formatted_java_code(ref_code)

    for outputs, infos in zip(all_outputs, all_infos):
        print("==" * 5 + " " + infos + " " + "==" * 5)
        pred = outputs[idx]["prediction"]
        # Unparsable predictions are reported and skipped without diffing.
        if has_parse_error(pred):
            print("?? Parse Error ??")
            continue
        pred_code = get_code_without_comments(pred)
        pred_fmt = formatted_java_code(pred_code)
        res_diff = get_diff(pred_fmt, ref_fmt)
        if len(res_diff) == 0:
            print("!! No Difference, accurately repaired !!")
        else:
            print(f"--> Prediction:\n{pred_fmt}")
            print(f"--> Ground Truth:\n{ref_fmt}")
            print(f"--> Diff:\n{res_diff}")


# only show the diff between prediction and ground truth
def show_info(exp, idx):
    """Print, per approach, only the diff between prediction and ground truth.

    For every entry of ``all_outputs`` (labelled by the matching entry of
    ``all_infos``), the prediction at position ``idx`` is stripped of
    comments, formatted, and diffed against the equally normalized
    ``exp.test_db["method_tgt"]``.

    Args:
        exp: Sample whose ``test_db`` holds ``method_tgt``.
        idx: Position of the sample in each approach's output list.
    """
    # The ground truth is the same for every approach, so normalize it once
    # instead of once per loop iteration.
    ref_code = get_code_without_comments(exp.test_db["method_tgt"])
    ref_fmt = formatted_java_code(ref_code)

    for outputs, infos in zip(all_outputs, all_infos):
        print("==" * 5 + " " + infos + " " + "==" * 5)
        pred = outputs[idx]["prediction"]
        # Unparsable predictions are reported and skipped without diffing.
        if has_parse_error(pred):
            print("?? Parse Error ??")
            continue
        pred_code = get_code_without_comments(pred)
        pred_fmt = formatted_java_code(pred_code)
        res_diff = get_diff(pred_fmt, ref_fmt)
        if len(res_diff) == 0:
            print("!! No Difference, accurately repaired !!")
        else:
            print(f"--> Diff:\n{res_diff}")


# Index of the sample to evaluate; used both for `examples` and for the
# per-approach prediction lists in `all_outputs`.
idx = 1

print("##" * 5 + " [" + str(idx) + "] " + "##" * 5)
examples = read_examples(input_datafile)
exp = examples[idx]
print(f"Repo Name : {exp.repo_name}")
print(f"Commit ID : {exp.commit_id}")

# Check out the repository at the commit of this sample.
# `repo_root` is only printed for reference; `setup_repo` receives the
# repository name and base directory separately.
repo_root = os.path.join(REPO_BASE, exp.repo_name)
print(f"Repo Root Path : {repo_root}")
repo: UpdateRepo = setup_repo(exp.repo_name, exp.commit_id, repo_base=REPO_BASE)

# Show the result (in the diff, "-" marks the prediction and "+" the ground truth).
show_info_full(exp, idx)

**2.2 Substitute the ground truth with the prediction and manually check compilability**

In [None]:
# Substitute the ground-truth test method with a prediction.
# `type_idx` selects the approach by its position in `all_outputs` / `all_infos`.
# NOTE: `substitute_code` overwrites the test file in the repository working tree.
type_idx = 1
pred = all_outputs[type_idx][idx]["prediction"]
substitute_code(repo, exp, pred, type_idx)