In [32]:
import os
import json
import re
# imports
import javalang
import os
from app.database import Database
from app.classes import IterationStatus, BatchAttemptStatus, TestStatus, AttemptStatus,Dataset
from typing import List
from app.constants import CONFIG
import pickle
import xml.etree.ElementTree as ET
from app.utils import etree_to_dict
from tqdm import tqdm
from app.constants import CoverageMode

# Root folder that every path below is derived from (absolute, machine-specific
# Windows path — adjust when running on another machine).
BASE_DIR = r"C:\Users\tianfy\Desktop\毕设\数据集\chatunitest"
# Name of the project under study; interpolated into every path below and into
# the dataset pickle file name.
DATASET = "gson"
# Folder that is walked recursively for generated *.java test files.
TEST_FOLDER = fr"{BASE_DIR}\chatunitest-tests-{DATASET}"
# Destination folder that receives the copies of the tests kept after filtering.
FILTERED_TEST_FOLDER = fr"{BASE_DIR}\chatunitest-tests-{DATASET}-filtered"
# Zip archive whose per-attempt records.json / attemptMapping.json entries are read.
INFO_ZIP = fr"{BASE_DIR}\chatunitest-info_{DATASET}.zip"
# Coverage report parsed with ElementTree (presumably a Clover XML report — confirm).
CLOVER_XML_PATH = fr"{BASE_DIR}\{DATASET}.xml"


In [33]:
# template : ClassName_MethodName_MethodId_AttemptId_Test.java
# example:  NumberUtils_min_23_0_Test.java
# Recursively walk a folder and collect every .java file; matching against the
# template above is done by the caller.
def find_java_files(folder):
    """Return the paths of all ``.java`` files found anywhere under ``folder``.

    Paths are built with ``os.path.join`` from the directory being visited and
    the file name, in the order ``os.walk`` yields them. A folder that does not
    exist yields an empty list.
    """
    return [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(folder)
        for name in names
        if name.endswith(".java")
    ]


# Collect every generated test source, then keep only the files whose *name*
# follows the ClassName_MethodName_MethodId_AttemptId_Test.java template.
java_files = find_java_files(TEST_FOLDER)
print(len(java_files))
# Raw string: "\d" and "\." are regex escapes, not Python string escapes (the
# non-raw form triggers an invalid-escape warning on recent Python versions).
pattern = r".*?_.*?_\d+_\d+_Test\.java"
# Match the whole file name rather than a prefix of the full path, so that
# underscores in parent folder names cannot satisfy the template.
java_files = [file for file in java_files if re.fullmatch(pattern, os.path.basename(file))]
print(len(java_files))

217
185


In [34]:
import zipfile
import json
import re


def get_attempt_flag(records: list):
    """Summarise an attempt by looking at its last record.

    Returns the last record's ``errorMsg.errorType`` when its ``hasError``
    value is truthy, otherwise the string ``"PASS"``. ``records`` must be
    non-empty, since only ``records[-1]`` is inspected.
    """
    final_record = records[-1]
    return final_record["errorMsg"]["errorType"] if final_record["hasError"] else "PASS"

# class_map: "<className>_<classId>" -> {testClassName: [flag of each attempt, in archive order]}
class_map = {}

with zipfile.ZipFile(INFO_ZIP, 'r') as z:
    # Only directory entries whose name ends in "attempt<N>/" describe an attempt.
    for item in (name for name in z.namelist() if re.match(r'.*attempt\d+/$', name)):
        pattern = r'/class(\d+)/method(\d+)/attempt(\d+)/'
        match = re.search(pattern, item)
        if match is None:
            # An attempt directory outside the class/method/attempt layout is unexpected.
            assert False
        class_id, method_id, attempt_id = match.groups()

        # Outcome of this attempt, taken from the last entry of its records.json.
        with z.open(f'{item}records.json') as f:
            attempt_flag = get_attempt_flag(json.load(f))

        # attemptMapping.json sits one level up, next to the attempt directories;
        # the names are always read from its 'attempt0' entry.
        method_dir = item.replace(f"attempt{attempt_id}/", "")
        with z.open(f'{method_dir}attemptMapping.json') as f:
            first_attempt = json.load(f)['attempt0']
        className = f"{first_attempt['className']}_{class_id}"
        testClassName = first_attempt['testClassName']
        class_map.setdefault(className, {}).setdefault(testClassName, []).append(attempt_flag)

In [35]:
from collections import Counter


def shorten_key(key):
    """Reduce a ``Class_Method_<id>_<id>_Test`` key to ``"Class#Method"``.

    The first two groups of the template are joined with ``#``; the two numeric
    fields and the ``_Test`` suffix are dropped. A key that does not contain
    the template raises ``AssertionError``.
    """
    found = re.search(r"(.*?)_(.*?)_\d+_\d+_Test", key)
    if not found:
        assert False
    class_part, method_part = found.groups()
    return class_part + "#" + method_part
# Flatten class_map into {test class name: attempt flags}, skipping keys whose
# first two "_"-separated fields are identical (class name == method name,
# presumably constructors — confirm).
java_info_map = {
    test_name: flags
    for per_class in class_map.values()
    for test_name, flags in per_class.items()
    if test_name.split("_")[0] != test_name.split("_")[1]
}
# How many generated test classes map to each "Class#method" key.
shorten_java_info_map = Counter(shorten_key(name) for name in java_info_map)
print(f"chatunitest total: {len(java_info_map)}")
# A test class is kept as valid when its last recorded attempt flag is "PASS".
java_valid_map = {name: flags for name, flags in java_info_map.items() if flags[-1] == "PASS"}
# Distribution of the last attempt flag over all test classes.
print(Counter(flags[-1] for flags in java_info_map.values()))
shorten_java_valid_map = Counter(shorten_key(name) for name in java_valid_map)
print(f"chatunitest valid unique: {len(shorten_java_valid_map)}")

chatunitest total: 378
Counter({'PASS': 188, 'COMPILE_ERROR': 105, 'RUNTIME_ERROR': 85})
chatunitest valid unique: 167


In [66]:
import shutil

# java_files[0].replace(TEST_FOLDER, FILTERED_TEST_FOLDER)
# Copy every test file whose key is in java_valid_map into FILTERED_TEST_FOLDER,
# keeping its location relative to TEST_FOLDER, and record which keys were found.
keys = set()

for f in java_files:
    # File name up to its first "."; os.path.basename copes with whichever
    # separator the platform uses instead of assuming "\\".
    key = os.path.basename(f).split(".")[0]
    if key in java_valid_map:
        keys.add(key)
        # Re-root the path explicitly rather than via str.replace, which would
        # rewrite every occurrence of the folder string inside the path.
        dst = os.path.join(FILTERED_TEST_FOLDER, os.path.relpath(f, TEST_FOLDER))
        os.makedirs(os.path.dirname(dst), exist_ok=True)
        shutil.copy2(f, dst)
print("expected: ", len(java_valid_map))
print("actual: ", len(keys))
# Valid keys for which no source file was found on disk (empty set when all were copied).
set(java_valid_map.keys()) - keys

expected:  114
actual:  114


set()

In [36]:
import pickle
from app.classes import Dataset
from app.constants import CONFIG

# read dataset
# NOTE: pickle.load can execute arbitrary code from the file; only load trusted pickles.
with open(f"{CONFIG.dataset_folder}/{DATASET}.pkl", "rb") as f:
    raw_dataset = pickle.load(f)
dataset = Dataset(**raw_dataset)

# One "Class#method" key per method in the dataset; repeated keys are kept in
# the list and collapsed by the Counter.
dataset_info_list = [
    class_info.class_name + "#" + method_info.name
    for class_info in dataset
    for method_info in class_info
]
print(f"dataset total: {len(dataset_info_list)}")
dataset_info_map = Counter(dataset_info_list)
print(f"dataset unique: {len(dataset_info_map)}")

dataset total: 378
dataset unique: 325


In [37]:
# Compare shorten_java_info_map against dataset_info_map: print every
# "Class#method" key whose count differs between the two. A Counter returns 0
# for a missing key, so absent keys are reported as well.
for k, v in dataset_info_map.items():
    generated = shorten_java_info_map[k]
    if generated != v:
        print(k, v, generated)


In [38]:
from app.classes import IterationStatus

# Running (covered, total) sums over every method counted below. The spelling
# of these two names is kept because a later cell reads them.
total_branch_tupe = (0, 0)
total_line_tupe = (0, 0)
# Number of methods that contribute to the totals.
count = 0

tree = ET.parse(CLOVER_XML_PATH)
root = tree.getroot()
root_dict = etree_to_dict(root)['coverage']
# The package list is the same for every method, so it is looked up once.
source_packages: list = root_dict['project'][0]['package']
# {"Class#signature": ((covered, total, rate) for branches, (covered, total, rate) for lines)}
method_coverage = {}
for class_info in tqdm(dataset):
    for method_info in class_info:
        # Only methods that have at least one valid generated test are measured.
        if f"{class_info.class_name}#{method_info.name}" not in shorten_java_valid_map:
            continue

        # Locate the focal class in the report: first its package, then its source file.
        source_package = next(
            (package for package in source_packages
             if class_info.package_reference == package['@name']),
            None,
        )
        assert source_package is not None

        source_file = next(
            (file for file in source_package['file']
             if f"{class_info.class_name}.java" == file['@name']),
            None,
        )
        assert source_file is not None

        # Keep only the report entries that fall inside the focal method's line range.
        report_lines = source_file['line']
        branches = [entry for entry in report_lines
                    if entry['@type'] == 'cond'
                    and method_info.start <= int(entry["@num"]) <= method_info.end]
        lines = [entry for entry in report_lines
                 if entry['@type'] == 'stmt'
                 and method_info.start <= int(entry["@num"]) <= method_info.end]

        # Each condition has a true side and a false side; classify by which side never ran.
        false_uncovered_branch = [branch for branch in branches
                                  if branch['@truecount'] != '0' and branch['@falsecount'] == '0']
        true_uncovered_branch = [branch for branch in branches
                                 if branch['@truecount'] == '0' and branch['@falsecount'] != '0']
        unreachable_branch = [branch for branch in branches
                              if branch['@truecount'] == '0' and branch['@falsecount'] == '0']

        branch_num_uncovered = len(false_uncovered_branch) + len(true_uncovered_branch) + 2 * len(unreachable_branch)
        # A method without conditions is scored as 1 of 1, i.e. fully covered.
        branch_num_total = len(branches) * 2 if branches else 1
        branch_num_covered = branch_num_total - branch_num_uncovered

        uncovered_line = [line for line in lines if line['@count'] == '0']

        line_num_uncovered = len(uncovered_line)
        # Same convention as for branches: no statements means 1 of 1.
        line_num_total = len(lines) if lines else 1
        line_num_covered = line_num_total - line_num_uncovered

        # Methods with no covered line at all are left out of the totals and of `count`.
        if line_num_covered == 0:
            continue
        count += 1

        branch_rate_tuple = (branch_num_covered, branch_num_total, branch_num_covered / branch_num_total)
        line_rate_tuple = (line_num_covered, line_num_total, line_num_covered / line_num_total)
        method_coverage[f"{class_info.class_name}#{method_info.signature}"] = (branch_rate_tuple, line_rate_tuple)
        total_branch_tupe = (total_branch_tupe[0] + branch_rate_tuple[0], total_branch_tupe[1] + branch_rate_tuple[1])
        total_line_tupe = (total_line_tupe[0] + line_rate_tuple[0], total_line_tupe[1] + line_rate_tuple[1])
print(count)
# When no method was counted the totals stay at zero; report nan instead of dividing by zero.
print(f"Total Branch Coverage:  {total_branch_tupe} {total_branch_tupe[0] / total_branch_tupe[1] if total_branch_tupe[1] else float('nan')}")
print(f"Total Line Coverage: {total_line_tupe} {total_line_tupe[0] / total_line_tupe[1] if total_line_tupe[1] else float('nan')}")

52it [00:00, 4000.80it/s]

192
Total Branch Coverage:  (341, 461) 0.7396963123644251
Total Line Coverage: (639, 801) 0.797752808988764





In [74]:
# Reset both accumulators to (covered, total) = (0, 0).
final_total_branch_tuple = final_total_line_tuple = (0, 0)


In [75]:
# Add the current (covered, total) sums onto the accumulators. The cell reads
# and reassigns the same names, so running it again adds the same totals again.
final_total_branch_tuple = tuple(acc + cur for acc, cur in zip(final_total_branch_tuple, total_branch_tupe))
final_total_line_tuple = tuple(acc + cur for acc, cur in zip(final_total_line_tuple, total_line_tupe))
print(f"Total Branch Coverage:  {final_total_branch_tuple} {final_total_branch_tuple[0] / final_total_branch_tuple[1]}")
print(f"Total Line Coverage: {final_total_line_tuple} {final_total_line_tuple[0] / final_total_line_tuple[1]}")

Total Branch Coverage:  (153, 177) 0.864406779661017
Total Line Coverage: (272, 295) 0.9220338983050848
