In [1]:
import re
import pandas as pd
import os
import csv

In [2]:
# Load the bug catalogue. The sheet is read without a header row (header=None),
# so the seven columns are labelled 'a' .. 'g' explicitly.
df = pd.read_excel("./bugs.xls", header=None, names=list("abcdefg"))
df.head()

Unnamed: 0,a,b,c,d,e,f,g
0,apache,commit,ce0b029d3dc46512ae7fec271d4ca536fe22257b,server/mpm_common.c,AP_MPM_USES_POD,1.0,1.0
1,apache,commit,1b48bb3c1af572fe256473db502663cc2442cf29,modules/experimental/util_ldap_cache.c,APR_HAS_SHARED_MEMORY,1.0,1.0
2,apache,commit,728709bcb84ca8bf2202078f9ee470f2f8c48533,os/unix/unixd.c,APR_HAS_SYSVSEM_SERIALIZE,1.0,1.0
3,apache,release,apache-2.0.65,modules/ssl/ssl_util.c,APR_HAS_THREADS,1.0,1.0
4,apache,commit,f3d438afd9a500bd96d4090121657bf5d4219b34,server/listen.c,APR_HAVE_IPV6,1.0,1.0


In [3]:
def gen_csv_file(bugs_preprocess_dir, filename, paras_tuple):
    """Append one row of parameter values to ``<bugs_preprocess_dir>/<filename>.csv``.

    Args:
        bugs_preprocess_dir: Directory that receives the csv file (must exist).
        filename: Base name of the csv file; ``.csv`` is appended to it.
        paras_tuple: Iterable of ``(name, value)`` pairs. Only the values are
            written, in iteration order, as a single csv row.

    Note:
        The file is opened in append mode, so every call adds a row and
        calling this again for the same input adds a duplicate row.
    """
    # newline='' is what the csv module requires of files handed to csv.writer;
    # without it, platforms that translate '\n' emit '\r\r\n' row terminators.
    with open(bugs_preprocess_dir + '/' + filename + '.csv', 'a', newline='') as to_csv_file:
        writer = csv.writer(to_csv_file)
        writer.writerow([value for (_, value) in paras_tuple])

In [4]:
def gen_readme_file(bugs_preprocess_dir, filename, paras):
    """Write ``<bugs_preprocess_dir>/<filename>_readme.md`` describing the parameters.

    The file starts with a fixed explanation of the ``.csv`` / ``.safe`` /
    ``.fault`` formats, followed by one ``#### <index>: <name>`` line per
    parameter, with indices starting at 0 in the order given by ``paras``.

    Args:
        bugs_preprocess_dir: Directory that receives the readme (must exist).
        filename: Base name; ``_readme.md`` is appended to it.
        paras: Sequence of parameter names.

    Note:
        An existing readme with the same name is overwritten.
    """
    header_lines = [
        "This is a description of all the parameters of the related code,\n",
        "the Arabic numbers represent the index of one parameter within a test case,\n",
        "followed by the characters representing the names of the parameters,\n",
        "such as `i: NAME` representing the name of i-th parameter is **NAME** \n",
        "\n\n",
        "for the `.csv` and `.safe` files, we define the value of each parameter to be 1 for the 'n' and 2 for the 'y'\n",
        "and for the `.fault` files, each block of file separated by '#' represent one complete fault parameter-value pairs\n",
        "such as `k: v` representing there is a part of fault when k-th parameter take 'v'\n",
        "and if you assign all the parameters within a block to the corresponding values, you will get an fault\n",
        "\n\n",
    ]

    # Plain write mode is enough: the file is never read back here.
    with open(bugs_preprocess_dir + '/' + filename + '_readme.md', 'w') as readme_file:
        # writelines() is given real sequences of lines instead of a single
        # string (which it would iterate one character at a time).
        readme_file.writelines(header_lines)
        readme_file.writelines(
            "#### {}: {} \n".format(i, para) for i, para in enumerate(paras)
        )

In [5]:
def gen_fault_safe_file(fault_lists, paras, bugs_preprocess_dir, filename):
    """Write the ``.fault`` file and, when possible, the ``.safe`` file.

    ``<filename>.fault`` gets one block per fault: a line of comma-separated
    ``index:value`` pairs followed by a ``##########`` separator line.

    ``<filename>.safe`` gets a single csv row holding, for every parameter,
    a value (1 or 2) that does not occur for that parameter in any fault;
    1 is preferred when both values are free.

    Args:
        fault_lists: List of faults, each a list of factor strings, as
            accepted by ``parse_faults_from_list``.
        paras: Sequence of parameter names; positions define the indices.
        bugs_preprocess_dir: Output directory (must exist).
        filename: Base name for the ``.fault`` and ``.safe`` files.

    Returns:
        None. If some parameter has both values used by faults, a message is
        printed and the function returns without writing the ``.safe`` file;
        the ``.fault`` file has already been written at that point.
    """
    faults = parse_faults_from_list(fault_lists, paras)
    base_path = bugs_preprocess_dir + '/' + filename

    with open(base_path + '.fault', 'w') as to_fault_file:
        for fault in faults:
            to_fault_file.write(
                ', '.join("{}:{}".format(index, value) for (index, value) in fault)
            )
            to_fault_file.write("\n##########\n")

    # candidates[i] starts as [1, 2]; a slot is overwritten with -1 once the
    # corresponding value (slot position + 1) appears in any fault.
    candidates = [[1, 2] for _ in paras]
    for fault in faults:
        for (index, value) in fault:
            candidates[index][value - 1] = -1

    safe_value_list = []
    for i, (first, second) in enumerate(candidates):
        if first == -1 and second == -1:
            # Both values take part in some fault: report it and give up on
            # the .safe file instead of raising, so the caller keeps going.
            print("{}/{} hasn't safe value! because {} can't decide".format(
                bugs_preprocess_dir, filename, paras[i]))
            return
        safe_value_list.append(first if first != -1 else second)

    # newline='' is what the csv module requires of files handed to csv.writer.
    with open(base_path + '.safe', 'w', newline='') as to_safe_file:
        csv.writer(to_safe_file).writerow(safe_value_list)

In [6]:
def parse_faults_from_list(fault_lists, paras):
    """Translate factor strings into ``(parameter_index, value)`` pairs.

    A factor that starts with ``!`` maps to value 1, any other factor maps to
    value 2. The index is the position of the factor name (with every ``!``
    removed for negated factors) in ``paras``.

    Args:
        fault_lists: List of faults, each a list of factor strings.
        paras: List of parameter names.

    Returns:
        A list with one list of ``(index, value)`` tuples per input fault.

    Raises:
        ValueError: If a factor name does not occur in ``paras``.
    """
    def to_pair(token):
        negated = token.startswith("!")
        name = token.replace("!", "") if negated else token
        return (paras.index(name), 1 if negated else 2)

    return [[to_pair(token) for token in tokens] for tokens in fault_lists]

In [7]:
def gen_related_files(bugs_dir, bugs_preprocess_dir, filename, fault_str):
    """Convert the ``.config`` files of one bug into csv/readme/fault/safe files.

    Every entry of ``bugs_dir`` whose name starts with
    ``<filename>.config<digit>`` is read line by line. Lines containing a
    ``NAME=y``, ``NAME=m`` or ``NAME=n`` assignment contribute one parameter:
    ``y`` and ``m`` are encoded as 2, ``n`` as 1. Each config file becomes one
    row appended to ``<filename>.csv``. The readme and the fault/safe files
    are generated from the first processed config file, and only if they do
    not exist yet.

    Args:
        bugs_dir: Directory containing the ``.config`` files.
        bugs_preprocess_dir: Output directory; created if missing.
        filename: Source file name the configs belong to (e.g. ``buffer.c``).
        fault_str: Fault expression from the spreadsheet, parsed with
            ``parse_faults_from_excel_str``.

    Raises:
        AssertionError: If no config file matched, or the matched files do not
            all yield the same number of parameters.
        ValueError: If a single line holds more than one assignment match.
    """
    fault_lists = parse_faults_from_excel_str(fault_str)
    os.makedirs(bugs_preprocess_dir, exist_ok=True)

    # The file name is matched literally: re.escape keeps characters such as
    # '.' or '+' in `filename` from being read as regex operators, and the dot
    # in front of "config" is escaped for the same reason.
    config_name_pattern = re.compile(re.escape(filename) + r'\.config\d')
    assignment_pattern = re.compile(r"([\w.]+)=([ynm])")

    # Walk every file in the folder and convert the .config files into rows of
    # the csv file. Sorting makes the row order (and the file used for the
    # readme/fault files) deterministic; note the order is lexicographic.
    paras_len_list = []
    for config_filename_str in sorted(os.listdir(bugs_dir)):
        if not config_name_pattern.match(config_filename_str):
            continue

        paras_tuple_list = []
        paras_list = []
        with open(bugs_dir + '/' + config_filename_str, 'r') as config_file:
            for line in config_file:
                parakv = assignment_pattern.findall(line)
                if parakv:
                    # Exactly one assignment per line is expected; more than
                    # one match makes this unpacking raise ValueError.
                    [(k, v)] = parakv
                    paras_tuple_list.append((k, 2 if v == 'y' or v == 'm' else 1))
                    paras_list.append(k)

        paras_len_list.append(len(paras_tuple_list))

        if not os.path.exists(bugs_preprocess_dir + '/' + filename + '_readme.md'):
            gen_readme_file(bugs_preprocess_dir, filename, paras_list)
        if not os.path.exists(bugs_preprocess_dir + '/' + filename + ".fault"):
            gen_fault_safe_file(fault_lists, paras_list, bugs_preprocess_dir, filename)

        gen_csv_file(bugs_preprocess_dir, filename, paras_tuple_list)

    assert len(set(paras_len_list)) == 1, (
        "expected at least one config file for {!r} in {!r}, all with the same "
        "number of parameters; got parameter counts {}".format(
            filename, bugs_dir, paras_len_list)
    )

In [8]:
def parse_faults_from_excel_str(faults_str):
    """Split a fault expression into lists of factor tokens.

    The expression is cut at every ``||``. Within each piece, every maximal
    run of word characters and ``!`` becomes one token; surrounding
    parentheses and any other characters (such as ``&&``) are dropped.

    Args:
        faults_str: Fault expression string, e.g. ``"A && !B || (C)"``.

    Returns:
        A list with one list of token strings per ``||``-separated piece,
        e.g. ``[["A", "!B"], ["C"]]``. An empty string yields ``[[]]``.
    """
    # Raw strings keep '\w' from being parsed as an (invalid) string escape;
    # the pattern is compiled once instead of once per alternative.
    token_pattern = re.compile(r"[(]?([!\w]+)[)]?")
    return [
        token_pattern.findall(alternative)
        for alternative in re.split(r"[|]{2}", faults_str)
    ]

In [9]:
# Column 'a' joined with column 'd' gives the path of each buggy file below
# the 'bugs/' tree; column 'e' holds the matching fault expression.
paths = df['a'] + '/' + df['d']
fault_strs = df['e']
for path, fault_str in zip(paths, fault_strs):
    # Split off the last path component: everything before it is the
    # directory, the component itself is the source file name.
    dir_path, filename = path.rsplit('/', 1)
    bugs_dir = 'bugs/' + dir_path
    bugs_preprocess_dir = 'bugs_preprocess/' + dir_path
    gen_related_files(
        bugs_dir=bugs_dir,
        bugs_preprocess_dir=bugs_preprocess_dir,
        filename=filename,
        fault_str=fault_str,
    )

bugs_preprocess/cvs/src/buffer.c hasn't safe value! because SHUTDOWN_SERVER can't decide


In [None]:
8:1, 5:1, 7:1, 4:1, 6:1, 3:2
##########

In [49]:
ls = [1,1,1,1,2,2,2,2,2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
     ]
# Overwrite individual positions of `ls` with (index, value) pairs, applied in
# this exact order, then show the resulting vector. The pairs match the
# "8:1, 5:1, 7:1, 4:1, 6:1, 3:2" line in the preceding cell.
for fault_index, fault_value in ((8, 1), (5, 1), (7, 1), (4, 1), (6, 1), (3, 2)):
    ls[fault_index] = fault_value
print(ls)

[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]


In [50]:
# Persist `ls` as one comma-separated line (no trailing newline) in test.txt.
with open("test.txt", "w") as t:
    t.write(",".join(map(str, ls)))