In [2]:
import os
import collections
import re
import pprint

# Importing data

In [3]:
# Names of the log files to analyse: every entry of the log directory whose
# name does not contain "QuixBugs".
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep the order of the loaded records (and of the rows printed further down)
# reproducible from one machine / run to the next.
FILES = sorted(
    name for name in os.listdir("logs_uniform_run2") if "QuixBugs" not in name
)

In [4]:
# Number of selected log files (WRAP.log is still counted here; it is only
# skipped when the records are loaded).
len(FILES)

26

In [5]:
# Read every selected log into one flat list of records.
# A record is the list of ";"-separated fields of one log line, with the
# program name (the file name minus its last four characters, i.e. ".log")
# appended as an extra last field.
raw_records_list = list()

for file in FILES:
    # WRAP.log is excluded from the analysis.
    if file == 'WRAP.log':
        continue

    # The context manager closes the handle as soon as the file has been
    # read, even if an exception interrupts the loop.
    with open(f"logs_uniform_run2/{file}", "r") as f:
        for raw_line in f:
            line = raw_line.strip().split(";")
            line.append(file[:-4])
            raw_records_list.append(line)

In [6]:
def _parse_run_info(raw):
    """Turn a textual run-info field into a set of its ", "-separated items.

    The first character of the first item and the last two characters of the
    last item are dropped (presumably the delimiters wrapping the list in the
    log - confirm against the log format).

    The literal "None" is returned unchanged, and so is any value that is not
    a string any more (already parsed by an earlier run of this cell), which
    keeps the cell safe to re-run.
    """
    if not isinstance(raw, str) or raw == "None":
        return raw

    items = raw.split(", ")
    items[0] = items[0][1:]
    items[-1] = items[-1][:-2]
    return set(items)


# Decode, in place, the fields of every raw record that need more than text.
for line in raw_records_list:

    # extract run information for the current patch (field 12)
    # and for the previous patch (field 13)
    line[12] = _parse_run_info(line[12])
    line[13] = _parse_run_info(line[13])

    # decode the target modification point for comparison operators:
    # the path stored in field 10 is replaced by the operator symbol found
    # at that path in the program named by field 14
    if "Comp" in line[9]:
        symbols_by_path = comp_op_per_program_dict[line[14]]
        # A target that already is one of the program's symbols was decoded by
        # a previous run of this cell. Unknown paths still raise KeyError.
        if line[10] not in symbols_by_path.values():
            line[10] = symbols_by_path[line[10]]

In [7]:
# Total number of raw records loaded, before any filtering.
len(raw_records_list)

225571

In [8]:
# Build the list of records used by the analysis. A raw record is dropped when
#   - its operator (field 9) mentions "EditDeletion", or
#   - it is a StmtReplacement or a ComparisonOperator whose target (field 10)
#     is equal to its ingredient (field 11).
records_list = []

for record in raw_records_list:
    operator = record[9]

    if "EditDeletion" in operator:
        continue

    if operator in ('StmtReplacement', 'ComparisonOperator') and record[10] == record[11]:
        continue

    records_list.append(record)

In [9]:
# Number of records left after filtering.
len(records_list)

167931

# Aggregating data: operator x target node

In [10]:
def make_percent(num):
    """Format a fraction as a percentage string.

    The value is multiplied by 100, rounded to two decimals and suffixed
    with "%", e.g. 0.5 -> "50.0%".
    """
    return f"{round(num * 100, 2)}%"

Fields of a record, by index: 0 iter, 1 run status, 2 previous run status,

3 return code, 4 previous return code,

5 fitness, 6 previous fitness,

7 patch length, 8 previous patch length,

9 operator, 10 target, 11 ingredient,

12 run info, 13 previous run info

14 program name

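op_target_dict[operator] = {node_type: [0: not compile, 1: compile, 2: better fitness, 3: worse fitness, 4: same fitness, 5: actually same fitness (same fitness and identical run info)]}; counters 2-5 are only updated for patches that compile.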

In [11]:
# Tally how many filtered records ended with each run status (field 1).
run_status_dict = collections.defaultdict(int)

for record in records_list:
    status = record[1]
    run_status_dict[status] = run_status_dict[status] + 1

In [12]:
# Display the number of records per run status.
run_status_dict

defaultdict(int, {'PARSE_ERROR': 119496, 'SUCCESS': 48435})

In [14]:
# Sanity check: the per-status counts should add up to len(records_list).
sum(run_status_dict.values())

167931

In [15]:
def _node_type(xpath):
    """Return the node type named by the last step of a modification-point path.

    The last "/"-separated step is expected to end with an "[index]" suffix
    (e.g. "expr_stmt[3]"); that suffix is removed whatever its number of
    digits. A step without such a suffix falls back to dropping its last
    three characters, which is what this notebook did for every step before.
    """
    last_step = xpath.split("/")[-1]
    without_index = re.sub(r"\[\d+\]$", "", last_step)
    if without_index != last_step:
        return without_index
    return last_step[:-3]


def _fitness_order(fitness, previous_fitness):
    """Return -1, 0 or 1 when `fitness` is lower than, equal to or higher
    than `previous_fitness`.

    Both values are text fields read from the log, so they are compared as
    numbers whenever both parse as floats ("10" must not sort before "9").
    If either one is not numeric, the raw values are compared as they are.
    """
    try:
        current, previous = float(fitness), float(previous_fitness)
    except (TypeError, ValueError):
        current, previous = fitness, previous_fitness
    return (current > previous) - (current < previous)


# Counter layout used by both dictionaries:
#   0 not compile, 1 compile, 2 better fitness, 3 worse fitness,
#   4 same fitness, 5 same fitness and identical run info
# op_target_dict[operator][target node type] -> counters
op_target_dict = collections.defaultdict(dict)

# operators_dict[operator] -> counters, summed over all target node types
operators_dict = collections.defaultdict(lambda: [0, 0, 0, 0, 0, 0])

for line in records_list:
    run_status = line[1]
    operator = line[9]
    target_node = line[10]

    # Comparison-operator targets are already decoded to a symbol; every
    # other target is a path that is reduced to the type of its last node.
    if operator != "ComparisonOperator":
        target_node = _node_type(line[10])

    # Both counter lists are created on first sight of the operator / node,
    # whatever the run status of the record.
    operator_stats = operators_dict[operator]
    target_stats = op_target_dict[operator].setdefault(target_node, [0, 0, 0, 0, 0, 0])

    if run_status == 'PARSE_ERROR':
        target_stats[0] += 1
        operator_stats[0] += 1

    elif run_status == 'SUCCESS':
        target_stats[1] += 1
        operator_stats[1] += 1

        fitness = line[5]
        previous_fitness = line[6]
        run_info = line[12]
        previous_run_info = line[13]

        order = _fitness_order(fitness, previous_fitness)

        if order < 0: # better fitness
            target_stats[2] += 1
            operator_stats[2] += 1
        elif order > 0: # worse fitness
            target_stats[3] += 1
            operator_stats[3] += 1
        else: # same fitness
            target_stats[4] += 1
            operator_stats[4] += 1
            if run_info == previous_run_info: # same fitness to run info level
                target_stats[5] += 1
                operator_stats[5] += 1

In [144]:
# for key, value in operators_dict.items():
#     iter_num = value[0] + value[1]
#     print(key, iter_num, make_percent(iter_num / len(records_list)), value)

In [16]:
# One comma-separated row per operator: operator name, number of records
# (parse errors + successes), then each of the six counters as a share of
# that number.
for key, value in operators_dict.items():
    iter_num = value[0] + value[1]
    value_percent = [make_percent(el / iter_num) for el in value]
    row = [f"{key}", f"{iter_num}"]
    row.extend(value_percent[idx] for idx in range(6))
    print(",".join(row))

StmtInsertion,81110,71.31%,28.69%,0.66%,5.02%,23.02%,10.51%
StmtReplacement,74303,77.16%,22.84%,1.49%,8.43%,12.93%,6.34%
StmtDeletion,7189,52.19%,47.81%,3.94%,20.18%,23.69%,9.53%
ComparisonOperator,5329,10.85%,89.15%,13.57%,36.16%,39.43%,17.88%


In [123]:
# for operator in op_target_dict.keys():
#     for node, stats in op_target_dict[operator].items():
#         print(f"{operator} - {node} - {stats}")
#     print("\n")

In [129]:
# One comma-separated row per (operator, target node type): both names, the
# number of records (parse errors + successes), then each of the six counters
# as a share of that number.
for operator, stats_by_node in op_target_dict.items():
    for node, stats in stats_by_node.items():
        iter_num = stats[0] + stats[1]
        stats_percent = [make_percent(el / iter_num) for el in stats]
        shares = ",".join(stats_percent[idx] for idx in range(6))
        print(f"{operator},{node},{iter_num},{shares}")

StmtInsertion,expr_stmt,21809,59.24%,40.76%,0.75%,7.64%,32.37%,14.14%
StmtInsertion,decl_stmt,19413,86.98%,13.02%,0.04%,2.2%,10.78%,5.16%
StmtInsertion,if,13325,70.46%,29.54%,1.05%,5.57%,22.92%,10.76%
StmtInsertion,for,8695,75.41%,24.59%,0.4%,4.97%,19.22%,9.59%
StmtInsertion,return,16072,68.2%,31.8%,1.14%,3.19%,27.47%,12.49%
StmtInsertion,while,1633,63.14%,36.86%,0.24%,17.88%,18.74%,7.96%
StmtInsertion,continue,163,57.06%,42.94%,0.0%,0.0%,42.94%,20.86%
StmtReplacement,decl_stmt,17841,96.86%,3.14%,0.0%,0.01%,3.14%,2.56%
StmtReplacement,for,8044,78.17%,21.83%,1.48%,8.47%,11.88%,6.2%
StmtReplacement,expr_stmt,20020,62.1%,37.9%,2.11%,14.05%,21.74%,9.33%
StmtReplacement,if,12293,79.98%,20.02%,2.17%,8.18%,9.66%,4.99%
StmtReplacement,return,14526,71.78%,28.22%,2.03%,9.99%,16.2%,8.21%
StmtReplacement,while,1436,69.15%,30.85%,0.28%,21.66%,8.91%,3.34%
StmtReplacement,continue,143,53.85%,46.15%,0.0%,0.0%,46.15%,22.38%
StmtDeletion,expr_stmt,1645,11.0%,89.0%,5.17%,38.97%,44.86%,16.53%
StmtDeletion

# Aggregating data: operator x target node x ingredient node

In [130]:
def _last_step_name(xpath):
    """Return the last "/"-separated step of `xpath` without its index.

    A trailing "[index]" suffix is removed whatever its number of digits
    (e.g. "expr_stmt[12]" -> "expr_stmt"). A step without such a suffix falls
    back to dropping its last three characters, which is what this notebook
    did for every step before.
    """
    last_step = xpath.split("/")[-1]
    without_index = re.sub(r"\[\d+\]$", "", last_step)
    if without_index != last_step:
        return without_index
    return last_step[:-3]


def _compare_fitness_fields(fitness, previous_fitness):
    """Return -1, 0 or 1 when `fitness` is lower than, equal to or higher
    than `previous_fitness`.

    Both values are text fields read from the log, so they are compared as
    numbers whenever both parse as floats ("10" must not sort before "9").
    If either one is not numeric, the raw values are compared as they are.
    """
    try:
        current, previous = float(fitness), float(previous_fitness)
    except (TypeError, ValueError):
        current, previous = fitness, previous_fitness
    return (current > previous) - (current < previous)


# op_target_ingredient_dict[operator][target node type][ingredient node type]
# -> [0 not compile, 1 compile, 2 better fitness, 3 worse fitness,
#     4 same fitness, 5 same fitness and identical run info]
op_target_ingredient_dict = collections.defaultdict(dict)


for line in records_list:
    run_status = line[1]
    operator = line[9]
    target_node = line[10]
    ingredient_node = line[11]

    # Comparison-operator fields are used as they are; every other target /
    # ingredient is a path that is reduced to the type of its last node.
    if operator != "ComparisonOperator":
        target_node = _last_step_name(line[10])
        ingredient_node = _last_step_name(line[11])

    # The counter list is created on first sight of the combination,
    # whatever the run status of the record.
    stats_by_ingredient = op_target_ingredient_dict[operator].setdefault(target_node, dict())
    counters = stats_by_ingredient.setdefault(ingredient_node, [0, 0, 0, 0, 0, 0])

    if run_status == 'PARSE_ERROR':
        counters[0] += 1

    elif run_status == 'SUCCESS':
        counters[1] += 1

        fitness = line[5]
        previous_fitness = line[6]
        run_info = line[12]
        previous_run_info = line[13]

        order = _compare_fitness_fields(fitness, previous_fitness)

        if order < 0: # better fitness
            counters[2] += 1
        elif order > 0: # worse fitness
            counters[3] += 1
        else: # same fitness
            counters[4] += 1
            if run_info == previous_run_info: # same fitness to run info level
                counters[5] += 1

In [124]:
# for operator in op_target_dict.keys():
#     if operator != "StmtDeletion" and operator != "ComparisonOperator":
#         for target_node in op_target_ingredient_dict[operator].keys():
#             for ingredient_node, stats in op_target_ingredient_dict[operator][target_node].items():
#                 print(f"{operator} - {target_node} - {ingredient_node} - {stats}")

#             print("\n")

In [134]:
# One comma-separated row per (operator, target node type, ingredient node
# type): the three names, the number of records (parse errors + successes),
# then each of the six counters as a share of that number.
# The loop walks op_target_ingredient_dict itself, so the table does not
# depend on another cell's dictionary being present and up to date.
for operator, stats_by_target in op_target_ingredient_dict.items():
    for target_node, stats_by_ingredient in stats_by_target.items():
        for ingredient_node, stats in stats_by_ingredient.items():
            iter_num = stats[0] + stats[1]
            stats_percent = [make_percent(el / iter_num) for el in stats]
            print(f"{operator},{target_node},{ingredient_node},{iter_num}," ,end="")
            print(f"{stats_percent[0]},{stats_percent[1]},{stats_percent[2]},{stats_percent[3]},{stats_percent[4]},{stats_percent[5]}")

StmtInsertion,expr_stmt,for,2535,77.51%,22.49%,0.36%,2.01%,20.12%,9.86%
StmtInsertion,expr_stmt,expr_stmt,7298,29.01%,70.99%,2.0%,19.03%,49.96%,19.36%
StmtInsertion,expr_stmt,decl_stmt,5616,81.32%,18.68%,0.0%,0.46%,18.22%,8.8%
StmtInsertion,expr_stmt,return,2915,91.66%,8.34%,0.0%,0.0%,8.34%,7.0%
StmtInsertion,expr_stmt,if,2867,45.03%,54.97%,0.21%,4.95%,49.81%,22.92%
StmtInsertion,expr_stmt,while,530,49.25%,50.75%,0.38%,10.94%,39.43%,12.26%
StmtInsertion,expr_stmt,continue,48,97.92%,2.08%,0.0%,0.0%,2.08%,0.0%
StmtInsertion,decl_stmt,decl_stmt,5108,89.7%,10.3%,0.0%,0.49%,9.81%,4.84%
StmtInsertion,decl_stmt,expr_stmt,5558,83.75%,16.25%,0.11%,5.78%,10.36%,4.17%
StmtInsertion,decl_stmt,for,2208,88.59%,11.41%,0.05%,2.13%,9.24%,4.85%
StmtInsertion,decl_stmt,if,2881,75.43%,24.57%,0.0%,0.07%,24.51%,11.56%
StmtInsertion,decl_stmt,return,3224,97.46%,2.54%,0.0%,0.0%,2.54%,2.23%
StmtInsertion,decl_stmt,while,383,85.38%,14.62%,0.0%,8.36%,6.27%,2.61%
StmtInsertion,decl_stmt,continue,51,100.0%,0.0%,0.

In [1]:
# Comparison operators of every program, keyed by program name:
#   program name -> {path of the comparison operator: operator symbol}
# Programs mapped to an empty dict list no comparison operator.
# The record-decoding cell looks a target up as
# comp_op_per_program_dict[program name][target path], so this cell has to be
# run before it.
comp_op_per_program_dict = {
    "BFS": {
        "./while[1]/if[1]/operator_comp[1]": "==",
    },
    "DFS": {
        "./if[2]/operator_comp[1]": "==",
    },
    "DETECT_CYCLE": {
        "./while[1]/if[1]/operator_comp[1]": "==",
        "./while[1]/if[2]/operator_comp[1]": "==",
    },
    "FIND_IN_SORTED": {
        "./if[1]/operator_comp[1]": "==",
        "./if[2]/operator_comp[1]": "<",
        "./if[3]/operator_comp[1]": ">",
    },
    "FLATTEN": {},
    "GET_FACTORS": {
        "./if[1]/operator_comp[1]": "==",
        "./for[1]/operator_comp[1]": "<",
        "./for[1]/if[1]/operator_comp[1]": "==",
    },
    "IS_VALID_PARENTHESIZATION": {
        "./for[1]/operator_comp[1]": "<",
        "./for[1]/if[2]/operator_comp[1]": "<",
    },
    "KHEAPSORT": {},
    "KNAPSACK": {
        "./for[1]/operator_comp[1]": "<=",
        "./for[1]/if[1]/operator_comp[1]": ">=",
        "./for[1]/for[1]/operator_comp[1]": "<=",
        "./for[1]/for[1]/if[1]/operator_comp[1]": "==",
        "./for[1]/for[1]/if[1]/operator_comp[2]": "==",
        "./for[1]/for[1]/if[2]/operator_comp[1]": "<",
    },
    "KTH": {
        "./for[1]/if[1]/operator_comp[1]": "<",
        "./for[1]/if[2]/operator_comp[1]": ">",
        "./if[1]/operator_comp[1]": "<",
        "./if[2]/operator_comp[1]": ">=",
    },
    "LCS_LENGTH": {
        "./for[1]/operator_comp[1]": "<",
        "./for[1]/for[1]/operator_comp[1]": "<",
        "./for[2]/operator_comp[1]": "<",
        "./for[2]/for[1]/operator_comp[1]": "<",
        "./for[2]/for[1]/if[1]/operator_comp[1]": "==",
        "./if[1]/for[1]/operator_comp[1]": "<",
    },
    "LEVENSHTEIN": {
        "./if[2]/operator_comp[1]": "==",
    },
    "LIS": {
        "./for[1]/for[1]/operator_comp[1]": "<",
        "./for[1]/for[1]/if[1]/operator_comp[1]": "<",
        "./for[1]/if[1]/operator_comp[1]": "==",
        "./for[1]/if[1]/operator_comp[2]": "<",
    },
    "LONGEST_COMMON_SUBSEQUENCE": {
        "./if[2]/operator_comp[1]": "==",
        "./return[1]/operator_comp[1]": ">=",
    },
    "MAX_SUBLIST_SUM": {},
    "MERGESORT": {
        "./while[1]/operator_comp[1]": "<",
        "./while[1]/operator_comp[2]": "<",
        "./while[1]/if[1]/operator_comp[1]": "<=",
        "./if[1]/operator_comp[1]": "==",
    },
    "NEXT_PALINDROME": {
        "./while[1]/operator_comp[1]": "<",
        "./while[1]/operator_comp[2]": ">=",
        "./while[1]/if[1]/operator_comp[1]": "==",
        "./while[1]/if[2]/operator_comp[1]": "!=",
    },
    "NEXT_PERMUTATION": {
        "./for[1]/operator_comp[1]": "!=",
        "./for[1]/if[1]/operator_comp[1]": "<",
        "./for[1]/if[1]/for[1]/operator_comp[1]": "!=",
        "./for[1]/if[1]/for[1]/if[1]/operator_comp[1]": "<",
        "./for[1]/if[1]/for[1]/if[1]/for[1]/operator_comp[1]": "!=",
    },
    "PASCAL": {
        "./for[1]/operator_comp[1]": "<",
        "./for[1]/for[1]/operator_comp[1]": "<",
        "./for[1]/for[1]/if[1]/operator_comp[1]": ">",
        "./for[1]/for[1]/if[2]/operator_comp[1]": "<",
    },
    "POWERSET": {},
    "QUICKSORT": {
        "./for[1]/if[1]/operator_comp[1]": "<",
        "./for[1]/if[2]/operator_comp[1]": ">",
    },
    "SHORTEST_PATH_LENGTH": {
        "./while[1]/if[1]/operator_comp[1]": "==",
        "./while[1]/for[1]/if[2]/operator_comp[1]": "==",
        "./for[1]/if[1]/operator_comp[1]": "<",
    },
    "SHORTEST_PATH_LENGTHS": {
        "./for[1]/operator_comp[1]": "<",
        "./for[1]/for[1]/operator_comp[1]": "<",
        "./for[1]/for[1]/if[1]/operator_comp[1]": "==",
        "./for[2]/operator_comp[1]": "<",
        "./for[2]/for[1]/operator_comp[1]": "<",
        "./for[2]/for[1]/for[1]/operator_comp[1]": "<",
    },
    "SIEVE": {
        "./for[3]/expr_stmt[1]/operator_comp[1]": ">",
        "./for[4]/operator_comp[1]": "<",
    },
    "SUBSEQUENCES": {
        "./if[1]/operator_comp[1]": "==",
        "./for[1]/operator_comp[1]": "<",
    },
    "WRAP": {
        "./while[1]/operator_comp[1]": ">",
        "./while[1]/if[1]/operator_comp[1]": "==",
    },
}