## Processing the log files of A-BUS and keep last runs logs only

In [21]:
import os
import re
from datetime import datetime, timedelta
import glob

def parse_time_taken(time_str):
    """Parse the 'Time taken' string into a timedelta object."""
    hours, minutes, seconds = map(float, time_str.split(':'))
    return timedelta(hours=hours, minutes=minutes, seconds=seconds)

def parse_line_for_timestamp(line):
    """Extract timestamp from a line if present."""
    timestamp_regex = r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d+'
    match = re.search(timestamp_regex, line)
    if match:
        return datetime.strptime(match.group(), '%Y-%m-%d %H:%M:%S.%f')
    return None

def process_file(file_path):
    lines_to_keep = []
    start_time = None
    end_time = None
    current_time = None
    is_successful = False

    with open(file_path, 'r') as file:
        for line in file:
            if 'Time taken:' in line:
                time_taken_str = line.split('Time taken:')[1].strip()
                time_taken = parse_time_taken(time_taken_str)
                end_time = parse_line_for_timestamp(previous_line)
                start_time = end_time - time_taken - timedelta(seconds=100)
                print(f"Start time: {start_time}, End time: {end_time}")
            
            if 'Success' in line:
                is_successful = True

            previous_line = line
    
    if is_successful:
        with open(file_path, 'r') as file:
            # Go back to the start of the file and read it again
            if start_time and end_time:
                for line in file:
                    new_current_time = parse_line_for_timestamp(line)
                    current_time = new_current_time if new_current_time else current_time
                    if current_time and start_time <= current_time <= end_time:
                        lines_to_keep.append(line)

    # Write the filtered lines back to the file
    # with open(file_path, 'w') as file:
    #     file.writelines(lines_to_keep)
    
    # go to the parent directory and create a new directory called 'filtered' if it doesn't exist
    # and write the filtered file there with the same name
    parent_dir = os.path.dirname(file_path)
    # go one level up to the parent directory
    parent_dir = os.path.dirname(parent_dir)
    filtered_dir = os.path.join(parent_dir, 'filtered')
    if not os.path.exists(filtered_dir):
        os.makedirs(filtered_dir)
    filtered_file_path = os.path.join(filtered_dir, os.path.basename(file_path))
    with open(filtered_file_path, 'w') as file:
        file.writelines(lines_to_keep)
    

def process_files_in_directory(directory_path, file_pattern):
    for file_path in glob.glob(os.path.join(directory_path, file_pattern)):
        print(f"Processing file: {file_path}")
        process_file(file_path)


In [None]:
# BUS _ sygus benchmarks
# Replace these with your directory path and file pattern
directory_path = '../../logs/bus_89_Jan10_2024/augmented'
file_pattern = '*_A-BUS_*_1.log'
process_files_in_directory(directory_path, file_pattern)

In [None]:
# BUS _ 38 benchmarks
directory_path = '../../logs/bus_38_Jan10_2024/augmented'
file_pattern = '*_A-BUS_*_1.log'
process_files_in_directory(directory_path, file_pattern)

In [None]:
# BUSTLE _ sygus benchmarks
# Replace these with your directory path and file pattern
directory_path = '../../logs/bustle_89/augmented'
file_pattern = '*_A-Bustle_*_1_*.log'
process_files_in_directory(directory_path, file_pattern)

In [None]:
# BUSTLE _ sygus benchmarks
# Replace these with your directory path and file pattern
directory_path = '../../logs/bustle_38/augmented'
file_pattern = '*_A-Bustle_*_1_*.log'
process_files_in_directory(directory_path, file_pattern)

## Count the number of Iterations in the log file

In [116]:
import os

def count_iterations(dirs, augmented):
    result_dict = {}

    for key, value in dirs.items():
        sub_dirs = [augmented]
        for sub_dir in sub_dirs:
            # get all the files in the directory
            files = os.listdir(value + sub_dir)
            # for each file
            for file in files:
                # find the run from the file name if Bustle/Bee in the file name
                # bustle file name patterns: <key>_A-Bustle_<id>_<augmented/non_augmented>_<run>.log
                # bee file name patterns: <key>_A-Bee_<id>_<augmented/non_augmented>_<run>.log
                if ('Bustle' in file):
                    taskId = file.split('_')[-3].split('.')[0]
                    run = file.split('_')[-1].split('.')[0]
                if ('bee' in value):
                    taskId = file.split('_')[0]
                    run = file.split('_')[-1].split('.')[0]
                if ('A-BUS' in file):
                    taskId = file.split('_')[-3]
                # open the file
                with open(value + sub_dir + file, 'r') as f:
                    # print('File:', file)
                    # read the file
                    lines = f.readlines()
                    # for each line
                    is_solution = False
                    for line in lines:
                        # if the line contains the word 'error'
                        if 'Result: Success' in line:
                            is_solution = True
                            break
                    if is_solution:
                        count = 0
                        program = ''
                        program_size = 0
                        size_total = 0
                        cur_size = 0
                        for line in lines:
                            if ('partial program added to the DSL: ' in line) or ('Program: ' in line) or ('Augmented search did not find a solution. Best programs found: ' in line):
                                count += 1
                            # if ('Number of Iterations: ' in line):
                            #     count = int(line.split('Number of Iterations: ')[1].strip())
                            if ('Program: ' in line):
                                program = line.split('Program: ')[1].strip()
                            if ('Program size: ' in line):
                                program_size = float(line.split('Program size: ')[1].strip())
                            # extract the size from the line '[Task: 35] Size: 12 Evaluations: 4994471' 
                            if ('Size: ' in line):
                                cur_size = max(cur_size, int(line.split('Size: ')[1].split(' Evaluations:')[0].strip()))
                            else:
                                size_total += cur_size
                                cur_size = 0
                        
                        if 'A-BUS' in file or 'BUS' in file:
                            if key not in result_dict:
                                result_dict[key] = []
                            result_dict[key].append({"Iteration":count, "Program": program, "Size": (program_size if 'bee' in value else size_total)})
                        if 'bee' in value:
                            if key not in result_dict:
                                result_dict[key] = {}
                            if run not in result_dict[key]:
                                result_dict[key][run] = []
                            result_dict[key][run].append({"Iteration":count, "Program": program, "Size": (program_size if 'bee' in value else size_total)})
                        if 'Bustle' in file:
                            if key not in result_dict:
                                result_dict[key] = {}
                            if run not in result_dict[key]:
                                result_dict[key][run] = []
                            result_dict[key][run].append({"Iteration":count, "Program": program, "Size": (program_size if 'bee' in value else size_total)})

    return result_dict

In [27]:
# directory of the logs of A-BUS
dirs = {38:'../../logs/bus_38_Jan23_2024/', 89:'../../logs/bus_89_Jan23_2024/'}
result_bus = count_iterations(dirs, 'augmented/')#, 'non_augmented/')
import json
print(json.dumps(result_bus, indent=4))

# save the json to a file
with open('A-BUS.json', 'w') as f:
    json.dump(result_bus, f)

{
    "38": [
        {
            "Iteration": 1,
            "Program": "arg_0.Length().IntToStr()",
            "Size": 2
        },
        {
            "Iteration": 2,
            "Program": "(ifEqual(\"100%\",arg_0) then \"Completed\" else (ifEqual(\"0%\",arg_0[ length-2:]) then \"Not Yet Started\" else \"In Progress\"))",
            "Size": 14
        },
        {
            "Iteration": 1,
            "Program": "arg_1[:arg_1.IndexOf(\" \")]",
            "Size": 4
        },
        {
            "Iteration": 1,
            "Program": "(arg_0.Length() - arg_0.replace(\"/\", \"\").Length()).IntToStr()",
            "Size": 8
        },
        {
            "Iteration": 1,
            "Program": "arg_0.replace(\"<COMPANY>\", arg_1)",
            "Size": 3
        },
        {
            "Iteration": 2,
            "Program": "concat(arg_0[:-4][ length-2:], concat(arg_0[:2], concat(\"/\", arg_0)[:3] + \"/\" + concat(\"/\", arg_0)[3+2:]).Substr(2:2+15))",
            "Size":

In [117]:
# directory of the logs of BUS
dirs = {38:'../../logs/bus_38_Jan10_2024/', 89:'../../logs/bus_89_Jan10_2024/'}
result_bus = count_iterations(dirs, 'non_augmented/')
import json
print(json.dumps(result_bus, indent=4))

# save the json to a file
with open('BUS.json', 'w') as f:
    json.dump(result_bus, f)

{
    "38": [
        {
            "Iteration": 1,
            "Program": "arg_0.replace(\"-\", \"/\")[ length-(1 + 4):]",
            "Size": 7
        },
        {
            "Iteration": 1,
            "Program": "concat(arg_0[:15], \"...\").replace(concat(arg_0, \"...\"), arg_0)",
            "Size": 9
        },
        {
            "Iteration": 1,
            "Program": "arg_0[:(0 - arg_1.Length())]",
            "Size": 5
        },
        {
            "Iteration": 1,
            "Program": "concat(arg_0[:-4][ length-2:], concat(\"/\", arg_0)[:3] + \"/\" + concat(\"/\", arg_0)[3+2:])",
            "Size": 12
        },
        {
            "Iteration": 1,
            "Program": "(ifEqual(\"NONE\",arg_0) then arg_1 else arg_0)",
            "Size": 5
        },
        {
            "Iteration": 1,
            "Program": "arg_0.Substr(arg_0.IndexOf(\"/\",(4 + 4)):arg_0.IndexOf(\"/\",(4 + 4))+15)",
            "Size": 8
        },
        {
            "Iteration": 1,
      

In [28]:
# directory of the logs of Bustle
dirs = {38:'../../logs/bustle_38/', 89:'../../logs/bustle_89/'}
result_bustle = count_iterations(dirs, 'filtered/')#, 'non_augmented/')
print(json.dumps(result_bustle, indent=4))

# save the json to a file
with open('A-Bustle.json', 'w') as f:
    json.dump(result_bustle, f)

{
    "38": {
        "3": [
            {
                "Iteration": 1,
                "Program": "concat(arg_0.upper()[:1], arg_0.lower().Substr(1:1+arg_0.Length()))",
                "Size": 10
            },
            {
                "Iteration": 1,
                "Program": "arg_0.replace(\"-\", \"/\")[ length-(1 + 4):]",
                "Size": 7
            },
            {
                "Iteration": 1,
                "Program": "concat(\"Mr. \", arg_0.Substr(arg_0.IndexOf(\" \"):arg_0.IndexOf(\" \")+15).strip())",
                "Size": 13
            },
            {
                "Iteration": 1,
                "Program": "arg_0[:(arg_0.Length() - arg_1.Length())]",
                "Size": 6
            },
            {
                "Iteration": 1,
                "Program": "arg_0.lower().replace(arg_0[:15], arg_0.upper())",
                "Size": 7
            },
            {
                "Iteration": 2,
                "Program": "concat(\"Ms. \", arg

In [29]:
# directory of the logs of Bee
dirs = {38:'../../logs/bee_38_Jan24_2024/', 89:'../../logs/bee_89_Jan24_2024/'}
result_bee = count_iterations(dirs, 'augmented/')#, 'non_augmented/')
print(json.dumps(result_bee, indent=4))

# save the json to a file
with open('A-Bee.json', 'w') as f:
    json.dump(result_bee, f)

{
    "38": {
        "3": [
            {
                "Iteration": 1,
                "Program": "arg_0.Substr(arg_1.Length(),15)",
                "Size": 5.42
            },
            {
                "Iteration": 1,
                "Program": "arg_0.Length().IntToStr()",
                "Size": 3.57
            },
            {
                "Iteration": 1,
                "Program": "arg_1.Substr(0,arg_1.IndexOf(\" \"))",
                "Size": 6.0
            },
            {
                "Iteration": 1,
                "Program": "arg_0.Substr(arg_0.IndexOf(\"/\",(-3 * -3)),arg_0.Length())",
                "Size": 10.84
            },
            {
                "Iteration": 3,
                "Program": "(ifarg_1.replace(\" \", \"yes\").Contain(\" \") then (ifarg_1.Contain(arg_0.Substr(2,-3)) then (ifconcat(concat(\"no\", \"yes\"), arg_0).lower().Contain(arg_1) then \"yes\" else \"no\") else \"yes\") else (ifarg_1.Substr(-4,3).SuffixOf(arg_1) then (ifconcat(conc

## Crossbeam Log file processing

In [33]:
import json
results_cb = {}
for bn in ['new', 'sygus']:
    key = '38' if bn == 'new' else '89'
    results_cb[key] = {}
    for run in [1, 2, 3, 4, 5]:
        results_cb[key][run] = []
        for max_restart in [2, 4, 8]:
            result_file = '../../src_a_crossbeam/augmented/bustle_results/' + 'run_' + str(run) + '.'+str(max_restart) + '.grow-True.vw-bustle_sig-vsize.' + bn + '.json'
            res_json = json.load(open(result_file))
            for items in res_json['results']:
                iterations = items['attempt']
                is_success = items['success']
                program = items['solution']
                size = items['solution_weight']
                if is_success:
                    results_cb[key][run].append({"Iteration": iterations, "Program": program, "Size": size})
print(json.dumps(results_cb, indent=4))

# save the json to a file
with open('A-CB.json', 'w') as f:
    json.dump(results_cb, f)
                

{
    "38": {
        "1": [
            {
                "Iteration": 1,
                "Program": "Substitute(var_0, Substitute(var_0, '.', '.0'), Concatenate(var_0, '.0'))",
                "Size": 9
            },
            {
                "Iteration": 1,
                "Program": "Replace('+', Find('-', Concatenate(var_0, '-')), 1, var_0)",
                "Size": 9
            },
            {
                "Iteration": 1,
                "Program": "Concatenate(Proper(Left(var_0, Find(',', var_0))), Upper(Right(var_0, 3)))",
                "Size": 11
            },
            {
                "Iteration": 1,
                "Program": "Replace(Lower(var_0), 1, 3, Proper(Left(var_0, 3)))",
                "Size": 9
            },
            {
                "Iteration": 2,
                "Program": "If(Exact(var_1, Lower(var_1)), Substitute(If(Exact(var_0, 'the text to search in'), 'FALSE', 'TRUE'), Left(var_1, 1), ''), 'TRUE')",
                "Size": 7
         

### process the A-BUS.json, A-Bee.json, A-Bustle.json and A-CB.json files and update the size of the programs

In [122]:
import json
import re


class ProgramParser:
    def parse_program(self, program_str):
        tokens = self.tokenize(program_str)
        parsed_program, _ = self.parse_tokens(tokens)
        return parsed_program

    def tokenize(self, program_str):
        # Step 1: Find all quoted strings and replace them with placeholders
        placeholders = {}
        def replace_with_placeholder(match):
            placeholder = f"__QUOTED{len(placeholders)}__"
            placeholders[placeholder] = match.group(0)
            return placeholder

        program_str = re.sub(r'"[^"]*"', replace_with_placeholder, program_str)

        # Step 2: Remove all commas and periods outside of quotes
        program_str = re.sub(r'(?<!\w)[,.](?!\w)', '', program_str)

        # Step 3: Replace placeholders with the original quoted strings
        for placeholder, quoted_str in placeholders.items():
            program_str = program_str.replace(placeholder, quoted_str)

        # Tokenize the modified string, including arithmetic operators
        tokens = re.findall(r'"[^"]*"|\w+|[\+\-\*/%]|\(|\)|\,|\.', program_str)
        return tokens

    def parse_tokens(self, tokens):
        parsed_program = []
        i = 0
        while i < len(tokens):
            token = tokens[i]
            if token in ['.', ',', '(', ')']:
                # Directly add punctuation tokens
                parsed_program.append(token)
                i += 1
            elif token.startswith('"') and token.endswith('"'):
                # Add string literals as is
                parsed_program.append(token)
                i += 1
            elif token.isalpha() or re.match(r'[\w\+\-\*/]', token):
                # Handle function names and variables
                if i + 1 < len(tokens) and tokens[i + 1] == '(':
                    # If the next token is '(', it's a function call
                    function_name = token
                    i += 2  # Move past function name and '('
                    args, new_index = self.parse_arguments(tokens, i)
                    i = new_index
                    parsed_program.append((function_name, args))
                else:
                    # Otherwise, it's a variable or standalone token
                    parsed_program.append(token)
                    i += 1
            else:
                # Unrecognized token
                i += 1
        return parsed_program, i

    def parse_arguments(self, tokens, start_index):
        args = []
        current_arg = []
        i = start_index
        while i < len(tokens) and tokens[i] != ')':
            if tokens[i] == ',':
                if current_arg:
                    args.append(current_arg)
                    current_arg = []
            else:
                current_arg.append(tokens[i])
            i += 1
        if current_arg:
            args.append(current_arg)
        return args, i + 1  # Return position after closing ')'

def remove_commas_periods(obj):
    if isinstance(obj, list):
        return [remove_commas_periods(x) for x in obj if x not in [',', '.']]
    elif isinstance(obj, tuple):
        # Apply the function to each element of the tuple
        return tuple(remove_commas_periods(x) for x in obj)
    else:
        return obj


def process_program_stack(program_object, stack_operators, stack_operands):
    for item in program_object:
        if isinstance(item, list):
            # Recursive call for nested structures
            process_program_stack(item, stack_operators, stack_operands)
        elif isinstance(item, tuple):
            # Increment count for the function call
            stack_operators.append(item[0])
            process_program_stack(item[1], stack_operators, stack_operands)
        elif item in list_operations:
            stack_operators.append(item)
        elif item == '(' or item == ')':
            # Increment count for parentheses
            stack_operands.append(item)
        else:
            # Increment count for operands
            stack_operands.append(item)

def process_program_object(program_object):
    node_count = 0
    i = 0
    while i < len(program_object):
        item = program_object[i]
        if isinstance(item, list):
            # Recursive call for nested structures
            node_count += process_program_object(item)
        elif isinstance(item, tuple):
            # Special handling for 'if .. then .. else' construct
            if item[0] == 'if' and i + 4 < len(program_object) and program_object[i + 2] == 'then' and program_object[i + 4] == 'else':
                node_count += 1  # Count the entire 'if .. then .. else' as one node
                node_count += process_program_object(item[1])  # Count the condition
                node_count += process_program_object(program_object[i + 3])  # Count the 'then' part
                node_count += process_program_object(program_object[i + 5])  # Count the 'else' part
                i += 6  # Skip past the entire 'if .. then .. else' construct
            else:
                # Count the operator itself
                node_count += 1
                # Count the arguments of the operator
                node_count += process_program_object(item[1])
        elif item in list_operations:
            # Count the operation
            node_count += 1
        elif isinstance(item, str) and not item in ['(', ')', ',', '.']:
            # Count operands (excluding parentheses, commas, and periods)
            node_count += 1
        else:
            i += 1
            continue  # Skip non-countable items
        i += 1

    return node_count

def count_size(program_str):
    parser = ProgramParser()
    total_nodes = None
    try:
        program_object = parser.parse_program(program_str)
        count_then_else = program_str.count('then') + program_str.count('else')
        # remove all the , and . from the program_object list that are not inside the quotes
        program_object = remove_commas_periods(program_object)
        # print(program_object)

        stack_operators = []
        stack_operands = []

        process_program_stack(program_object, stack_operators, stack_operands)
        print("Operators:", stack_operators)
        print("Operands:", stack_operands)

        total_nodes = process_program_object(program_object) - count_then_else
        # print("Size_" + str(count) + " = " + str(total_nodes))
    except:
        pass
    return total_nodes

def call_parser(algo, json_data = {}, operations = []):
    # Example usage
    global list_operations
    list_operations = operations
    
    # json result
    json_result = {}
    
    bns = ['89', '38']
    runs = ['1', '2', '3', '4', '5']
    for key in bns:
        if algo != 'A-BUS' and algo != 'BUS':
            json_result[key] = {}
            for cur_run in runs:
                json_result[key][cur_run] = []
                for entry in json_data[key][cur_run]:
                    program_str = entry["Program"]
                    iterations = entry["Iteration"]
                    program_size = count_size(program_str)
                    json_result[key][cur_run].append({"Iteration":iterations, "Program":program_str, "Size":program_size})
        else:
            json_result[key] = []
            for entry in json_data[key]:
                program_str = entry["Program"]
                iterations = entry["Iteration"]
                program_size = count_size(program_str)
                json_result[key].append({"Iteration":iterations, "Program":program_str, "Size":program_size})
    
    return json_result
                

In [48]:
# test count size
parser = ProgramParser()
global list_operations

program_str = "Substitute(var_0, Substitute(var_0, \".\", \".0\"), Concatenate(var_0, \".0\"))"

list_operations = [
      'Add',
      'Concatenate',
      'Find',
      'Left',
      'Len',
      'Mid',
      'Minus',
      'Replace',
      'Right',
      'Trim',
      'Lower',
      'Upper',
      'Proper',
      'Rept',
      'Substitute',
      'ToText',
      'If',
      'Exact',
      'Gt',
      'Gte',
  ]

count_size(program_str)


Operators: ['Substitute', 'Substitute', 'Concatenate']
Operands: ['var_0', '(', 'var_0', '"."', '".0"', 'var_0', '".0"', ')']


9

In [123]:
# BUS

json_data = json.load(open("BUS.json"))
operations = ['concat', 'replace', 'upper', 'lower', 'Substr', 'IndexOf', 'Length', 'CharAt', '+', '-', '*', '/', '%', 'Contain', 'Equal', 'if', 'then', 'else', 'SuffixOf', 'PrefixOf', 'StrToInt', 'IntToStr', 'stip', 'title']
json_result = call_parser('BUS', json_data, operations)

# save the json to a file
with open('BUS-size.json', 'w') as f:
    json.dump(json_result, f)


# # A-BUS

# json_data = json.load(open("A-BUS.json"))
# operations = ['concat', 'replace', 'upper', 'lower', 'Substr', 'IndexOf', 'Length', 'CharAt', '+', '-', '*', '/', '%', 'Contain', 'Equal', 'if', 'then', 'else', 'SuffixOf', 'PrefixOf', 'StrToInt', 'IntToStr', 'stip', 'title']
# json_result = call_parser('A-BUS', json_data, operations)

# # # save the json to a file
# # with open('A-BUS-size.json', 'w') as f:
# #     json.dump(json_result, f)


# # A-Bustle
# json_data = json.load(open("A-Bustle.json"))
# operations = ['concat', 'replace', 'upper', 'lower', 'Substr', 'IndexOf', 'Length', 'CharAt', '+', '-', '*', '/', '%', 'Contain', 'Equal', 'if', 'then', 'else', 'SuffixOf', 'PrefixOf', 'StrToInt', 'IntToStr', 'stip', 'title']
# json_result = call_parser('A-Bustle', json_data, operations)

# # # save the json to a file
# # with open('A-Bustle-size.json', 'w') as f:
# #     json.dump(json_result, f)


# # A-Bee
# json_data = json.load(open("A-Bee.json"))
# operations = ['concat', 'replace', 'upper', 'lower', 'Substr', 'IndexOf', 'Length', 'CharAt', '+', '-', '*', '/', '%', 'Contain', 'Equal', 'if', 'then', 'else', 'SuffixOf', 'PrefixOf', 'StrToInt', 'IntToStr', 'stip', 'title']
# json_result = call_parser('A-Bee', json_data, operations)

# # # save the json to a file
# # with open('A-Bee-size.json', 'w') as f:
# #     json.dump(json_result, f)
    

# # A-CB
# json_data = json.load(open("A-CB.json"))
# operations = [
#       'Add',
#       'Concatenate',
#       'Find',
#       'Left',
#       'Len',
#       'Mid',
#       'Minus',
#       'Replace',
#       'Right',
#       'Trim',
#       'Lower',
#       'Upper',
#       'Proper',
#       'Rept',
#       'Substitute',
#       'ToText',
#       'If',
#       'Exact',
#       'Gt',
#       'Gte',
#   ]
# json_result = call_parser('A-CB', json_data, operations)

# # # save the json to a file
# # with open('A-CB-size.json', 'w') as f:
# #     json.dump(json_result, f)


Operators: ['Substr', '-', '+', 'IndexOf', '-', '+', 'IndexOf', '+', '+', '+']
Operands: ['_arg_0', '(', '1', '_arg_0', '(', '"."', ')', '(', '1', '_arg_0', '"."', ')', '1', '(', '1', '1', ')', ')']
Operators: ['name', '-']
Operands: ['0', '3']
Operators: ['replace', '*', 'Length', '-', 'Length', 'strip']
Operands: ['_arg_0', '"/"', '" "', '_arg_0', '(', ')', 'length', '_arg_0']
Operators: ['Substr', '+', 'IndexOf', 'strip']
Operands: ['name', '1', '1', 'name', '(', '" "', ')']
Operators: ['-', '-', '+', '+']
Operands: ['_arg_0', '1', 'length', '1', '(', '1', '1', ')']
Operators: ['replace', 'IndexOf', '+', '+', 'replace', 'IndexOf', '+']
Operands: ['_arg_0', '" "', '""', '_arg_0', '" "', '" "', '_arg_0', '" "', '""', '_arg_0', '" "', '0']
Operators: ['concat', 'concat', 'IndexOf', '-', 'Length']
Operands: ['_arg_0', '(', '","', '_arg_0', '_arg_0', '(', '","', ')', ')', 'length', '_arg_0']
Operators: ['replace', '*', 'Length', '-', 'Length', 'strip']
Operands: ['_arg_0', '" "', '" "', 

In [125]:
# process results_bus
from numpy import NaN
import pandas as pd
import json
result_df_38 = pd.DataFrame(columns=['Iteration', 'BUS', 'A-BUS', 'A-Bustle', 'A-Bustle_std', 'A-Bee', 'A-Bee_std', 'A-CB', 'A-CB_std'])
result_df_89 = pd.DataFrame(columns=['Iteration', 'BUS', 'A-BUS', 'A-Bustle', 'A-Bustle_std', 'A-Bee', 'A-Bee_std', 'A-CB', 'A-CB_std'])
results = []
algos = ['BUS', 'A-BUS', 'A-Bustle', 'A-Bee', 'A-CB']

for alg in algos:
    results.append(json.load(open(alg + '-size.json')))


for pIndex in range(len(algos)):
    for key, value in results[pIndex].items():
        df = pd.DataFrame(columns=['Iteration', 'Program', 'Size'])
        if pIndex == 0 or pIndex == 1:
            # Create a list of dictionaries
            rows = [{'Iteration': item['Iteration'], 
                    'Program': item['Program'], 
                    'Size': int(item['Size'])} for item in value]
            # Convert list of dictionaries to DataFrame and append it
            df = pd.concat([df, pd.DataFrame(rows)], ignore_index=True)
        else:
            for run in ['1', '2', '3', '4', '5']:
                for item in value[run]:
                    # Create a list of dictionaries
                    rows = [{'Iteration': item['Iteration'], 
                            'Program': item['Program'], 
                            'Size': int(item['Size'])} for item in value[run]]
                    # Convert list of dictionaries to DataFrame and append it
                    df = pd.concat([df, pd.DataFrame(rows)], ignore_index=True)
            
        max_iter = df['Iteration'].max()
        for i in range(1, (5 if key == '38' else 12)):
            # add the iteration column to the result_df
            if key == '89':
                result_df_89.loc[i-1, 'Iteration'] = i
            else:
                result_df_38.loc[i-1, 'Iteration'] = i
        
        for i in range(1, (5 if key == '38' else 12)):
            if algos[pIndex] == 'A-BUS' or algos[pIndex] == 'BUS':
                avg_size = df[df['Iteration'] == i]['Size'].mean()
                # at ith row ( 1 indexed) add the average size
                if key == '89':
                    result_df_89.loc[i-1, algos[pIndex]] = avg_size
                else:
                    result_df_38.loc[i-1, algos[pIndex]] = avg_size
            else:
                avg_size = df[df['Iteration'] == i]['Size'].mean()
                std_size = df[df['Iteration'] == i]['Size'].std()
                # at ith row ( 1 indexed) add the average size
                if key == '89':
                    result_df_89.loc[i-1, algos[pIndex]] = avg_size
                    result_df_89.loc[i-1, algos[pIndex] + '_std'] = std_size
                else:
                    result_df_38.loc[i-1, algos[pIndex]] = avg_size
                    result_df_38.loc[i-1, algos[pIndex] + '_std'] = std_size                

# do ffill for the NaN values
result_df_38.fillna(method='ffill', inplace=True)
result_df_89.fillna(method='ffill', inplace=True)

print(result_df_38)
print(result_df_89)

   Iteration     BUS      A-BUS   A-Bustle  A-Bustle_std      A-Bee  \
0          1  8.8125   8.357143   9.322873      4.199974   9.892322   
1          2  8.8125  20.250000  33.082278     16.145424  26.181818   
2          3  8.8125  20.250000  33.082278     16.145424  59.190476   
3          4  8.8125  20.250000  33.082278     16.145424  54.500000   

   A-Bee_std       A-CB   A-CB_std  
0   3.620829   9.320484   3.828605  
1   7.939725  21.698769   8.868376  
2  12.094863  47.458840  20.867828  
3  12.602043  47.458840  20.867828  
    Iteration       BUS      A-BUS    A-Bustle  A-Bustle_std       A-Bee  \
0           1  9.297297   9.069444   10.824171      6.806589   10.732047   
1           2  9.297297  29.222222   37.094945     26.092503   33.090613   
2           3  9.297297  29.222222   37.094945     26.092503  168.020253   
3           4  9.297297  68.250000   84.251220     44.763194   98.000000   
4           5  9.297297  68.250000   84.251220     44.763194   98.000000   
5  

In [131]:
import matplotlib.pyplot as plt
import matplotlib.ticker as tick
import matplotlib
import pandas as pd
import numpy as np

# Set Matplotlib to use LaTeX for rendering
matplotlib.use('pgf')
matplotlib.rcParams.update({
    'pgf.texsystem': 'pdflatex',
    'font.family': 'serif',
    'text.usetex': True,
    'pgf.rcfonts': False,
})

NUMBER_TICKS = 6
NUMBER_TICKS_Y = 8

algos = ['A-BUS', 'BUS']#, 'A-Bustle', 'A-Bee', 'A-CB']

def plot_benchmark(ax, df, benchmark_name, labels, line_styles):
    ax.set_title(f'{benchmark_name} Benchmark', fontsize=16)

    for index, column in enumerate(algos):
        style = line_styles[index]
        label = labels[index]

        # Plot the line and capture the color
        line = ax.plot(df['Iteration'], df[column], label=label, linestyle=style)
        line_color = line[0].get_color()  # Get the color of the line

        # Apply fill_between with the same color for columns except 'A-BUS'
        if column != 'A-BUS' and column != 'BUS':
            ax.fill_between(df['Iteration'], 
                            df[column] - df[column + '_std'], 
                            df[column] + df[column + '_std'], 
                            alpha=0.2, 
                            color=line_color)


    # Set plot limits, labels, and other configurations
    y_start = 0 if benchmark_name == 'SyGuS' else 0
    y_end = 100 if benchmark_name == 'SyGuS' else 30
    ax.set_ylim(y_start, y_end)
    ax.set_xticks(np.linspace(df['Iteration'].min(), df['Iteration'].max(), NUMBER_TICKS, dtype=int))
    y_ticks = np.linspace(y_start, y_end, NUMBER_TICKS_Y, dtype=int)
    ax.set_yticks(y_ticks)

    ax.tick_params(axis='both', which='major', labelsize=10)
    ax.margins(x=0.12, y=0.12)
    if benchmark_name == 'SyGuS':
        ax.legend(loc='upper right', fontsize=8, ncol=2)
    else:
        ax.legend(loc='lower right', fontsize=8, ncol=2)
    ax.set_xlabel("Number of Iterations", fontsize=10)
    if benchmark_name == 'SyGuS':
        ax.set_ylabel("Average Program Size", fontsize=10)
    # ax.set_ylabel("Average Program Size", fontsize=10)
    ax.grid(False)

def main():
    # Prepare the plotting data
    labels = [r"A-BUS", r"BUS"]#, r"A-Bustle", r"A-Bee", r"A-CB"]
    line_styles = [(0, (5, 5)), 'solid', (0, (5, 1)), (0, (3, 1, 1, 1))]

    # Define the figure size and DPI
    px = 1/plt.rcParams['figure.dpi']
    fig, axs = plt.subplots(1, 2, figsize=(620*px, 270*px), 
                            gridspec_kw={'hspace': 0.35, 'wspace': 0.14, 'bottom':0.15, 'left':0.089, 'right':0.99, 'top':0.85})

    plot_benchmark(axs[0], result_df_89, 'SyGuS', labels, line_styles)
    plot_benchmark(axs[1], result_df_38, '38', labels, line_styles)

    # Save the figure
    plt.savefig("./average_program_size_A-BUS_BUS.pdf")

if __name__ == "__main__":
    main()