In [1]:
import os
import pandas as pd

In [2]:
import re
import ast

def parse_log_data(file_path):

    data = {}

    with open(file_path, 'r') as file:
        text = file.read()

    # Regular expression patterns
    time_pattern = r'(\w+ \w+) took (\d+\.\d+) seconds'
    num_sleep_pattern = r'Mutation generation num sleep @ \d+\.\d+ sec \/ sleep: (\d+)'
    num_mutants_pattern = r'deepmufl generated (\d+) mutants'
    non_viable_pattern = r'out of which (\d+) turned out to be non-viable'
    # result_pattern = r'\s*(\w+ \w+ \- Type \d+):\s+(SBI|Ochiai) (Avg|Max): (.*)'
    # result_pattern = r'\s*(\w+ \w+ \- Type \d+):\s+(SBI|Ochiai) (Avg|Max):\s*({.*})'
    result_pattern = r'\s*(\w+\s\w+\s-\sType\s\d+):\s+(SBI|Ochiai)\s(Avg|Max):\s*(\{.*?\})'

    # Extract time data, number of sleeps, etc.
    for line in text.splitlines():
        match = re.search(time_pattern, line)
        if match:
            data[match.group(1).replace(' ', '_')] = float(match.group(2))

        match = re.search(num_sleep_pattern, line)
        if match:
            data['mutation_generation_num_sleep'] = int(match.group(1))

        match = re.search(num_mutants_pattern, line)
        if match:
            data['num_mutants'] = int(match.group(1))

        match = re.search(non_viable_pattern, line)
        if match:
            data['num_nonviable_mutants'] = int(match.group(1))

    current_dict_index = 0
    dict_order = [
        'Metallaxis - Type 1_SBI_Avg',
        'Metallaxis - Type 1_SBI_Max',
        'Metallaxis - Type 1_Ochiai_Avg',
        'Metallaxis - Type 1_Ochiai_Max',
        'Metallaxis - Type 2_SBI_Avg',
        'Metallaxis - Type 2_SBI_Max',
        'Metallaxis - Type 2_Ochiai_Avg',
        'Metallaxis - Type 2_Ochiai_Max',
        'MUSE'
    ]

    for line in text.splitlines():
        if '{' in line:
            start_index = line.find('{') 
            end_index = line.rfind('}')

            if start_index != -1 and end_index != -1:
                dict_str = line[start_index:end_index + 1]

                try:
                    result_dict = ast.literal_eval(dict_str)
                    data[dict_order[current_dict_index]] = result_dict
                    current_dict_index += 1
                except (ValueError, SyntaxError):
                    print(f"Error converting dictionary string on line: {line}")

    return data



file_path = 'deepmufl-ase-2023-main/models/31880720_class_final_layer_softmax/02/25.txt'
results = parse_log_data(file_path)
print(results)
print('')
print(results['Metallaxis - Type 1_Ochiai_Avg']['L7'])


{'case_splitting': 0.2465345859527588, 'Mutation_execution': 68.66979479789734, 'num_mutants': 373, 'num_nonviable_mutants': 0, 'Metallaxis - Type 1_SBI_Avg': {'L0': 0.0, 'L3': 0.0, 'L6': 0.0, 'L1': 0.0, 'L4': 0.0, 'L7': 0.35}, 'Metallaxis - Type 1_SBI_Max': {'L0': 0, 'L3': 0, 'L6': 0, 'L1': 0, 'L4': 0, 'L7': 1.0}, 'Metallaxis - Type 1_Ochiai_Avg': {'L0': 0.0, 'L3': 0.0, 'L6': 0.0, 'L1': 0.0, 'L4': 0.0, 'L7': 0.4121320343559642}, 'Metallaxis - Type 1_Ochiai_Max': {'L0': 0, 'L3': 0, 'L6': 0, 'L1': 0, 'L4': 0, 'L7': 1.0}, 'Metallaxis - Type 2_SBI_Avg': {'L0': 0.0, 'L3': 0.0, 'L6': 0.0, 'L1': 0.0, 'L4': 0.0, 'L7': 0.0}, 'Metallaxis - Type 2_SBI_Max': {'L0': 0, 'L3': 0, 'L6': 0, 'L1': 0, 'L4': 0, 'L7': 0.0}, 'Metallaxis - Type 2_Ochiai_Avg': {'L0': 0.0, 'L3': 0.0, 'L6': 0.0, 'L1': 0.0, 'L4': 0.0, 'L7': 0.0}, 'Metallaxis - Type 2_Ochiai_Max': {'L0': 0, 'L3': 0, 'L6': 0, 'L1': 0, 'L4': 0, 'L7': 0.0}, 'MUSE': {'L0': 0.0, 'L3': 0.0, 'L6': 0.0, 'L1': 0.0, 'L4': 0.0, 'L7': 0.5}}

0.4121320343559

In [3]:
columns_list = ['model_name',
'100_generation_time',
'100_total_time',
'100_num_sleep',
'100_metallaxis_type_1_sbi_avg',
'100_metallaxis_type_1_sbi_max',
'100_metallaxis_type_1_ochiai_avg',
'100_metallaxis_type_1_ochiai_max',
'100_metallaxis_type_2_sbi_avg',
'100_metallaxis_type_2_sbi_max',
'100_metallaxis_type_2_ochiai_avg',
'100_metallaxis_type_2_ochiai_max',
'100_muse',
'75_generation_time',
'75_total_time',
'75_num_sleep',
'75_metallaxis_type_1_sbi_avg',
'75_metallaxis_type_1_sbi_max',
'75_metallaxis_type_1_ochiai_avg',
'75_metallaxis_type_1_ochiai_max',
'75_metallaxis_type_2_sbi_avg',
'75_metallaxis_type_2_sbi_max',
'75_metallaxis_type_2_ochiai_avg',
'75_metallaxis_type_2_ochiai_max',
'75_muse',
'50_generation_time',
'50_total_time',
'50_num_sleep',
'50_metallaxis_type_1_sbi_avg',
'50_metallaxis_type_1_sbi_max',
'50_metallaxis_type_1_ochiai_avg',
'50_metallaxis_type_1_ochiai_max',
'50_metallaxis_type_2_sbi_avg',
'50_metallaxis_type_2_sbi_max',
'50_metallaxis_type_2_ochiai_avg',
'50_metallaxis_type_2_ochiai_max',
'50_muse',
'25_generation_time',
'25_total_time',
'25_num_sleep',
'25_metallaxis_type_1_sbi_avg',
'25_metallaxis_type_1_sbi_max',
'25_metallaxis_type_1_ochiai_avg',
'25_metallaxis_type_1_ochiai_max',
'25_metallaxis_type_2_sbi_avg',
'25_metallaxis_type_2_sbi_max',
'25_metallaxis_type_2_ochiai_avg',
'25_metallaxis_type_2_ochiai_max',
'25_muse',
]
len(columns_list)

49

In [4]:

output_df = pd.DataFrame(columns=columns_list)
input_folder = 'deepmufl-ase-2023-main/models'
rows_list = []
for model in os.listdir(input_folder):
    model_path = os.path.join(input_folder,model)
    for model_iter in os.listdir(model_path):
        model_iter_path = os.path.join(model_path,model_iter)
        row_name = model + '_' +model_iter
        rows_list.append(row_name)
        for f in os.listdir(model_iter_path):
            if f.endswith(".txt"):
                file_path = os.path.join(model_iter_path,f)
output_df['model_name'] = rows_list
output_df

Unnamed: 0,model_name,100_generation_time,100_total_time,100_num_sleep,100_metallaxis_type_1_sbi_avg,100_metallaxis_type_1_sbi_max,100_metallaxis_type_1_ochiai_avg,100_metallaxis_type_1_ochiai_max,100_metallaxis_type_2_sbi_avg,100_metallaxis_type_2_sbi_max,...,25_num_sleep,25_metallaxis_type_1_sbi_avg,25_metallaxis_type_1_sbi_max,25_metallaxis_type_1_ochiai_avg,25_metallaxis_type_1_ochiai_max,25_metallaxis_type_2_sbi_avg,25_metallaxis_type_2_sbi_max,25_metallaxis_type_2_ochiai_avg,25_metallaxis_type_2_ochiai_max,25_muse
0,31880720_class_final_layer_softmax_03,,,,,,,,,,...,,,,,,,,,,
1,31880720_class_final_layer_softmax_02,,,,,,,,,,...,,,,,,,,,,
2,31880720_class_final_layer_softmax_01,,,,,,,,,,...,,,,,,,,,,
3,59758722_reg_final_layer_relu_03,,,,,,,,,,...,,,,,,,,,,
4,59758722_reg_final_layer_relu_02,,,,,,,,,,...,,,,,,,,,,
5,59758722_reg_final_layer_relu_01,,,,,,,,,,...,,,,,,,,,,
6,33969059_reg_final_layer_softmax_03,,,,,,,,,,...,,,,,,,,,,
7,33969059_reg_final_layer_softmax_02,,,,,,,,,,...,,,,,,,,,,
8,33969059_reg_final_layer_softmax_01,,,,,,,,,,...,,,,,,,,,,
9,58844149_class_final_layer_softmax_03,,,,,,,,,,...,,,,,,,,,,


In [5]:
def find_max_key(data, model_name):

    max_value = max(data.values())
    max_keys = [key for key, value in data.items() if value == max_value]

    # Check if there are multiple keys with the maximum value
    if len(max_keys) > 1:
        return 0

    # Find the maximum 'N' value among the keys with the max value
    max_n = max(int(key[1:]) for key in data.keys())

    if 'hidden' in model_name:
        max_n = 1

    # Check if 'L' followed by max_n is the key with the max value
    if f"L{max_n}" != max_keys[0]:
        return 0

    return 1

In [6]:
for model in os.listdir(input_folder):
    model_path = os.path.join(input_folder,model)
    for model_iter in os.listdir(model_path):
        model_iter_path = os.path.join(model_path,model_iter)
        row_name = model + '_' + model_iter
        for f in os.listdir(model_iter_path):
            if f.endswith(".txt"):
                file_path = os.path.join(model_iter_path,f)
                values_dict = parse_log_data(file_path)
                if values_dict.get('Mutation_generation') is not None:
                    output_df.loc[output_df['model_name']==row_name,f'{f.split(".")[0]}_generation_time'] = values_dict['Mutation_generation']
                output_df.loc[output_df['model_name']==row_name,f'{f.split(".")[0]}_total_time'] = values_dict['case_splitting'] + values_dict['Mutation_execution']
                if values_dict.get('mutation_generation_num_sleep') is not None:
                    output_df.loc[output_df['model_name']==row_name,f'{f.split(".")[0]}_num_sleep'] = values_dict['mutation_generation_num_sleep']
                output_df.loc[output_df['model_name']==row_name,f'{f.split(".")[0]}_metallaxis_type_1_sbi_avg'] = find_max_key(values_dict['Metallaxis - Type 1_SBI_Avg'], row_name)
                output_df.loc[output_df['model_name']==row_name,f'{f.split(".")[0]}_metallaxis_type_1_sbi_max'] = find_max_key(values_dict['Metallaxis - Type 1_SBI_Max'], row_name)
                output_df.loc[output_df['model_name']==row_name,f'{f.split(".")[0]}_metallaxis_type_1_ochiai_avg'] = find_max_key(values_dict['Metallaxis - Type 1_Ochiai_Avg'], row_name)
                output_df.loc[output_df['model_name']==row_name,f'{f.split(".")[0]}_metallaxis_type_1_ochiai_max'] = find_max_key(values_dict['Metallaxis - Type 1_Ochiai_Max'], row_name)
                output_df.loc[output_df['model_name']==row_name,f'{f.split(".")[0]}_metallaxis_type_2_sbi_avg'] = find_max_key(values_dict['Metallaxis - Type 2_SBI_Avg'], row_name)
                output_df.loc[output_df['model_name']==row_name,f'{f.split(".")[0]}_metallaxis_type_2_sbi_max'] = find_max_key(values_dict['Metallaxis - Type 2_SBI_Max'], row_name)
                output_df.loc[output_df['model_name']==row_name,f'{f.split(".")[0]}_metallaxis_type_2_ochiai_avg'] = find_max_key(values_dict['Metallaxis - Type 2_Ochiai_Avg'], row_name)
                output_df.loc[output_df['model_name']==row_name,f'{f.split(".")[0]}_metallaxis_type_2_ochiai_max'] = find_max_key(values_dict['Metallaxis - Type 2_Ochiai_Max'], row_name)
                output_df.loc[output_df['model_name']==row_name,f'{f.split(".")[0]}_muse'] = find_max_key(values_dict['MUSE'], row_name)
                
            

hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden
hidden


In [7]:
def fillnullvalues(df):
    df['25_generation_time'] = df['25_generation_time'].fillna(df['100_generation_time'])
    df['50_generation_time'] = df['50_generation_time'].fillna(df['100_generation_time'])
    df['75_generation_time'] = df['75_generation_time'].fillna(df['100_generation_time'])
    df['25_num_sleep'] = df['25_num_sleep'].fillna(df['100_num_sleep'])
    df['50_num_sleep'] = df['50_num_sleep'].fillna(df['100_num_sleep'])
    df['75_num_sleep'] = df['75_num_sleep'].fillna(df['100_num_sleep'])

    return df

final_output_df = fillnullvalues(output_df)

final_output_df.to_csv("experiment_results.csv",index=False)