In [1]:
import ast
import os
import re

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

from _import_from_src import search_log_files
from _import_from_src import LogParser
from _import_from_src import LOG_DIR

# Directory that holds the retrieved E4 log files.
target = os.path.join(LOG_DIR, 'E4_retrieved')

# One colour per model, taken from the start of Plotly's qualitative palette
# in the order GP, TP, AGT (zip stops after the three model names).
color_dict = dict(zip(('GP', 'TP', 'AGT'), px.colors.qualitative.Plotly))

In [4]:
class LogParser:
    """Parse a Bayesian-optimization (BO) log file into pandas DataFrames.

    Typical use::

        parser = LogParser(path)
        parser.parse_log_file()
        history = parser.create_combined_dataframe()

    Attributes:
        file_path: Path of the log file to read.
        settings: Settings dict parsed from the "Start BO with settings:" line,
            or the raw settings text when that dict could not be parsed.
        objective: Class name of the ``objective_function`` object named in the
            settings line, or ``None`` when no such entry was found.
        initial_data: After ``parse_log_file`` a DataFrame with the columns
            ``X_initial`` and ``y_initial`` (before that, a dict of empty lists).
        bo_data: After ``parse_log_file`` a DataFrame with the columns
            ``X_new``, ``y_new``, ``Beta`` and ``Iteration`` (before that, a
            dict of empty lists).
    """

    # Every log record starts with a timestamp such as "2024-01-31 12:34:56,789 - ".
    _TIMESTAMP_PATTERN = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} - "

    # One signed decimal number, e.g. "3", "-3", "-1.", ".5", "1.5", "1e-05", "1.e+02".
    # The sign and the exponent belong to the number, so "-1." is not read as 1.0
    # and "1e-05" is not split into two values.
    _NUMBER_PATTERN = r"[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?"

    def __init__(self, file_path):
        """Remember the log path and create empty result containers."""
        self.file_path = file_path
        self.settings = {}
        self.initial_data = {"X_initial": [], "y_initial": []}
        self.bo_data = {"X_new": [], "y_new": [], "Beta": [], "Iteration": []}
        self.objective = None

    def _combine_log_entries(self):
        """Read the log and merge continuation lines into single records.

        A line that starts with a timestamp opens a new record; any other line
        is appended (space separated) to the record currently being built.

        Returns:
            list[str]: One stripped string per log record.
        """
        record_start = re.compile(self._TIMESTAMP_PATTERN)

        combined_lines = []
        current_entry = ""

        with open(self.file_path, "r") as file:
            for line in file:
                if record_start.match(line):
                    if current_entry:
                        combined_lines.append(current_entry.strip())
                    current_entry = line.strip()
                else:
                    current_entry += " " + line.strip()

        if current_entry:
            combined_lines.append(current_entry.strip())

        return combined_lines

    def _parse_settings(self, line):
        """Fill ``self.settings`` and ``self.objective`` from a settings line.

        The objective name is read from the raw text first, so it is available
        even when the settings dict itself cannot be parsed. On a parse failure
        the message is printed and ``self.settings`` keeps the raw text.

        Args:
            line: Log record containing ``settings:`` followed by a dict repr.
        """
        # Split only once: the dict text itself may contain "settings:".
        settings_str = line.split("settings:", 1)[1].strip()

        # Class name of the objective object, e.g. "<pkg.mod.Name object at 0x...>" -> "Name".
        obj_func_match = re.search(
            r"objective_function':\s*<(?:[\w.]*\.)?(\w+)\s+object\s+at", settings_str
        )
        if obj_func_match:
            self.objective = obj_func_match.group(1)

        try:
            self.settings = self._safe_parse_settings(settings_str)
        except (SyntaxError, ValueError, TypeError) as e:
            # ast.literal_eval reports malformed input with any of these types.
            print(f"Failed to parse settings: {e}")
            self.settings = settings_str

    def _safe_parse_settings(self, settings_str):
        """Turn the settings text into a dict without using ``eval``.

        Reprs of functions, classes and objects are not Python literals, so
        they are replaced by placeholder strings before ``ast.literal_eval`` is
        applied. Dotted names of any depth are accepted.

        Args:
            settings_str: Text of the settings dict as written to the log.

        Returns:
            The value produced by ``ast.literal_eval``.

        Raises:
            SyntaxError, ValueError: When the text is still not a literal after
                the replacements (raised by ``ast.literal_eval``).
        """
        settings_str = re.sub(
            r"<function\s+[\w.<>]+\s+at\s+0x[0-9a-fA-F]+>", "'<function>'", settings_str
        )
        settings_str = re.sub(r"<class\s+'[\w.]+'>", "'<class>'", settings_str)
        settings_str = re.sub(
            r"<[\w.]+\s+object\s+at\s+0x[0-9a-fA-F]+>", "'<object>'", settings_str
        )

        return ast.literal_eval(settings_str)

    def parse_log_file(self):
        """Parse the log and store the results as DataFrames on the instance.

        ``self.initial_data`` gets one row per "X initial:" / "y initial:"
        pair. ``self.bo_data`` gets one row each time a "y new:" record arrives
        while Beta, Iteration, X_new and y_new are all known. The ``y`` columns
        are stored as Python floats, the ``X`` columns as numpy arrays.

        The method may be called repeatedly; each call starts from empty
        containers and re-reads the file.
        """
        initial = {"X_initial": [], "y_initial": []}
        history = {"X_new": [], "y_new": [], "Beta": [], "Iteration": []}
        current_data = {}
        number = self._NUMBER_PATTERN

        for line in self._combine_log_entries():
            # Parse settings
            if "Start BO with settings:" in line:
                settings_str = line.split("Start BO with settings:", 1)[1].strip()
                try:
                    self._parse_settings(line)
                except Exception:
                    # Settings are best-effort metadata: keep the raw text
                    # rather than abort parsing of the numeric records.
                    self.settings = settings_str

            # Parse X_initial and y_initial
            elif "X initial:" in line:
                x_initial_str = re.search(r"X initial: (\[.*?\])", line).group(1)
                initial["X_initial"].append(self._extract_float_list(x_initial_str))
            elif "y initial:" in line:
                y_initial_str = re.search(r"y initial: (\[.*?\])", line).group(1)
                initial["y_initial"].append(self._extract_float_list(y_initial_str))

            # Parse Beta, Iteration, X_new, and y_new
            elif "Beta:" in line:
                current_data["Beta"] = float(re.search(rf"Beta: ({number})", line).group(1))
            elif "Iteration:" in line:
                current_data["Iteration"] = int(re.search(r"Iteration: (\d+) /", line).group(1))
            elif "X new:" in line:
                x_new_str = re.search(r"X new: (\[\[.*?\]\])", line).group(1)
                current_data["X_new"] = self._extract_float_list(x_new_str)
            elif "y new:" in line:
                y_new_str = re.search(r"y new: (\[\[.*?\]\])", line).group(1)
                current_data["y_new"] = self._extract_float_list(y_new_str)

                # "y new" closes a record; store it only when it is complete.
                if all(key in current_data for key in ["Beta", "Iteration", "X_new", "y_new"]):
                    for key in ("Beta", "Iteration", "X_new", "y_new"):
                        history[key].append(current_data[key])
                    current_data.clear()

        # Convert to pandas DataFrames.
        self.initial_data = pd.DataFrame(initial)
        self.bo_data = pd.DataFrame(history)

        self.initial_data["X_initial"] = self.initial_data["X_initial"].apply(np.array)
        self.initial_data["y_initial"] = self.initial_data["y_initial"].apply(self._as_scalar)

        self.bo_data["X_new"] = self.bo_data["X_new"].apply(np.array)
        self.bo_data["y_new"] = self.bo_data["y_new"].apply(self._as_scalar)

    @staticmethod
    def _as_scalar(values):
        """Convert a one-element sequence to a Python float.

        ``ndarray.item()`` is used instead of ``float(ndarray)`` because
        converting an array with ndim > 0 via ``float()`` is deprecated in
        NumPy >= 1.25.

        Raises:
            ValueError: If ``values`` does not hold exactly one element.
        """
        return float(np.asarray(values, dtype=float).item())

    def _extract_float_list(self, array_str):
        """Return every number found in ``array_str`` as a list of floats.

        Signs, a trailing decimal point ("-1.") and exponents with or without
        a fractional part ("1e-05", "1.e-05", "2.5E+3") are all recognised.
        """
        return [float(token) for token in re.findall(self._NUMBER_PATTERN, array_str)]

    def create_combined_dataframe(self):
        """Join the initial design and the BO iterations into one DataFrame.

        ``parse_log_file`` must have been called first. Initial points get
        ``Iteration`` 0, the Beta of the first BO record, and a constant
        ``y_best`` equal to the smallest initial ``y``. For BO rows ``y_best``
        is the running minimum of ``y_new``, capped by the initial best.
        ``Beta:n`` divides Beta by the number of initial points (initial rows)
        or by that number plus the row index (BO rows).

        Returns:
            pandas.DataFrame: Initial rows followed by BO rows with the columns
            ``X_new``, ``y_new``, ``y_best``, ``Beta``, ``Beta:n`` and
            ``Iteration``. The original row indices are kept, so index values
            repeat.

        Raises:
            IndexError: If no BO record or no initial point was parsed.
        """
        beta_init = self.bo_data['Beta'].iloc[0]

        # rename() returns a new frame, so self.initial_data is left untouched.
        df_init = self.initial_data.rename(
            columns={'X_initial': 'X_new', 'y_initial': 'y_new'}
        )
        df_init['y_best'] = df_init['y_new'].min()
        df_init['Beta'] = beta_init
        df_init['Beta:n'] = beta_init / len(df_init)
        df_init['Iteration'] = 0

        df_main = self.bo_data.copy()
        df_main['y_best'] = df_main['y_new'].cummin()
        df_main['y_best'] = np.minimum(df_main['y_best'], df_init['y_best'].iloc[0])
        df_main['Beta:n'] = df_main['Beta'] / (len(df_init) + df_main.index)

        return pd.concat([df_init, df_main])
    
# Create a history plot for bo_data using Plotly
def history_plot(bo_data):
    """Display a Plotly history figure for one run.

    Three traces are drawn against ``Iteration``: ``y_new`` as blue markers,
    ``y_best`` as a blue line, and the ``y_new`` points whose ``Beta:n`` is at
    least 1 again as orange markers. The figure is shown with ``fig.show()``;
    nothing is returned.

    Args:
        bo_data: Table providing the columns ``Iteration``, ``y_new``,
            ``y_best`` and ``Beta:n``.
    """
    iterations = bo_data['Iteration']
    observed = bo_data['y_new']

    fig = go.Figure()

    # Every evaluated value as a blue dot.
    fig.add_trace(go.Scatter(
        x=iterations,
        y=observed,
        mode='markers',
        name='y_new',
        marker={'color': 'blue'},
    ))

    # Best value so far as a blue line.
    fig.add_trace(go.Scatter(
        x=iterations,
        y=bo_data['y_best'],
        mode='lines',
        name='y_best',
        line={'color': 'blue'},
    ))

    # Overlay in orange the evaluations where Beta:n >= 1.
    flagged = bo_data['Beta:n'] >= 1
    fig.add_trace(go.Scatter(
        x=iterations[flagged],
        y=observed[flagged],
        mode='markers',
        name='y_new (Beta:n >= 1)',
        marker={'color': 'orange'},
    ))

    fig.update_layout(
        title="Bayesian Optimization History",
        xaxis_title="Iteration",
        yaxis_title="Function Value",
        legend_title="Metrics",
    )

    fig.show()

def process_dataframes(dataframes):
    """Aggregate several run histories into per-iteration mean and std.

    Each frame is cut to the length of the shortest one, the cut frames are
    stacked, and ``y_best`` and ``Beta`` are summarised per ``Iteration``.
    The smallest and largest row counts are printed.

    Args:
        dataframes: Iterable of DataFrames, each with the columns ``y_best``,
            ``Iteration`` and ``Beta``. Any iterable is accepted (it is
            materialised once), including a generator.

    Returns:
        pandas.DataFrame: One row per ``Iteration`` with the columns
        ``Iteration``, ``y_best_mean``, ``y_best_std``, ``beta_mean`` and
        ``beta_std``.

    Raises:
        ValueError: If ``dataframes`` is empty.
    """
    # Materialise once: the frames are traversed more than once below.
    frames = list(dataframes)
    if not frames:
        raise ValueError(
            "process_dataframes() needs at least one DataFrame; got none "
            "(no matching log files?)"
        )

    row_counts = [frame.shape[0] for frame in frames]
    min_rows = min(row_counts)
    max_rows = max(row_counts)

    # Truncate every run to the shortest one so all runs cover the same rows.
    wanted = ['y_best', 'Iteration', 'Beta']
    concatenated_df = pd.concat([frame[wanted].head(min_rows) for frame in frames])

    # Mean and standard deviation across runs for each iteration.
    result_df = concatenated_df.groupby('Iteration').agg(
        y_best_mean=('y_best', 'mean'),
        y_best_std=('y_best', 'std'),
        beta_mean=('Beta', 'mean'),
        beta_std=('Beta', 'std')
    ).reset_index()

    print(f"Minimum number of rows: {min_rows}")
    print(f"Maximum number of rows: {max_rows}")

    return result_df

def add_history_plot_with_error_bars(fig, model_name, bo_data):
    """Add a model's mean best-value curve plus error bars to ``fig``.

    Two traces are appended, both in the colour ``color_dict[model_name]``:
    the mean curve (listed in the legend as "<model> (Mean)") and the same
    curve at 30 % opacity carrying ``y_best_std`` as error bars (hidden from
    the legend).

    Args:
        fig: Plotly figure that receives the traces (modified in place).
        model_name: Key into the module-level ``color_dict``; also used in the
            trace names.
        bo_data: Table with the columns ``Iteration``, ``y_best_mean`` and
            ``y_best_std``.
    """
    iterations = bo_data['Iteration']
    mean_best = bo_data['y_best_mean']
    model_color = color_dict[model_name]

    # Opaque mean curve: this is the trace shown in the legend.
    mean_trace = go.Scatter(
        x=iterations,
        y=mean_best,
        mode='lines+markers',
        name=f"{model_name} (Mean)",
        line={'color': model_color},
    )
    fig.add_trace(mean_trace)

    # Faded copy of the curve that carries the standard deviation as error bars.
    error_trace = go.Scatter(
        x=iterations,
        y=mean_best,
        mode='lines+markers',
        name=f"{model_name} (Error Bars)",
        line={'color': model_color},
        opacity=0.3,
        error_y={
            'type': 'data',
            'array': bo_data['y_best_std'],
            'visible': True,
        },
        showlegend=False,
    )
    fig.add_trace(error_trace)

def add_history_plot_with_shaded_error(fig, model_name, bo_data):
    """Add a model's mean best-value curve with a shaded ±1 std band to ``fig``.

    Three traces are appended: the mean curve (legend entry "<model> (Mean)"),
    an invisible upper bound ``mean + std`` and an invisible lower bound
    ``mean - std`` that is filled up to the upper bound with the model colour
    at 20 % alpha. The two bound traces are hidden from legend and hover.

    Args:
        fig: Plotly figure that receives the traces (modified in place).
        model_name: Key into the module-level ``color_dict``; also used in the
            trace name.
        bo_data: Table with the columns ``Iteration``, ``y_best_mean`` and
            ``y_best_std``.
    """
    iterations = bo_data['Iteration']
    mean_best = bo_data['y_best_mean']

    # Mean curve with markers.
    fig.add_trace(go.Scatter(
        x=iterations,
        y=mean_best,
        mode='lines+markers',
        name=f"{model_name} (Mean)",
        line={'color': color_dict[model_name]},
    ))

    # Upper edge of the band; zero-width line, nothing filled yet.
    spread = bo_data['y_best_std']
    fig.add_trace(go.Scatter(
        x=iterations,
        y=mean_best + spread,
        mode='lines',
        line={'width': 0},
        showlegend=False,
        hoverinfo="skip",
        fill=None,
    ))

    # Lower edge; 'tonexty' fills the area up to the previously added trace.
    red, green, blue = px.colors.hex_to_rgb(color_dict[model_name])
    fig.add_trace(go.Scatter(
        x=iterations,
        y=mean_best - spread,
        mode='lines',
        line={'width': 0},
        fill='tonexty',
        fillcolor=f"rgba({red}, {green}, {blue}, 0.2)",
        showlegend=False,
        hoverinfo="skip",
    ))

# Function to create and store result_df for each model
def create_result_df_dict(target, models, keywards):
    """Build the aggregated history table of every model.

    For each model the log files in ``target`` are looked up with
    ``search_log_files`` using the model name followed by ``keywards``. The
    resulting file paths are printed, every file is parsed with ``LogParser``,
    and the per-file histories are aggregated by ``process_dataframes``.

    Args:
        target: Directory that contains the log files.
        models: Iterable of model names.
        keywards: List of further search keywords; the model name is put in
            front of it (the list itself is not modified).

    Returns:
        dict: Model name -> aggregated DataFrame from ``process_dataframes``.
    """
    def load_history(filepath):
        # Parse one log file into its combined (initial + BO) history frame.
        parser = LogParser(filepath)
        parser.parse_log_file()
        return parser.create_combined_dataframe()

    result_df_dict = {}

    for model_name in models:
        # Model name first, then the shared keywords.
        search_terms = [model_name] + keywards

        filepath_list = [
            os.path.join(target, log_name)
            for log_name in search_log_files(target, search_terms)
        ]
        print(filepath_list)

        dataframes = []
        for filepath in filepath_list:
            dataframes.append(load_history(filepath))

        result_df_dict[model_name] = process_dataframes(dataframes)

    return result_df_dict

# 1. SinusoidalSynthetic

## 1.1 noise 0

In [5]:
# Experiment dimensions; these tokens also occur in the log file names listed below.
objective_list = ["SinusoidalSynthetic", "BraninHoo", "Hartmann6"]
acq_fn_list = ["EI", "POI", "UCB"]
model_list = ["GP", "TP", "AGT"]
noise_levels = ["noise0", "noise1", "noise2"]

output_dir = os.path.join(LOG_DIR, 'E4_')
target = os.path.join(LOG_DIR, 'E4_retrieved')
# No keywords — presumably this selects every log file in `target`; confirm against search_log_files.
keywards = []

# Requires search_log_files to be defined already (it is imported in the first cell).
filename_list = search_log_files(target, keywards)
filepath_list = [os.path.join(target, filename) for filename in filename_list]

# Bare expression: shown as the cell output.
filepath_list

['/Users/keisukeonoue/ws/TPBO/logs/E4_retrieved/proposed_bo_BraninHoo_AGT_EI_seed0_noise0_2542520.log',
 '/Users/keisukeonoue/ws/TPBO/logs/E4_retrieved/proposed_bo_BraninHoo_AGT_EI_seed0_noise1_2542521.log',
 '/Users/keisukeonoue/ws/TPBO/logs/E4_retrieved/proposed_bo_BraninHoo_AGT_EI_seed0_noise2_2542415.log',
 '/Users/keisukeonoue/ws/TPBO/logs/E4_retrieved/proposed_bo_BraninHoo_AGT_EI_seed0_noise2_2542522.log',
 '/Users/keisukeonoue/ws/TPBO/logs/E4_retrieved/proposed_bo_BraninHoo_AGT_EI_seed1_noise0_2542523.log',
 '/Users/keisukeonoue/ws/TPBO/logs/E4_retrieved/proposed_bo_BraninHoo_AGT_EI_seed1_noise1_2542524.log',
 '/Users/keisukeonoue/ws/TPBO/logs/E4_retrieved/proposed_bo_BraninHoo_AGT_EI_seed1_noise2_2542417.log',
 '/Users/keisukeonoue/ws/TPBO/logs/E4_retrieved/proposed_bo_BraninHoo_AGT_EI_seed1_noise2_2542525.log',
 '/Users/keisukeonoue/ws/TPBO/logs/E4_retrieved/proposed_bo_BraninHoo_AGT_EI_seed2_noise0_2542526.log',
 '/Users/keisukeonoue/ws/TPBO/logs/E4_retrieved/proposed_bo_Bran

In [22]:
# Inputs for one comparison plot: a single objective / acquisition / noise level.
target = os.path.join(LOG_DIR, 'E4_retrieved')
models = ['GP', 'TP', 'AGT']
objective = 'SinusoidalSynthetic'
acquisition = 'EI'
noise_level = "noise1"  # one of "noise0", "noise1", "noise2"
keywards = [objective, acquisition, noise_level]

# Aggregated history per model for the selected configuration.
result_df_dict = create_result_df_dict(target, models, keywards)

# Colour per model, read by the plotting helpers through the global `color_dict`.
color_dict = dict(zip(('GP', 'TP', 'AGT'), px.colors.qualitative.Plotly))

fig = go.Figure()

# One mean curve with a shaded std band per model.
for model_name, result_df in result_df_dict.items():
    add_history_plot_with_shaded_error(fig, model_name, result_df)

# Transparent plot background with light-gray grid lines on both axes.
fig.update_layout(
    title=f"{objective} with {acquisition} Acquisition under {noise_level} Noise",
    xaxis_title="Iteration",
    yaxis_title="Best Function Value",
    plot_bgcolor='rgba(0, 0, 0, 0)',
    xaxis={'showgrid': True, 'gridcolor': 'lightgray'},
    yaxis={'showgrid': True, 'gridcolor': 'lightgray'},
)

fig.show()

Failed to parse settings: invalid syntax (<unknown>, line 1)
Failed to parse settings: invalid syntax (<unknown>, line 1)
Failed to parse settings: invalid syntax (<unknown>, line 1)
Failed to parse settings: invalid syntax (<unknown>, line 1)
Failed to parse settings: invalid syntax (<unknown>, line 1)
Minimum number of rows: 54
Maximum number of rows: 54
Failed to parse settings: invalid syntax (<unknown>, line 1)
Failed to parse settings: invalid syntax (<unknown>, line 1)
Failed to parse settings: invalid syntax (<unknown>, line 1)
Failed to parse settings: invalid syntax (<unknown>, line 1)
Failed to parse settings: invalid syntax (<unknown>, line 1)
Minimum number of rows: 54
Maximum number of rows: 54
Failed to parse settings: invalid syntax (<unknown>, line 1)
Failed to parse settings: invalid syntax (<unknown>, line 1)
Failed to parse settings: invalid syntax (<unknown>, line 1)
Failed to parse settings: invalid syntax (<unknown>, line 1)
Failed to parse settings: invalid synt

In [7]:
import os

# Experiment grid to render.
objectives = ['SinusoidalSynthetic', 'BraninHoo', 'Hartmann6']
acquisitions = ['EI', 'POI', 'UCB']
noise_levels = ["noise0", "noise1", "noise2"]

models = ['GP', 'TP', 'AGT']

# Images are written to a "saved_plots" folder inside the log directory.
target = os.path.join(LOG_DIR, 'E4_retrieved')
output_dir = os.path.join(target, 'saved_plots')
os.makedirs(output_dir, exist_ok=True)

# Every (objective, acquisition, noise level) combination, in nested-loop order.
combinations = [
    (obj, acq, noise)
    for obj in objectives
    for acq in acquisitions
    for noise in noise_levels
]

for objective, acquisition, noise_level in combinations:
    print(f"Processing: {objective}, {acquisition}, {noise_level}")

    # Load and aggregate the logs of all models for this combination.
    keywards = [objective, acquisition, noise_level]
    result_df_dict = create_result_df_dict(target, models, keywards)

    fig = go.Figure()

    # One mean curve with a shaded std band per model.
    for model_name, result_df in result_df_dict.items():
        add_history_plot_with_shaded_error(fig, model_name, result_df)

    # Transparent plot background with light-gray grid lines on both axes.
    fig.update_layout(
        title=f"{objective} with {acquisition} Acquisition under {noise_level} Noise",
        xaxis_title="Iteration",
        yaxis_title="Best Function Value",
        plot_bgcolor='rgba(0, 0, 0, 0)',
        xaxis={'showgrid': True, 'gridcolor': 'lightgray'},
        yaxis={'showgrid': True, 'gridcolor': 'lightgray'},
    )

    # File name encodes the combination; the image is exported at 1200x800.
    filename = f"{objective}_{acquisition}_{noise_level}.png"
    filepath = os.path.join(output_dir, filename)
    fig.write_image(filepath, width=1200, height=800)

    print(f"Saved plot: {filepath}")


Processing: SinusoidalSynthetic, EI, noise0
['/Users/keisukeonoue/ws/TPBO/logs/E4_retrieved/vanilla_bo_SinusoidalSynthetic_GP_EI_seed0_noise0_2542642.log', '/Users/keisukeonoue/ws/TPBO/logs/E4_retrieved/vanilla_bo_SinusoidalSynthetic_GP_EI_seed1_noise0_2542645.log', '/Users/keisukeonoue/ws/TPBO/logs/E4_retrieved/vanilla_bo_SinusoidalSynthetic_GP_EI_seed2_noise0_2542648.log', '/Users/keisukeonoue/ws/TPBO/logs/E4_retrieved/vanilla_bo_SinusoidalSynthetic_GP_EI_seed3_noise0_2542651.log', '/Users/keisukeonoue/ws/TPBO/logs/E4_retrieved/vanilla_bo_SinusoidalSynthetic_GP_EI_seed4_noise0_2542654.log']
Failed to parse settings: invalid syntax (<unknown>, line 1)
Failed to parse settings: invalid syntax (<unknown>, line 1)
Failed to parse settings: invalid syntax (<unknown>, line 1)
Failed to parse settings: invalid syntax (<unknown>, line 1)
Failed to parse settings: invalid syntax (<unknown>, line 1)
Minimum number of rows: 54
Maximum number of rows: 54
['/Users/keisukeonoue/ws/TPBO/logs/E4_ret