# E1

In [1]:
import os

from _import_from_src import search_log_files
from _import_from_src import LogParser
from _import_from_src import LOG_DIR

# Folder <LOG_DIR>/E1; the cells below search this folder for log files.
target = os.path.join(LOG_DIR, 'E1')

In [2]:
# List the log files under `target` for seed 1.
# NOTE(review): search_log_files lives in src; presumably it keeps file names
# containing every given keyword -- confirm against its definition.
search_log_files(target, ["seed1"])

['2024-09-21_04-10-09_vanilla_bo_Hartmann6_GP_EI_seed1.log',
 '2024-09-21_04-10-09_vanilla_bo_Hartmann6_GP_POI_seed1.log',
 '2024-09-21_04-10-09_vanilla_bo_Hartmann6_GP_UCB_seed1.log',
 '2024-09-21_04-10-09_vanilla_bo_Hartmann6_TP_EI_seed1.log',
 '2024-09-21_04-10-09_vanilla_bo_Hartmann6_TP_POI_seed1.log',
 '2024-09-21_04-10-09_vanilla_bo_Hartmann6_TP_UCB_seed1.log',
 '2024-09-21_04-10-30_vanilla_bo_BraninHoo_GP_EI_seed1.log',
 '2024-09-21_04-10-30_vanilla_bo_BraninHoo_GP_POI_seed1.log',
 '2024-09-21_04-10-30_vanilla_bo_BraninHoo_GP_UCB_seed1.log',
 '2024-09-21_04-10-30_vanilla_bo_BraninHoo_TP_EI_seed1.log',
 '2024-09-21_04-10-30_vanilla_bo_BraninHoo_TP_POI_seed1.log',
 '2024-09-21_04-10-30_vanilla_bo_BraninHoo_TP_UCB_seed1.log',
 '2024-09-21_04-10-30_vanilla_bo_SinusoidalSynthetic_GP_EI_seed1.log',
 '2024-09-21_04-10-30_vanilla_bo_SinusoidalSynthetic_GP_POI_seed1.log',
 '2024-09-21_04-10-30_vanilla_bo_SinusoidalSynthetic_GP_UCB_seed1.log',
 '2024-09-21_04-10-30_vanilla_bo_SinusoidalSy

In [3]:
# List the log files under `target` for seed 0.
# NOTE(review): search_log_files lives in src; presumably it keeps file names
# containing every given keyword -- confirm against its definition.
search_log_files(target, ["seed0"])

['2024-09-20_22-18-11_vanilla_bo_Hartmann6_GP_EI_seed0.log',
 '2024-09-20_22-18-11_vanilla_bo_Hartmann6_GP_POI_seed0.log',
 '2024-09-20_22-18-11_vanilla_bo_Hartmann6_GP_UCB_seed0.log',
 '2024-09-20_22-18-11_vanilla_bo_Hartmann6_TP_EI_seed0.log',
 '2024-09-20_22-18-11_vanilla_bo_Hartmann6_TP_POI_seed0.log',
 '2024-09-20_22-18-11_vanilla_bo_Hartmann6_TP_UCB_seed0.log',
 '2024-09-20_22-18-15_vanilla_bo_BraninHoo_GP_EI_seed0.log',
 '2024-09-20_22-18-15_vanilla_bo_BraninHoo_GP_POI_seed0.log',
 '2024-09-20_22-18-15_vanilla_bo_BraninHoo_GP_UCB_seed0.log',
 '2024-09-20_22-18-15_vanilla_bo_BraninHoo_TP_EI_seed0.log',
 '2024-09-20_22-18-15_vanilla_bo_BraninHoo_TP_POI_seed0.log',
 '2024-09-20_22-18-15_vanilla_bo_BraninHoo_TP_UCB_seed0.log',
 '2024-09-20_22-18-15_vanilla_bo_SinusoidalSynthetic_GP_EI_seed0.log',
 '2024-09-20_22-18-15_vanilla_bo_SinusoidalSynthetic_GP_POI_seed0.log',
 '2024-09-20_22-18-15_vanilla_bo_SinusoidalSynthetic_GP_UCB_seed0.log',
 '2024-09-20_22-18-15_vanilla_bo_SinusoidalSy

In [20]:
import ast
import re
import os

import numpy as np
import pandas as pd



class LogParser:
    def __init__(self, file_path):
        self.file_path = file_path
        self.settings = {}
        self.initial_data = {"X_initial": [], "y_initial": []}
        self.bo_data = {"X_new": [], "y_new": [], "Beta": [], "Iteration": []}
        self.objective = None

    def _combine_log_entries(self):
        with open(self.file_path, "r") as file:  # 修正: self.log_file → self.file_path
            lines = file.readlines()

        timestamp_pattern = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3} - "

        combined_lines = []
        current_entry = ""

        for line in lines:
            if re.match(timestamp_pattern, line):
                if current_entry:
                    combined_lines.append(current_entry.strip())
                current_entry = line.strip()
            else:
                current_entry += " " + line.strip()

        if current_entry:
            combined_lines.append(current_entry.strip())

        return combined_lines

    def _parse_settings(self, line):
        settings_str = line.split("settings:")[1].strip()
        try:
            # Parse the settings safely
            self.settings = self._safe_parse_settings(settings_str)
            
            # More flexible regex to capture the class name of the objective_function
            obj_func_match = re.search(r"objective_function':\s*<.*?\.([A-Za-z0-9_]+)\s+object\s+at", settings_str)
            
            if obj_func_match:
                objective_function_name = obj_func_match.group(1)
                print(f"Objective Function: {objective_function_name}")
            else:
                print("Objective Function not found in settings.")
                
        except SyntaxError as e:
            print(f"Failed to parse settings: {e}")
            self.settings = settings_str

    def _safe_parse_settings(self, settings_str):
        # array と <function>, <class> の部分を文字列に置換
        settings_str = re.sub(r"<function\s+\w+\s+at\s+0x[0-9a-fA-F]+>", "'<function>'", settings_str)
        settings_str = re.sub(r"<class\s+'\w+\.\w+'>", "'<class>'", settings_str)
        settings_str = re.sub(r"<\w+\.\w+\s+object\s+at\s+0x[0-9a-fA-F]+>", "'<object>'", settings_str)
        
        # 辞書を文字列から生成（evalは使わず安全なast.literal_evalを使う）
        settings_dict = ast.literal_eval(settings_str)
        
        return settings_dict

    def parse_log_file(self):
        combined_lines = self._combine_log_entries()
        current_data = {}
        
        for line in combined_lines:
            # Parse settings
            if "Start BO with settings:" in line:
                settings_str = re.search(r"Start BO with settings: (.*)", line).group(1)

                try:
                    self._parse_settings(line)
                except Exception as e:
                    print(f"Failed to parse settings: {e}")
                    self.settings = settings_str
            
            # Parse X_initial and y_initial
            elif "X initial:" in line:
                x_initial_str = re.search(r"X initial: (\[.*?\])", line).group(1)
                self.initial_data["X_initial"].append(self._extract_float_list(x_initial_str))
            elif "y initial:" in line:
                y_initial_str = re.search(r"y initial: (\[.*?\])", line).group(1)
                self.initial_data["y_initial"].append(self._extract_float_list(y_initial_str))

            # Parse Beta, Iteration, X_new, and y_new
            elif "Beta:" in line:
                current_data["Beta"] = float(re.search(r"Beta: ([-+]?\d*\.\d+|\d+)", line).group(1))
            elif "Iteration:" in line:
                current_data["Iteration"] = int(re.search(r"Iteration: (\d+) /", line).group(1))
            elif "X new:" in line:
                current_data["X_new"] = self._extract_float_list(re.search(r"X new: (\[\[.*?\]\])", line).group(1))
            elif "y new:" in line:
                current_data["y_new"] = self._extract_float_list(re.search(r"y new: (\[\[.*?\]\])", line).group(1))

                # Check if all required fields are in `current_data`
                if all(key in current_data for key in ["Beta", "Iteration", "X_new", "y_new"]):
                    # Append the current data to `self.bo_data`
                    self.bo_data["Beta"].append(current_data["Beta"])
                    self.bo_data["Iteration"].append(current_data["Iteration"])
                    self.bo_data["X_new"].append(current_data["X_new"])
                    self.bo_data["y_new"].append(current_data["y_new"])

                    # Clear current_data for the next entry
                    current_data.clear()
        
        # pandas DataFrame に変換
        self.initial_data = pd.DataFrame(self.initial_data)
        self.bo_data = pd.DataFrame(self.bo_data)

        self.initial_data["X_initial"] = self.initial_data["X_initial"].apply(np.array)
        self.initial_data["y_initial"] = self.initial_data["y_initial"].apply(np.array)
        self.initial_data["y_initial"] = self.initial_data["y_initial"].apply(float)

        self.bo_data["X_new"] = self.bo_data["X_new"].apply(np.array)
        self.bo_data["y_new"] = self.bo_data["y_new"].apply(np.array)
        self.bo_data["y_new"] = self.bo_data["y_new"].apply(float)
        

    def _extract_float_list(self, array_str):
        # Updated regex pattern to capture numbers in scientific notation as well
        number_pattern = r"[-+]?\d*\.\d+(?:[eE][-+]?\d+)?|\d+"
        numbers = re.findall(number_pattern, array_str)
        return [float(num) for num in numbers]
    
    def create_combined_dataframe(self):
        beta_init = self.bo_data['Beta'].iloc[0]

        df_init = self.initial_data.copy()
        df_init['X_new'] = df_init['X_initial']
        df_init['y_new'] = df_init['y_initial']
        df_init.drop(columns=['X_initial', 'y_initial'], inplace=True)
        df_init['y_best'] = df_init['y_new'].min()
        df_init['Beta'] = beta_init
        df_init['Beta:n'] = beta_init / len(df_init)
        df_init['Iteration'] = 0

        df_main = self.bo_data.copy()
        df_main['y_best'] = df_main['y_new'].cummin()
        df_main['y_best'] = np.minimum(df_main['y_best'], df_init['y_best'].iloc[0])
        df_main['Beta:n'] = df_main['Beta'] / (len(df_init) + df_main.index)

        df = pd.concat([df_init, df_main])
        return df 


# Parse a single log file. Swap in one of the commented names to inspect
# another run.
# file_name = '2024-09-21_04-10-09_vanilla_bo_Hartmann6_GP_EI_seed1.log'
# file_name = '2024-09-20_22-18-11_vanilla_bo_Hartmann6_GP_EI_seed0.log'
file_name = "2024-09-20_22-18-11_vanilla_bo_Hartmann6_TP_EI_seed0.log"
file_path = os.path.join(target, file_name)

parser = LogParser(file_path)
parser.parse_log_file()

# Uncomment to inspect the raw parse results:
# print("Settings:", parser.settings)
# print("Initial Data:")
# print(parser.initial_data)
# print("BO Data:")
# print(parser.bo_data)

# Initial points followed by the BO iterations; shown as the cell output.
df_combined = parser.create_combined_dataframe()
df_combined

Failed to parse settings: invalid syntax (<unknown>, line 1)


Unnamed: 0,X_new,y_new,y_best,Beta,Beta:n,Iteration
0,"[0.62279334, 0.47389865, 0.86142857, 0.4732815...",-0.188452,-1.261967,2.055096,0.513774,0
1,"[0.26682763, 0.88574659, 0.0114465, 0.75313756...",-0.040065,-1.261967,2.055096,0.513774,0
2,"[0.2018369, 0.22683737, 0.51316811, 0.06612204...",-1.261967,-1.261967,2.055096,0.513774,0
3,"[0.90835808, 0.63090744, 0.36410912, 0.6602818...",-0.017877,-1.261967,2.055096,0.513774,0
0,"[0.27721548, 0.05223083, 0.60450494, 0.1630376...",-1.484909,-1.484909,2.055096,0.513774,1
...,...,...,...,...,...,...
146,"[0.92939043, 0.78809917, 0.559425, 0.81270087,...",-0.003353,-3.268768,43.840466,0.292270,147
147,"[0.92939043, 0.78809917, 0.559425, 0.81270087,...",-0.003353,-3.268768,43.199276,0.286088,148
148,"[0.92939043, 0.78809917, 0.559425, 0.81270087,...",-0.003353,-3.268768,42.695103,0.280889,149
149,"[0.92939043, 0.78809917, 0.559425, 0.81270087,...",-0.003353,-3.268768,41.831009,0.273405,150


In [21]:
import plotly.graph_objects as go

# Create a history plot for bo_data using Plotly
def history_plot(bo_data):
    """Show the optimization history of one run as a Plotly figure.

    ``bo_data`` must provide the columns ``Iteration``, ``y_new``, ``y_best``
    and ``Beta:n``. Three traces are drawn: every ``y_new`` as blue markers,
    ``y_best`` as a blue line, and the ``y_new`` values whose ``Beta:n`` is
    at least 1 again as orange markers. The figure is displayed with
    ``fig.show()``; nothing is returned.
    """
    iterations = bo_data['Iteration']
    observations = bo_data['y_new']
    high_beta = bo_data['Beta:n'] >= 1

    # One keyword set per trace, in drawing order.
    trace_specs = (
        dict(x=iterations, y=observations, mode='markers',
             name='y_new', marker=dict(color='blue')),
        dict(x=iterations, y=bo_data['y_best'], mode='lines',
             name='y_best', line=dict(color='blue')),
        dict(x=iterations[high_beta], y=observations[high_beta], mode='markers',
             name='y_new (Beta:n >= 1)', marker=dict(color='orange')),
    )

    fig = go.Figure()
    for spec in trace_specs:
        fig.add_trace(go.Scatter(**spec))

    fig.update_layout(
        title="Bayesian Optimization History",
        xaxis_title="Iteration",
        yaxis_title="Function Value",
        legend_title="Metrics",
    )
    fig.show()

# Plot the history of df_combined (initial points plus BO iterations of the
# log parsed in the previous cell).
history_plot(df_combined)


## 比較

In [12]:
# Collect the seed-1 SinusoidalSynthetic runs of experiment E1 and store one
# combined DataFrame per (surrogate model, acquisition function) pair.
target = os.path.join(LOG_DIR, 'E1')

file_name_list = search_log_files(target, ["seed1", "Sinusoidal"])
print(file_name_list)

file_path_list = [os.path.join(target, file_name) for file_name in file_name_list]

# None marks a combination for which no log file has been loaded.
df_dict = {
    model: {acq: None for acq in ("EI", "POI", "UCB")}
    for model in ("GP", "TP")
}

for file_path in file_path_list:
    # File names end in "..._<model>_<acq>_seed<k>.log" (see the list printed
    # above). Read the two fields from the base name: a substring search over
    # the whole path would mislabel every run whenever a directory component
    # happens to contain "GP", "EI" or "POI".
    # NOTE(review): parser.settings could supply these fields instead once the
    # settings entry is known to parse reliably.
    name_fields = os.path.splitext(os.path.basename(file_path))[0].split("_")
    model_name, acq_fn = name_fields[-3:-1] if len(name_fields) >= 3 else (None, None)

    if model_name not in df_dict or acq_fn not in df_dict[model_name]:
        # Do not silently file an unknown run under "TP" / "UCB".
        print(f"Skipping {file_path}: unrecognized model/acquisition in file name")
        continue

    parser = LogParser(file_path)
    parser.parse_log_file()
    df_combined = parser.create_combined_dataframe()

    # Store the combined dataframe in the dictionary
    df_dict[model_name][acq_fn] = df_combined


print(df_dict)

['2024-09-21_04-10-30_vanilla_bo_SinusoidalSynthetic_GP_EI_seed1.log', '2024-09-21_04-10-30_vanilla_bo_SinusoidalSynthetic_GP_POI_seed1.log', '2024-09-21_04-10-30_vanilla_bo_SinusoidalSynthetic_GP_UCB_seed1.log', '2024-09-21_04-10-30_vanilla_bo_SinusoidalSynthetic_TP_EI_seed1.log', '2024-09-21_04-10-30_vanilla_bo_SinusoidalSynthetic_TP_POI_seed1.log', '2024-09-21_04-10-30_vanilla_bo_SinusoidalSynthetic_TP_UCB_seed1.log']
Failed to parse settings: invalid syntax (<unknown>, line 1)
Failed to parse settings: invalid syntax (<unknown>, line 1)
Failed to parse settings: invalid syntax (<unknown>, line 1)
Failed to parse settings: invalid syntax (<unknown>, line 1)
Failed to parse settings: invalid syntax (<unknown>, line 1)
Failed to parse settings: invalid syntax (<unknown>, line 1)
{'GP': {'EI':            X_new      y_new     y_best       Beta    Beta:n  Iteration
0   [8.95173865]   8.787816 -25.430301   1.363539  0.340885          0
1   [6.37751095] -25.430301 -25.430301   1.363539  0.

In [25]:
import plotly.graph_objects as go

def add_history_plot(fig, df_dict, model_name, acq_fn, highlight_high_beta=False):
    """Add the history traces of one (model, acquisition) run to ``fig``.

    Parameters
    ----------
    fig : figure object providing ``add_trace`` (a Plotly ``go.Figure``).
    df_dict : nested mapping ``df_dict[model_name][acq_fn]`` -> DataFrame with
        the columns ``Iteration``, ``y_new`` and ``y_best`` (plus ``Beta:n``
        when ``highlight_high_beta`` is set), or ``None`` when the run is
        missing.
    model_name, acq_fn : keys selecting the run; also used as the trace name.
    highlight_high_beta : when True, additionally draw the points whose
        ``Beta:n`` is at least 1 as orange markers. Off by default.

    Two traces are added: ``y_new`` as markers and ``y_best`` as a line, both
    blue for "GP" and red for "TP". Any other model name gets no explicit
    colour. A missing run (``None``) is reported and skipped so the remaining
    runs can still be plotted.

    Returns
    -------
    None
    """
    color_dict = {
        "GP": "blue",
        "TP": "red"
    }

    df = df_dict[model_name][acq_fn]
    if df is None:
        # The dictionary is pre-filled with None placeholders.
        print(f"No data for {model_name} - {acq_fn}; skipping.")
        return None

    # None leaves the colour unset instead of raising KeyError for new models.
    color = color_dict.get(model_name)
    label = f"{model_name} - {acq_fn}"

    # Plot y_new
    fig.add_trace(go.Scatter(
        x=df['Iteration'],
        y=df['y_new'],
        mode='markers',
        name=label,
        marker=dict(color=color)
    ))

    # Plot y_best
    fig.add_trace(go.Scatter(
        x=df['Iteration'],
        y=df['y_best'],
        mode='lines',
        name=label,
        line=dict(color=color)
    ))

    if highlight_high_beta:
        # Highlight points where Beta:n >= 1 with orange dots
        high_beta_mask = df['Beta:n'] >= 1
        fig.add_trace(go.Scatter(
            x=df.loc[high_beta_mask, 'Iteration'],
            y=df.loc[high_beta_mask, 'y_new'],
            mode='markers',
            name=f"{model_name} - {acq_fn} (Beta:n >= 1)",
            marker=dict(color='orange')
        ))

    return None

In [26]:
# Overlay the EI run of every surrogate model in df_dict on a single figure.
fig = go.Figure()

acq_fn = "EI"
for model_name in df_dict:
    add_history_plot(fig, df_dict, model_name, acq_fn)

fig.update_layout(
    title="Bayesian Optimization History",
    xaxis_title="Iteration",
    yaxis_title="Function Value",
    legend_title="Metrics",
)

fig.show()