In [1]:
import os

import numpy as np
import optuna
import pandas as pd

from _src import LOG_DIR, DB_DIR
from _src import search_log_files

In [10]:
search_log_files(DB_DIR, ["bo", "random"])

['2024-10-12_15-27-35_bo_benchmark_random_seed0.db',
 '2024-10-12_15-27-44_bo_benchmark_random_seed1.db',
 '2024-10-12_15-27-53_bo_benchmark_random_seed2.db',
 '2024-10-12_15-28-02_bo_benchmark_random_seed3.db',
 '2024-10-12_15-28-10_bo_benchmark_random_seed4.db']

In [3]:
search_log_files(DB_DIR, ["bo",])

['2024-10-12_15-27-35_bo_benchmark_random_seed0.db',
 '2024-10-12_15-27-44_bo_benchmark_random_seed1.db',
 '2024-10-12_15-27-53_bo_benchmark_random_seed2.db',
 '2024-10-12_15-28-02_bo_benchmark_random_seed3.db',
 '2024-10-12_15-28-10_bo_benchmark_random_seed4.db',
 '2024-10-12_15-29-04_bo_benchmark_tpe_seed0.db',
 '2024-10-12_15-29-15_bo_benchmark_tpe_seed1.db',
 '2024-10-12_15-29-26_bo_benchmark_tpe_seed2.db',
 '2024-10-12_15-29-38_bo_benchmark_tpe_seed3.db',
 '2024-10-12_15-29-50_bo_benchmark_tpe_seed4.db',
 '2024-10-12_15-30-00_bo_benchmark_bruteforce_seed0.db',
 '2024-10-12_15-32-23_bo_parafac_seed0.db',
 '2024-10-12_15-33-00_bo_parafac_seed1.db',
 '2024-10-12_15-33-37_bo_parafac_seed2.db',
 '2024-10-12_15-34-14_bo_parafac_seed3.db',
 '2024-10-12_15-34-52_bo_parafac_seed4.db']

In [6]:
path = os.path.join(DB_DIR, '2024-10-12_15-27-35_bo_benchmark_random_seed0.db')
storage_url = f"sqlite:///{path}"
study = optuna.load_study(study_name='bo_benchmark_random_seed0', storage=storage_url)
dataframe = study.trials_dataframe().copy()

if study.direction == optuna.study.StudyDirection.MAXIMIZE:
    dataframe['best_value'] = dataframe['value'].cummax()
else:
    dataframe['best_value'] = dataframe['value'].cummin()


<optuna.study.study.Study at 0x1100c39e0>

In [9]:
study.direction

<StudyDirection.MINIMIZE: 1>

In [22]:
import os
import optuna
import numpy as np
import pandas as pd
import plotly.graph_objects as go

# List of file names for seeds 0-4
db_files = search_log_files(DB_DIR, ["random"])

db_folder = DB_DIR  # The folder where .db files are stored

# Initialize a list to store dataframes for each seed
dataframes = []

# Loop over each file, load the study, and calculate best values
for db_file in db_files:
    path = os.path.join(db_folder, db_file)
    storage_url = f"sqlite:///{path}"
    
    # Extract seed number from the filename (assuming the format stays consistent)
    seed = db_file.split('_')[-1].replace('.db', '')

    # Load the study
    study = optuna.load_study(study_name=f'bo_benchmark_random_{seed}', storage=storage_url)
    
    # Get the trials as a dataframe
    df = study.trials_dataframe().copy()
    
    # Compute the best values for each iteration
    if study.direction == optuna.study.StudyDirection.MAXIMIZE:
        df['best_value'] = df['value'].cummax()
    else:
        df['best_value'] = df['value'].cummin()
    
    # Store the dataframe for the current seed
    dataframes.append(df[['number', 'best_value']])

# Merge dataframes by iteration number
merged_df = pd.concat(dataframes, axis=1, keys=[f'seed_{i}' for i in range(len(db_files))])

# Compute mean and standard deviation of best values across seeds
best_values = merged_df.xs('best_value', axis=1, level=1)
mean_best = best_values.mean(axis=1)
std_best = best_values.std(axis=1)

# Compute upper and lower bounds for shaded area
upper_bound = mean_best + std_best
lower_bound = mean_best - std_best

# Plotting with Plotly
fig = go.Figure()

# Add the mean line
fig.add_trace(go.Scatter(
    x=best_values.index,
    y=mean_best,
    mode='lines',
    name='Mean Best Value',
    line=dict(color='red')
))

# Add the shaded error region
fig.add_trace(go.Scatter(
    x=best_values.index,
    y=upper_bound,
    mode='lines',
    line=dict(width=0),  # No line for upper bound
    name='Upper Bound',
    showlegend=False
))

fig.add_trace(go.Scatter(
    x=best_values.index,
    y=lower_bound,
    mode='lines',
    line=dict(width=0),  # No line for lower bound
    fill='tonexty',  # Fill the area between the upper and lower bound
    fillcolor='rgba(255, 0, 0, 0.2)',  # Red with transparency
    name='Shaded Error',
    showlegend=False
))

# Set plot labels and title
fig.update_layout(
    title="Best Value with Shaded Error Region across Seeds (0-4)",
    xaxis_title="Iteration",
    yaxis_title="Best Value",
    template="plotly_white"
)

# Show the plot
fig.show()

In [23]:
import os
import optuna
import numpy as np
import pandas as pd
import plotly.graph_objects as go

# List of samplers to include
samplers = ["random", "tpe", "parafac"]

# Initialize a figure for the plot
fig = go.Figure()

# Colors for each sampler
sampler_colors = {
    "random": "red",
    "tpe": "blue",
    "parafac": "green"
}

# Loop over each sampler
for sampler in samplers:
    # List of file names for the current sampler
    db_files = search_log_files(DB_DIR, [sampler])

    print(db_files)

    # Initialize a list to store dataframes for each seed
    dataframes = []

    # Loop over each file, load the study, and calculate best values
    for db_file in db_files:
        path = os.path.join(DB_DIR, db_file)
        storage_url = f"sqlite:///{path}"
        
        # Extract seed number from the filename (assuming the format stays consistent)
        seed = db_file.split('_')[-1].replace('.db', '')

        # Load the study
        study = optuna.load_study(study_name=f'bo_benchmark_{sampler}_{seed}', storage=storage_url)
        
        # Get the trials as a dataframe
        df = study.trials_dataframe().copy()
        
        # Compute the best values for each iteration
        if study.direction == optuna.study.StudyDirection.MAXIMIZE:
            df['best_value'] = df['value'].cummax()
        else:
            df['best_value'] = df['value'].cummin()
        
        # Store the dataframe for the current seed
        dataframes.append(df[['number', 'best_value']])

    # Merge dataframes by iteration number
    merged_df = pd.concat(dataframes, axis=1, keys=[f'seed_{i}' for i in range(len(db_files))])

    # Compute mean and standard deviation of best values across seeds
    best_values = merged_df.xs('best_value', axis=1, level=1)
    mean_best = best_values.mean(axis=1)
    std_best = best_values.std(axis=1)

    # Compute upper and lower bounds for shaded area
    upper_bound = mean_best + std_best
    lower_bound = mean_best - std_best

    # Add the mean line for the current sampler
    fig.add_trace(go.Scatter(
        x=best_values.index,
        y=mean_best,
        mode='lines',
        name=f'Mean Best Value ({sampler})',
        line=dict(color=sampler_colors[sampler])
    ))

    # Add the shaded error region for the current sampler
    fig.add_trace(go.Scatter(
        x=best_values.index,
        y=upper_bound,
        mode='lines',
        line=dict(width=0),  # No line for upper bound
        name=f'Upper Bound ({sampler})',
        showlegend=False
    ))

    fig.add_trace(go.Scatter(
        x=best_values.index,
        y=lower_bound,
        mode='lines',
        line=dict(width=0),  # No line for lower bound
        fill='tonexty',  # Fill the area between the upper and lower bound
        fillcolor=f'rgba({sampler_colors[sampler].replace("red", "255, 0, 0").replace("blue", "0, 0, 255").replace("green", "0, 255, 0")}, 0.2)',  # Transparent fill for each color
        name=f'Shaded Error ({sampler})',
        showlegend=False
    ))

# Set plot labels and title
fig.update_layout(
    title="Best Value with Shaded Error Region for Different Samplers",
    xaxis_title="Iteration",
    yaxis_title="Best Value",
    template="plotly_white"
)

# Show the plot
fig.show()

['2024-10-12_15-27-35_bo_benchmark_random_seed0.db', '2024-10-12_15-27-44_bo_benchmark_random_seed1.db', '2024-10-12_15-27-53_bo_benchmark_random_seed2.db', '2024-10-12_15-28-02_bo_benchmark_random_seed3.db', '2024-10-12_15-28-10_bo_benchmark_random_seed4.db']
['2024-10-12_15-29-04_bo_benchmark_tpe_seed0.db', '2024-10-12_15-29-15_bo_benchmark_tpe_seed1.db', '2024-10-12_15-29-26_bo_benchmark_tpe_seed2.db', '2024-10-12_15-29-38_bo_benchmark_tpe_seed3.db', '2024-10-12_15-29-50_bo_benchmark_tpe_seed4.db']
['2024-10-12_15-32-23_bo_parafac_seed0.db', '2024-10-12_15-33-00_bo_parafac_seed1.db', '2024-10-12_15-33-37_bo_parafac_seed2.db', '2024-10-12_15-34-14_bo_parafac_seed3.db', '2024-10-12_15-34-52_bo_parafac_seed4.db']


KeyError: 'Record does not exist.'

In [26]:
import os
import optuna
import numpy as np
import pandas as pd
import plotly.graph_objects as go

# Dictionary of samplers with their respective file lists
sampler_files = {
    "random": [
        '2024-10-12_15-27-35_bo_benchmark_random_seed0.db',
        '2024-10-12_15-27-44_bo_benchmark_random_seed1.db',
        '2024-10-12_15-27-53_bo_benchmark_random_seed2.db',
        '2024-10-12_15-28-02_bo_benchmark_random_seed3.db',
        '2024-10-12_15-28-10_bo_benchmark_random_seed4.db'
    ],
    "tpe": [
        '2024-10-12_15-29-04_bo_benchmark_tpe_seed0.db',
        '2024-10-12_15-29-15_bo_benchmark_tpe_seed1.db',
        '2024-10-12_15-29-26_bo_benchmark_tpe_seed2.db',
        '2024-10-12_15-29-38_bo_benchmark_tpe_seed3.db',
        '2024-10-12_15-29-50_bo_benchmark_tpe_seed4.db'
    ],
    "parafac": [
        '2024-10-12_15-32-23_bo_parafac_seed0.db',
        '2024-10-12_15-33-00_bo_parafac_seed1.db',
        '2024-10-12_15-33-37_bo_parafac_seed2.db',
        '2024-10-12_15-34-14_bo_parafac_seed3.db',
        '2024-10-12_15-34-52_bo_parafac_seed4.db'
    ]
}

db_folder = DB_DIR  # The folder where .db files are stored

# Initialize a figure for the plot
fig = go.Figure()

# Colors for each sampler, with "parafac" set to orange
sampler_colors = {
    "random": "red",
    "tpe": "blue",
    "parafac": "green"
}

# Loop over each sampler
for sampler, db_files in sampler_files.items():
    
    # Initialize a list to store dataframes for each seed
    dataframes = []

    # Loop over each file, load the study, and calculate best values
    for db_file in db_files:
        path = os.path.join(db_folder, db_file)
        storage_url = f"sqlite:///{path}"
        
        # Load the study (no need for seed here since filenames are fixed)
        study_name = db_file.split('_bo_')[1].replace('.db', '')
        print(study_name)
        study = optuna.load_study(study_name="bo_"+study_name, storage=storage_url)
        
        # Get the trials as a dataframe
        df = study.trials_dataframe().copy()
        
        # Compute the best values for each iteration
        if study.direction == optuna.study.StudyDirection.MAXIMIZE:
            df['best_value'] = df['value'].cummax()
        else:
            df['best_value'] = df['value'].cummin()
        
        # Store the dataframe for the current seed
        dataframes.append(df[['number', 'best_value']])

    # Merge dataframes by iteration number
    merged_df = pd.concat(dataframes, axis=1, keys=[f'seed_{i}' for i in range(len(db_files))])

    # Compute mean and standard deviation of best values across seeds
    best_values = merged_df.xs('best_value', axis=1, level=1)
    mean_best = best_values.mean(axis=1)
    std_best = best_values.std(axis=1)

    # Compute upper and lower bounds for shaded area
    upper_bound = mean_best + std_best
    lower_bound = mean_best - std_best

    # Add the mean line for the current sampler
    fig.add_trace(go.Scatter(
        x=best_values.index,
        y=mean_best,
        mode='lines',
        name=f'Mean Best Value ({sampler})',
        line=dict(color=sampler_colors[sampler])
    ))

    # Add the shaded error region for the current sampler without a legend
    fig.add_trace(go.Scatter(
        x=best_values.index,
        y=upper_bound,
        mode='lines',
        line=dict(width=0),  # No line for upper bound
        showlegend=False
    ))

    fig.add_trace(go.Scatter(
        x=best_values.index,
        y=lower_bound,
        mode='lines',
        line=dict(width=0),  # No line for lower bound
        fill='tonexty',  # Fill the area between the upper and lower bound
        fillcolor=f'rgba({sampler_colors[sampler].replace("red", "255, 0, 0").replace("blue", "0, 0, 255").replace("orange", "255, 165, 0")}, 0.2)',  # Transparent fill for each color
        showlegend=False
    ))

# Set plot labels and title
fig.update_layout(
    title="Best Value with Shaded Error Region for Different Samplers",
    xaxis_title="Iteration",
    yaxis_title="Best Value",
    template="plotly_white"
)

# Show the plot
fig.show()

benchmark_random_seed0
benchmark_random_seed1
benchmark_random_seed2
benchmark_random_seed3
benchmark_random_seed4
benchmark_tpe_seed0
benchmark_tpe_seed1
benchmark_tpe_seed2
benchmark_tpe_seed3
benchmark_tpe_seed4
parafac_seed0
parafac_seed1
parafac_seed2
parafac_seed3
parafac_seed4


In [28]:
import os
import optuna
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px

# Get Plotly's qualitative color scale
color_scale = px.colors.qualitative.Plotly

# Dictionary of samplers with their respective file lists
sampler_files = {
    "random": [
        '2024-10-12_15-27-35_bo_benchmark_random_seed0.db',
        '2024-10-12_15-27-44_bo_benchmark_random_seed1.db',
        '2024-10-12_15-27-53_bo_benchmark_random_seed2.db',
        '2024-10-12_15-28-02_bo_benchmark_random_seed3.db',
        '2024-10-12_15-28-10_bo_benchmark_random_seed4.db'
    ],
    "tpe": [
        '2024-10-12_15-29-04_bo_benchmark_tpe_seed0.db',
        '2024-10-12_15-29-15_bo_benchmark_tpe_seed1.db',
        '2024-10-12_15-29-26_bo_benchmark_tpe_seed2.db',
        '2024-10-12_15-29-38_bo_benchmark_tpe_seed3.db',
        '2024-10-12_15-29-50_bo_benchmark_tpe_seed4.db'
    ],
    "parafac": [
        '2024-10-12_15-32-23_bo_parafac_seed0.db',
        '2024-10-12_15-33-00_bo_parafac_seed1.db',
        '2024-10-12_15-33-37_bo_parafac_seed2.db',
        '2024-10-12_15-34-14_bo_parafac_seed3.db',
        '2024-10-12_15-34-52_bo_parafac_seed4.db'
    ]
}

db_folder = DB_DIR  # The folder where .db files are stored

# Initialize a figure for the plot
fig = go.Figure()

# Loop over each sampler and assign colors from the Plotly qualitative scale
for idx, (sampler, db_files) in enumerate(sampler_files.items()):
    
    # Initialize a list to store dataframes for each seed
    dataframes = []

    # Loop over each file, load the study, and calculate best values
    for db_file in db_files:
        path = os.path.join(db_folder, db_file)
        storage_url = f"sqlite:///{path}"
        
        # Load the study (no need for seed here since filenames are fixed)
        study_name = db_file.split('_bo_')[1].replace('.db', '')
        study = optuna.load_study(study_name="bo_"+study_name, storage=storage_url)
        
        # Get the trials as a dataframe
        df = study.trials_dataframe().copy()
        
        # Compute the best values for each iteration
        if study.direction == optuna.study.StudyDirection.MAXIMIZE:
            df['best_value'] = df['value'].cummax()
        else:
            df['best_value'] = df['value'].cummin()
        
        # Store the dataframe for the current seed
        dataframes.append(df[['number', 'best_value']])

    # Merge dataframes by iteration number
    merged_df = pd.concat(dataframes, axis=1, keys=[f'seed_{i}' for i in range(len(db_files))])

    # Compute mean and standard deviation of best values across seeds
    best_values = merged_df.xs('best_value', axis=1, level=1)
    mean_best = best_values.mean(axis=1)
    std_best = best_values.std(axis=1)

    # Compute upper and lower bounds for shaded area
    upper_bound = mean_best + std_best
    lower_bound = mean_best - std_best

    # Add the mean line for the current sampler using the color from the color scale
    fig.add_trace(go.Scatter(
        x=best_values.index,
        y=mean_best,
        mode='lines',
        name=f'Mean Best Value ({sampler})',
        line=dict(color=color_scale[idx])
    ))

    # Add the shaded error region for the current sampler without a legend
    fig.add_trace(go.Scatter(
        x=best_values.index,
        y=upper_bound,
        mode='lines',
        line=dict(width=0),  # No line for upper bound
        showlegend=False
    ))

    fig.add_trace(go.Scatter(
        x=best_values.index,
        y=lower_bound,
        mode='lines',
        line=dict(width=0),  # No line for lower bound
        fill='tonexty',  # Fill the area between the upper and lower bound
        fillcolor=f'rgba({color_scale[idx].replace("rgb(", "").replace(")", "")}, 0.2)',  # Transparent fill for each color
        showlegend=False
    ))

# Set plot labels and title
fig.update_layout(
    title="Best Value with Shaded Error Region for Different Samplers",
    xaxis_title="Iteration",
    yaxis_title="Best Value",
    template="plotly_white"
)

# Show the plot
fig.show()

ValueError: 
    Invalid value of type 'builtins.str' received for the 'fillcolor' property of scatter
        Received value: 'rgba(#636EFA, 0.2)'

    The 'fillcolor' property is a color and may be specified as:
      - A hex string (e.g. '#ff0000')
      - An rgb/rgba string (e.g. 'rgb(255,0,0)')
      - An hsl/hsla string (e.g. 'hsl(0,100%,50%)')
      - An hsv/hsva string (e.g. 'hsv(0,100%,100%)')
      - A named CSS color:
            aliceblue, antiquewhite, aqua, aquamarine, azure,
            beige, bisque, black, blanchedalmond, blue,
            blueviolet, brown, burlywood, cadetblue,
            chartreuse, chocolate, coral, cornflowerblue,
            cornsilk, crimson, cyan, darkblue, darkcyan,
            darkgoldenrod, darkgray, darkgrey, darkgreen,
            darkkhaki, darkmagenta, darkolivegreen, darkorange,
            darkorchid, darkred, darksalmon, darkseagreen,
            darkslateblue, darkslategray, darkslategrey,
            darkturquoise, darkviolet, deeppink, deepskyblue,
            dimgray, dimgrey, dodgerblue, firebrick,
            floralwhite, forestgreen, fuchsia, gainsboro,
            ghostwhite, gold, goldenrod, gray, grey, green,
            greenyellow, honeydew, hotpink, indianred, indigo,
            ivory, khaki, lavender, lavenderblush, lawngreen,
            lemonchiffon, lightblue, lightcoral, lightcyan,
            lightgoldenrodyellow, lightgray, lightgrey,
            lightgreen, lightpink, lightsalmon, lightseagreen,
            lightskyblue, lightslategray, lightslategrey,
            lightsteelblue, lightyellow, lime, limegreen,
            linen, magenta, maroon, mediumaquamarine,
            mediumblue, mediumorchid, mediumpurple,
            mediumseagreen, mediumslateblue, mediumspringgreen,
            mediumturquoise, mediumvioletred, midnightblue,
            mintcream, mistyrose, moccasin, navajowhite, navy,
            oldlace, olive, olivedrab, orange, orangered,
            orchid, palegoldenrod, palegreen, paleturquoise,
            palevioletred, papayawhip, peachpuff, peru, pink,
            plum, powderblue, purple, red, rosybrown,
            royalblue, rebeccapurple, saddlebrown, salmon,
            sandybrown, seagreen, seashell, sienna, silver,
            skyblue, slateblue, slategray, slategrey, snow,
            springgreen, steelblue, tan, teal, thistle, tomato,
            turquoise, violet, wheat, white, whitesmoke,
            yellow, yellowgreen

In [30]:
import os
import optuna
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px

# Get Plotly's qualitative color scale
color_scale = px.colors.qualitative.Plotly

# Dictionary of samplers with their respective file lists
sampler_files = {
    "random": [
        '2024-10-12_15-27-35_bo_benchmark_random_seed0.db',
        '2024-10-12_15-27-44_bo_benchmark_random_seed1.db',
        '2024-10-12_15-27-53_bo_benchmark_random_seed2.db',
        '2024-10-12_15-28-02_bo_benchmark_random_seed3.db',
        '2024-10-12_15-28-10_bo_benchmark_random_seed4.db'
    ],
    "tpe": [
        '2024-10-12_15-29-04_bo_benchmark_tpe_seed0.db',
        '2024-10-12_15-29-15_bo_benchmark_tpe_seed1.db',
        '2024-10-12_15-29-26_bo_benchmark_tpe_seed2.db',
        '2024-10-12_15-29-38_bo_benchmark_tpe_seed3.db',
        '2024-10-12_15-29-50_bo_benchmark_tpe_seed4.db'
    ],
    "parafac": [
        '2024-10-12_15-32-23_bo_parafac_seed0.db',
        '2024-10-12_15-33-00_bo_parafac_seed1.db',
        '2024-10-12_15-33-37_bo_parafac_seed2.db',
        '2024-10-12_15-34-14_bo_parafac_seed3.db',
        '2024-10-12_15-34-52_bo_parafac_seed4.db'
    ]
}

db_folder = DB_DIR  # The folder where .db files are stored

# Initialize a figure for the plot
fig = go.Figure()

# Loop over each sampler and assign colors from the Plotly qualitative scale
for idx, (sampler, db_files) in enumerate(sampler_files.items()):
    
    # Initialize a list to store dataframes for each seed
    dataframes = []

    # Loop over each file, load the study, and calculate best values
    for db_file in db_files:
        path = os.path.join(db_folder, db_file)
        storage_url = f"sqlite:///{path}"
        
        # Load the study (no need for seed here since filenames are fixed)
        study_name = db_file.split('_bo_')[1].replace('.db', '')
        study = optuna.load_study(study_name="bo_"+study_name, storage=storage_url)
        
        # Get the trials as a dataframe
        df = study.trials_dataframe().copy()
        
        # Compute the best values for each iteration
        if study.direction == optuna.study.StudyDirection.MAXIMIZE:
            df['best_value'] = df['value'].cummax()
        else:
            df['best_value'] = df['value'].cummin()
        
        # Store the dataframe for the current seed
        dataframes.append(df[['number', 'best_value']])

    # Merge dataframes by iteration number
    merged_df = pd.concat(dataframes, axis=1, keys=[f'seed_{i}' for i in range(len(db_files))])

    # Compute mean and standard deviation of best values across seeds
    best_values = merged_df.xs('best_value', axis=1, level=1)
    mean_best = best_values.mean(axis=1)
    std_best = best_values.std(axis=1)

    # Compute upper and lower bounds for shaded area
    upper_bound = mean_best + std_best
    lower_bound = mean_best - std_best

    # Add the mean line for the current sampler using the color from the color scale
    fig.add_trace(go.Scatter(
        x=best_values.index,
        y=mean_best,
        mode='lines',
        name=f'Mean Best Value ({sampler})',
        line=dict(color=color_scale[idx])
    ))

    # Add the shaded error region for the current sampler without a legend
    fig.add_trace(go.Scatter(
        x=best_values.index,
        y=upper_bound,
        mode='lines',
        line=dict(width=0),  # No line for upper bound
        showlegend=False
    ))

    fig.add_trace(go.Scatter(
        x=best_values.index,
        y=lower_bound,
        mode='lines',
        line=dict(width=0),  # No line for lower bound
        fill='tonexty',  # Fill the area between the upper and lower bound
        fillcolor=f'rgba{color_scale[idx][3:-1]}, 0.2)',  # Transparent fill for each color
        showlegend=False
    ))

# Set plot labels and title
fig.update_layout(
    title="Best Value with Shaded Error Region for Different Samplers",
    xaxis_title="Iteration",
    yaxis_title="Best Value",
    template="plotly_white"
)

# Show the plot
fig.show()

ValueError: 
    Invalid value of type 'builtins.str' received for the 'fillcolor' property of scatter
        Received value: 'rgba6EF, 0.2)'

    The 'fillcolor' property is a color and may be specified as:
      - A hex string (e.g. '#ff0000')
      - An rgb/rgba string (e.g. 'rgb(255,0,0)')
      - An hsl/hsla string (e.g. 'hsl(0,100%,50%)')
      - An hsv/hsva string (e.g. 'hsv(0,100%,100%)')
      - A named CSS color:
            aliceblue, antiquewhite, aqua, aquamarine, azure,
            beige, bisque, black, blanchedalmond, blue,
            blueviolet, brown, burlywood, cadetblue,
            chartreuse, chocolate, coral, cornflowerblue,
            cornsilk, crimson, cyan, darkblue, darkcyan,
            darkgoldenrod, darkgray, darkgrey, darkgreen,
            darkkhaki, darkmagenta, darkolivegreen, darkorange,
            darkorchid, darkred, darksalmon, darkseagreen,
            darkslateblue, darkslategray, darkslategrey,
            darkturquoise, darkviolet, deeppink, deepskyblue,
            dimgray, dimgrey, dodgerblue, firebrick,
            floralwhite, forestgreen, fuchsia, gainsboro,
            ghostwhite, gold, goldenrod, gray, grey, green,
            greenyellow, honeydew, hotpink, indianred, indigo,
            ivory, khaki, lavender, lavenderblush, lawngreen,
            lemonchiffon, lightblue, lightcoral, lightcyan,
            lightgoldenrodyellow, lightgray, lightgrey,
            lightgreen, lightpink, lightsalmon, lightseagreen,
            lightskyblue, lightslategray, lightslategrey,
            lightsteelblue, lightyellow, lime, limegreen,
            linen, magenta, maroon, mediumaquamarine,
            mediumblue, mediumorchid, mediumpurple,
            mediumseagreen, mediumslateblue, mediumspringgreen,
            mediumturquoise, mediumvioletred, midnightblue,
            mintcream, mistyrose, moccasin, navajowhite, navy,
            oldlace, olive, olivedrab, orange, orangered,
            orchid, palegoldenrod, palegreen, paleturquoise,
            palevioletred, papayawhip, peachpuff, peru, pink,
            plum, powderblue, purple, red, rosybrown,
            royalblue, rebeccapurple, saddlebrown, salmon,
            sandybrown, seagreen, seashell, sienna, silver,
            skyblue, slateblue, slategray, slategrey, snow,
            springgreen, steelblue, tan, teal, thistle, tomato,
            turquoise, violet, wheat, white, whitesmoke,
            yellow, yellowgreen

In [31]:
color_scale

['#636EFA',
 '#EF553B',
 '#00CC96',
 '#AB63FA',
 '#FFA15A',
 '#19D3F3',
 '#FF6692',
 '#B6E880',
 '#FF97FF',
 '#FECB52']

In [39]:
import os
import optuna
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px

# Function to convert hex to rgba
def hex_to_rgba(hex_color, alpha=0.2):
    hex_color = hex_color.lstrip('#')
    rgb = tuple(int(hex_color[i:i+2], 16) for i in (0, 2, 4))
    return f'rgba({rgb[0]}, {rgb[1]}, {rgb[2]}, {alpha})'

# Get Plotly's qualitative color scale in hex
color_scale = px.colors.qualitative.Plotly

# Dictionary of samplers with their respective file lists
sampler_files = {
    "random": [
        '2024-10-12_15-27-35_bo_benchmark_random_seed0.db',
        '2024-10-12_15-27-44_bo_benchmark_random_seed1.db',
        '2024-10-12_15-27-53_bo_benchmark_random_seed2.db',
        '2024-10-12_15-28-02_bo_benchmark_random_seed3.db',
        '2024-10-12_15-28-10_bo_benchmark_random_seed4.db'
    ],
    "tpe": [
        '2024-10-12_15-29-04_bo_benchmark_tpe_seed0.db',
        '2024-10-12_15-29-15_bo_benchmark_tpe_seed1.db',
        '2024-10-12_15-29-26_bo_benchmark_tpe_seed2.db',
        '2024-10-12_15-29-38_bo_benchmark_tpe_seed3.db',
        '2024-10-12_15-29-50_bo_benchmark_tpe_seed4.db'
    ],
    "parafac": [
        '2024-10-12_15-32-23_bo_parafac_seed0.db',
        '2024-10-12_15-33-00_bo_parafac_seed1.db',
        '2024-10-12_15-33-37_bo_parafac_seed2.db',
        '2024-10-12_15-34-14_bo_parafac_seed3.db',
        '2024-10-12_15-34-52_bo_parafac_seed4.db'
    ]
}

db_folder = DB_DIR  # The folder where .db files are stored

# Initialize a figure for the plot
fig = go.Figure()

# Loop over each sampler and assign colors from the Plotly qualitative scale
for idx, (sampler, db_files) in enumerate(sampler_files.items()):
    
    # Initialize a list to store dataframes for each seed
    dataframes = []

    # Loop over each file, load the study, and calculate best values
    for db_file in db_files:
        path = os.path.join(db_folder, db_file)
        storage_url = f"sqlite:///{path}"
        
        # Load the study (no need for seed here since filenames are fixed)
        study_name = db_file.split('_bo_')[1].replace('.db', '')
        study = optuna.load_study(study_name="bo_"+study_name, storage=storage_url)
        
        # Get the trials as a dataframe
        df = study.trials_dataframe().copy()
        
        # Compute the best values for each iteration
        if study.direction == optuna.study.StudyDirection.MAXIMIZE:
            df['best_value'] = df['value'].cummax()
        else:
            df['best_value'] = df['value'].cummin()
        
        # Store the dataframe for the current seed
        dataframes.append(df[['number', 'best_value']])

    # Merge dataframes by iteration number
    merged_df = pd.concat(dataframes, axis=1, keys=[f'seed_{i}' for i in range(len(db_files))])

    # Compute mean and standard deviation of best values across seeds
    best_values = merged_df.xs('best_value', axis=1, level=1)
    mean_best = best_values.mean(axis=1)
    std_best = best_values.std(axis=1)

    # Compute upper and lower bounds for shaded area
    upper_bound = mean_best + std_best
    lower_bound = mean_best - std_best

    # Add the mean line for the current sampler using the color from the color scale
    fig.add_trace(go.Scatter(
        x=best_values.index,
        y=mean_best,
        mode='lines',
        name=f'Mean Best Value ({sampler})',
        line=dict(color=color_scale[idx])
    ))

    # Add the shaded error region for the current sampler without a legend
    fig.add_trace(go.Scatter(
        x=best_values.index,
        y=upper_bound,
        mode='lines',
        line=dict(width=0),  # No line for upper bound
        showlegend=False
    ))

    fig.add_trace(go.Scatter(
        x=best_values.index,
        y=lower_bound,
        mode='lines',
        line=dict(width=0),  # No line for lower bound
        fill='tonexty',  # Fill the area between the upper and lower bound
        fillcolor=hex_to_rgba(color_scale[idx]),  # Convert hex to rgba with transparency
        showlegend=False
    ))

# Set plot labels and title
fig.update_layout(
    title="Best Value with Shaded Error Region for Different Samplers",
    xaxis_title="Iteration",
    yaxis_title="Best Value",
    template="plotly_white",
    legend=dict(
        orientation="h",
        x=0,
        y=1.1
    ),
    width=800,
    height=600
)

# Show the plot
fig.show()

In [2]:
import os
import optuna
import plotly.io as pio
from optuna.visualization import plot_optimization_history

# Directory to save the plots
output_dir = "history_plots"
os.makedirs(output_dir, exist_ok=True)  # Create directory if it doesn't exist

# Dictionary of samplers with their respective file lists
sampler_files = {
    "random": [
        '2024-10-12_15-27-35_bo_benchmark_random_seed0.db',
        '2024-10-12_15-27-44_bo_benchmark_random_seed1.db',
        '2024-10-12_15-27-53_bo_benchmark_random_seed2.db',
        '2024-10-12_15-28-02_bo_benchmark_random_seed3.db',
        '2024-10-12_15-28-10_bo_benchmark_random_seed4.db'
    ],
    "tpe": [
        '2024-10-12_15-29-04_bo_benchmark_tpe_seed0.db',
        '2024-10-12_15-29-15_bo_benchmark_tpe_seed1.db',
        '2024-10-12_15-29-26_bo_benchmark_tpe_seed2.db',
        '2024-10-12_15-29-38_bo_benchmark_tpe_seed3.db',
        '2024-10-12_15-29-50_bo_benchmark_tpe_seed4.db'
    ],
    "parafac": [
        '2024-10-12_15-32-23_bo_parafac_seed0.db',
        '2024-10-12_15-33-00_bo_parafac_seed1.db',
        '2024-10-12_15-33-37_bo_parafac_seed2.db',
        '2024-10-12_15-34-14_bo_parafac_seed3.db',
        '2024-10-12_15-34-52_bo_parafac_seed4.db'
    ]
}

db_folder = DB_DIR  # The folder where .db files are stored

# Loop over each sampler and its file list
for sampler, db_files in sampler_files.items():
    # Loop over each file (which corresponds to different seeds)
    for db_file in db_files:
        path = os.path.join(db_folder, db_file)
        storage_url = f"sqlite:///{path}"
        
        # Extract seed number from the filename
        seed = db_file.split('_')[-1].replace('.db', '')
        
        # Load the study
        study_name = db_file.split('_bo_')[1].replace('.db', '')
        study = optuna.load_study(study_name="bo_"+study_name, storage=storage_url)
        
        # Generate the optimization history plot
        fig = plot_optimization_history(study)
        
        # Set the title of the figure
        fig.update_layout(
            title=f"Optimization History for {sampler.capitalize()} (Seed {seed})"
        )
        
        # Save the figure as an image (e.g., random_seed0.png)
        output_path = os.path.join(output_dir, f"{sampler}_seed{seed}.png")
        pio.write_image(fig, output_path)
        
        print(f"Saved: {output_path}")

Saved: history_plots/random_seedseed0.png
Saved: history_plots/random_seedseed1.png
Saved: history_plots/random_seedseed2.png
Saved: history_plots/random_seedseed3.png
Saved: history_plots/random_seedseed4.png
Saved: history_plots/tpe_seedseed0.png
Saved: history_plots/tpe_seedseed1.png
Saved: history_plots/tpe_seedseed2.png
Saved: history_plots/tpe_seedseed3.png
Saved: history_plots/tpe_seedseed4.png
Saved: history_plots/parafac_seedseed0.png
Saved: history_plots/parafac_seedseed1.png
Saved: history_plots/parafac_seedseed2.png
Saved: history_plots/parafac_seedseed3.png
Saved: history_plots/parafac_seedseed4.png


In [4]:
import os
import optuna
import plotly.io as pio
from optuna.visualization import plot_optimization_history

# Directory to save the plot
output_dir = "history_plots"
os.makedirs(output_dir, exist_ok=True)  # Create directory if it doesn't exist

# File for the bruteforce sampler
bruteforce_file = '2024-10-12_15-30-00_bo_benchmark_bruteforce_seed0.db'

db_folder = DB_DIR  # The folder where .db files are stored
path = os.path.join(db_folder, bruteforce_file)
storage_url = f"sqlite:///{path}"

# Extract seed number from the filename (in this case, it's seed0)
seed = bruteforce_file.split('_')[-1].replace('.db', '')

# Load the study for the bruteforce sampler
study_name = bruteforce_file.split('_bo_')[1].replace('.db', '')
study = optuna.load_study(study_name="bo_"+study_name, storage=storage_url)

# Generate the optimization history plot for bruteforce
fig = plot_optimization_history(study)

# Set the title of the figure
fig.update_layout(
    title=f"Optimization History for Bruteforce (Seed {seed})"
)

# Save the figure as an image (e.g., bruteforce_seed0.png)
output_path = os.path.join(output_dir, f"bruteforce_seed{seed}.png")
pio.write_image(fig, output_path)

print(f"Saved: {output_path}")

Saved: history_plots/bruteforce_seedseed0.png
