In [1]:
import os
import sys
import pandas as pd

# One directory per test campaign, and the interval length each directory stands for.
tests_dirs = ['tests_idist_0dot2', 'tests_idist_0dot5', 'tests_idist_1dot0', 'tests_idist_1dot5']
tests_names = {'tests_idist_0dot2': '0.2', 'tests_idist_0dot5': '0.5', 'tests_idist_1dot0': '1.0', 'tests_idist_1dot5': '1.5'}


def _load_results(results_dir, num=1000):
    """Read ``results.csv`` from ``results_dir`` and keep the rows whose ``num`` column equals ``num``.

    The index of the returned frame is reset, so its rows are numbered from 0.
    """
    results = pd.read_csv(os.path.join(results_dir, 'results.csv'))
    return results[results['num'] == num].reset_index(drop=True)


def _ratio_frame(reference, other, test_name):
    """Build a frame with the columns ``pdb``, ``ratio_nbb`` and ``test``.

    ``ratio_nbb`` is ``reference.nbbsols / (reference.nbbsols + other.nbbsols)``.
    The two frames are combined row by row, so they must describe the same
    instances in the same order.

    Raises:
        ValueError: if the ``pdb`` columns of the two frames differ; dividing
            them anyway would pair up unrelated instances (or yield NaN).
    """
    if not reference['pdb'].equals(other['pdb']):
        raise ValueError(
            f"Results for test {test_name} do not list the same 'pdb' instances in the same order."
        )
    return pd.DataFrame({
        'pdb': reference['pdb'],
        'ratio_nbb': reference['nbbsols'] / (reference['nbbsols'] + other['nbbsols']),
        'test': test_name,
    })


# ratio_dfs[<interval length>] is a two-element list:
#   [0] ratio of results folder 0 against results folder 1
#   [1] ratio of results folder 0 against results folder 2
ratio_dfs = {tests_names[test]: [] for test in tests_dirs}
for t_dir in tests_dirs:
    if not os.path.exists(t_dir):
        # Raise instead of sys.exit(): inside a notebook kernel an exception with a
        # traceback is clearer than a SystemExit.
        raise FileNotFoundError(f"Directory {t_dir} does not exist.")

    # Assume the ordered results folders correspond to:
    #   0 - results of complete hl-order
    #   1 - results of the hl-order containing just the discretization hydrogen atoms
    #   2 - results of the Labiak-Lavor-Souza order containing all the hydrogen atoms
    # Hidden folders (e.g. '.ipynb_checkpoints') sort first and would shift these
    # positions, so they are skipped.
    results_dirs = [
        f for f in sorted(os.listdir(t_dir))
        if not f.startswith('.') and os.path.isdir(os.path.join(t_dir, f))
    ]
    if len(results_dirs) < 3:
        raise ValueError(
            f"Expected at least 3 results folders in {t_dir}, found {len(results_dirs)}: {results_dirs}"
        )

    # Only the rows with num == 1000 are kept from each results.csv.
    df, df1, df2 = (_load_results(os.path.join(t_dir, sub)) for sub in results_dirs[:3])

    # Folder 0 against folder 1.
    df01 = _ratio_frame(df, df1, tests_names[t_dir])
    ratio_dfs[tests_names[t_dir]].append(df01)

    # Folder 0 against folder 2.
    df02 = _ratio_frame(df, df2, tests_names[t_dir])
    ratio_dfs[tests_names[t_dir]].append(df02)

In [None]:
# Stack the per-interval frames held in ratio_dfs into one long frame per comparison:
#   df01_all <- element 0 of ratio_dfs['0.2'], ['0.5'], ['1.0'], ['1.5'] (in that order)
#   df02_all <- element 1 of the same four lists
# The row index of each stacked frame is renumbered from 0.
df01_all, df02_all = (
    pd.concat(
        [ratio_dfs[key][slot] for key in ('0.2', '0.5', '1.0', '1.5')],
        ignore_index=True,
    )
    for slot in (0, 1)
)

print(df01_all)


In [None]:
import plotly.graph_objects as go
import pandas as pd

# Box plot of df01_all['ratio_nbb'], one box per value of df01_all['test'].
# df01_all must already exist in the kernel.

# Fill colour of each box, in order of first appearance of the test label.
colors = ["dodgerblue", "red", "green", "orange"]

# Create boxplot traces
traces = []
for i, test in enumerate(df01_all['test'].unique()):
    group_data = df01_all[df01_all['test'] == test]['ratio_nbb']

    # Add box trace
    traces.append(go.Box(
        y=group_data,
        x=[test] * len(group_data),  # Ensures proper x-axis positioning
        name=test,
        boxmean=True,  # Show mean as a line
        line=dict(color="black"),
        fillcolor=colors[i],
        opacity=0.6,
        boxpoints="all",  # Show all individual points
        jitter=0.3,  # Adds slight horizontal variation for better readability
        marker=dict(opacity=0.6)
    ))

# Create figure
fig = go.Figure(traces)

# Improve layout (the legend is hidden: the x axis already names every box).
fig.update_layout(
    xaxis_title="interval length",
    yaxis_title="n<sub>P</sub> / (n<sub><span style='text-decoration:overline'>P</span></sub> + n<sub>P</sub>)",
    xaxis_title_font_size=20,
    yaxis_title_font_size=24,
    xaxis_tickfont_size=16,
    yaxis_tickfont_size=16,
    width=800,
    height=500,
    boxgap=0.2,  # Adds spacing between boxes
    boxgroupgap=0.1,  # Adjusts gaps within grouped boxes
    margin=dict(l=10, r=10, t=10, b=10),
    showlegend=False,
)

# Always show the y-tick of 0.5 (it is quite important). Automatic tick placement
# does not guarantee it, so the ticks are anchored at 0.5 with a fixed step of 0.1.
fig.update_yaxes(tick0=0.5, dtick=0.1)

# Show plot
fig.show()

# Save figure in PNG and PDF
fig.write_image("boxplot_complete_hl_order_x_essential_hydro_hl_order.png")
fig.write_image("boxplot_complete_hl_order_x_essential_hydro_hl_order.pdf", format="pdf")


In [None]:
import plotly.graph_objects as go
import pandas as pd

# Box plot of df02_all['ratio_nbb'], one box per value of df02_all['test'].
# df02_all must already exist in the kernel.

# Fill colour of each box, in order of first appearance of the test label.
colors = ["dodgerblue", "red", "green", "orange"]


def _interval_box(label, fill):
    """Return the box trace for the rows of df02_all whose 'test' equals ``label``."""
    ratios = df02_all.loc[df02_all['test'] == label, 'ratio_nbb']
    return go.Box(
        x=[label] * len(ratios),  # one x entry per point so the box sits on its label
        y=ratios,
        name=label,
        fillcolor=fill,
        line=dict(color="black"),
        opacity=0.6,
        boxmean=True,  # draw the mean as well
        boxpoints="all",  # draw every individual point
        jitter=0.3,  # horizontal spread of the points
        marker=dict(opacity=0.6),
    )


# One trace per test label, coloured by position.
traces = [
    _interval_box(label, colors[position])
    for position, label in enumerate(df02_all['test'].unique())
]

fig = go.Figure(traces)

# Axis titles, font sizes, figure size and box spacing; the legend is hidden.
fig.update_layout(
    showlegend=False,
    width=800,
    height=500,
    margin=dict(l=10, r=10, t=10, b=10),
    boxgap=0.2,  # space between boxes
    boxgroupgap=0.1,  # space between boxes of the same group
    xaxis_title="interval length",
    xaxis_title_font_size=20,
    xaxis_tickfont_size=16,
    yaxis_title="n<sub>P</sub> / (n<sub>LLS</sub> + n<sub>P</sub>)",
    yaxis_title_font_size=24,
    yaxis_tickfont_size=16,
)

# Display the figure, then export it as PNG and PDF.
fig.show()
fig.write_image("boxplot_complete_hl_order_x_lls_order.png")
fig.write_image("boxplot_complete_hl_order_x_lls_order.pdf", format="pdf")