In [30]:
import pandas as pd
import re

# Read the raw experiment results (one row per run) into a DataFrame
df = pd.read_csv(filepath_or_buffer="all_results.csv")

def extract_file_info(filename):
    """Parse the instance parameters encoded in a result file name.

    Expected name: ``mglcs_<seq_len>_<seq_num>_<alphabet>_<instance_id>.txt.out``.
    The pattern is anchored at the start of the string only: the name must
    begin with ``mglcs_`` and anything following ``.txt.out`` is ignored.
    The trailing instance id is matched but not returned.

    Parameters
    ----------
    filename : str
        File name to parse. Non-string values (e.g. None or NaN coming from
        a missing CSV cell) are treated as unparsable.

    Returns
    -------
    tuple of (int, int, int) or None
        ``(seq_len, seq_num, alphabet)`` taken from the first, second and
        third numeric fields, or None when the name cannot be parsed.

    NOTE(review): in the aggregated output of this notebook the first field
    takes the values 2/10 and the second 50/100/200, and the reported
    qualities are larger than the "seq_len" value. The first two fields may
    actually be <seq_num>_<seq_len> -- confirm against the instance
    generator before relying on these labels.
    """
    # re.match raises TypeError on non-string input; report it the same way
    # as any other name that does not follow the pattern.
    if not isinstance(filename, str):
        return None

    match = re.match(r"mglcs_(\d+)_(\d+)_(\d+)_\d+\.txt\.out", filename)
    if match is None:
        return None

    seq_len, seq_num, alphabet = (int(field) for field in match.groups())
    return seq_len, seq_num, alphabet

# Parse every file name into its instance parameters
parsed_info = df["file"].apply(extract_file_info)

# extract_file_info returns None for names it cannot parse. Turn those into an
# all-None triple so that every row has exactly three fields: a bare None in
# the first row would otherwise make the constructor build a single column and
# the three-column assignment would fail. Explicit column names also keep the
# assignment valid when df has no rows at all.
info_columns = ["seq_len", "seq_num", "alphabet"]
df[info_columns] = pd.DataFrame(
    [info if info is not None else (None,) * len(info_columns) for info in parsed_info],
    index=df.index,
    columns=info_columns,
)

# Define the full group: one tuple per row combining the algorithm settings
# with the instance parameters
group_keys = ["beam_width", "heuristic", "imbs_iters", *info_columns]
df["group"] = list(zip(*(df[key] for key in group_keys)))

# Aggregate by group: mean quality and mean running time over all runs
# that share the same settings and instance parameters
agg_df = df.groupby("group").agg(
    avg_quality=("quality", "mean"),
    avg_time=("time", "mean")
).reset_index()

# Expand the group tuple back into separate columns for clarity; the 'group'
# column itself is kept because a later cell still reads it
agg_df[group_keys] = pd.DataFrame(
    agg_df["group"].tolist(), index=agg_df.index, columns=group_keys
)

agg_df

Unnamed: 0,group,avg_quality,avg_time,beam_width,heuristic,imbs_iters,seq_len,seq_num,alphabet
0,"(1, h2, 50, 2, 50, 2)",23.8,0.001683,1,h2,50,2,50,2
1,"(1, h2, 50, 2, 50, 4)",24.4,0.001210,1,h2,50,2,50,4
2,"(1, h2, 50, 2, 100, 2)",43.7,0.003910,1,h2,50,2,100,2
3,"(1, h2, 50, 2, 100, 4)",46.4,0.002818,1,h2,50,2,100,4
4,"(1, h2, 50, 2, 200, 2)",60.9,0.009927,1,h2,50,2,200,2
...,...,...,...,...,...,...,...,...,...
865,"(5000, h8, 1, 10, 50, 4)",1.9,0.002039,5000,h8,1,10,50,4
866,"(5000, h8, 1, 10, 100, 2)",1.1,0.004050,5000,h8,1,10,100,2
867,"(5000, h8, 1, 10, 100, 4)",2.2,0.004994,5000,h8,1,10,100,4
868,"(5000, h8, 1, 10, 200, 2)",2.5,0.015110,5000,h8,1,10,200,2


In [31]:
!pip ins install pandas

ERROR: unknown command "ins" - maybe you meant "install"


In [32]:
# Expand the 'group' tuple of agg_df into separate columns, then drop it.
# The guard keeps the cell re-runnable: once 'group' has been dropped there is
# nothing left to expand, so agg_df is simply displayed unchanged instead of
# raising a KeyError.
if "group" in agg_df.columns:
    agg_df[["beam_width", "heuristic", "imbs_iters", "seq_len", "seq_num", "alphabet"]] = pd.DataFrame(
        agg_df["group"].tolist(), index=agg_df.index
    )
    agg_df = agg_df.drop(columns=["group"])

agg_df
 

Unnamed: 0,avg_quality,avg_time,beam_width,heuristic,imbs_iters,seq_len,seq_num,alphabet
0,23.8,0.001683,1,h2,50,2,50,2
1,24.4,0.001210,1,h2,50,2,50,4
2,43.7,0.003910,1,h2,50,2,100,2
3,46.4,0.002818,1,h2,50,2,100,4
4,60.9,0.009927,1,h2,50,2,200,2
...,...,...,...,...,...,...,...,...
865,1.9,0.002039,5000,h8,1,10,50,4
866,1.1,0.004050,5000,h8,1,10,100,2
867,2.2,0.004994,5000,h8,1,10,100,4
868,2.5,0.015110,5000,h8,1,10,200,2


In [33]:
# Further analysis -- basic beam search:
# keep only the configurations that ran a single IMBS iteration
single_iteration = agg_df["imbs_iters"].eq(1)
beam_search_results = agg_df.loc[single_iteration]
beam_search_results




Unnamed: 0,avg_quality,avg_time,beam_width,heuristic,imbs_iters,seq_len,seq_num,alphabet
300,33.4,0.010125,100,h5,1,2,50,2
301,30.1,0.033305,100,h5,1,2,50,4
302,48.5,0.036847,100,h5,1,2,100,2
303,61.4,0.103674,100,h5,1,2,100,4
304,88.6,0.141423,100,h5,1,2,200,2
...,...,...,...,...,...,...,...,...
865,1.9,0.002039,5000,h8,1,10,50,4
866,1.1,0.004050,5000,h8,1,10,100,2
867,2.2,0.004994,5000,h8,1,10,100,4
868,2.5,0.015110,5000,h8,1,10,200,2


In [34]:
# Collapse the beam search baseline to one row per (beam_width, heuristic).
# The values are unweighted means of the per-group averages in agg_df.
baseline_keys = ["beam_width", "heuristic"]
baseline_means = {
    "avg_quality": ("avg_quality", "mean"),
    "avg_time": ("avg_time", "mean"),
}
baseline_grouped = beam_search_results.groupby(baseline_keys)
beam_search_bw_heuristic = baseline_grouped.agg(**baseline_means).reset_index()

print('Beam search: baseline with one iteration of IMBS')
beam_search_bw_heuristic

Beam search: baseline with one iteration of IMBS


Unnamed: 0,beam_width,heuristic,avg_quality,avg_time
0,100,h5,29.036667,0.082904
1,100,h8,27.31,0.098187
2,500,h2,29.49,0.35452
3,500,h5,31.743333,0.4368
4,500,h8,31.22,0.438888
5,2000,h2,30.64,1.362717
6,2000,h5,32.26,1.629815
7,2000,h8,32.666667,1.800382
8,5000,h2,31.133333,4.649618
9,5000,h5,32.683333,5.898573


In [35]:
# Further analysis -- iterated runs:
# keep the configurations that ran more than one IMBS iteration
multiple_iterations = agg_df["imbs_iters"].gt(1)
imsbs = agg_df.loc[multiple_iterations]

# One row per (beam_width, heuristic): unweighted means of the
# per-group averages in agg_df
iterated_keys = ["beam_width", "heuristic"]
iterated_means = {
    "avg_quality": ("avg_quality", "mean"),
    "avg_time": ("avg_time", "mean"),
}
imsbsbw_heuristic = imsbs.groupby(iterated_keys).agg(**iterated_means).reset_index()

print('IMSBS: multiple iterations of IMBS')
imsbsbw_heuristic


IMSBS: multiple iterations of IMBS


Unnamed: 0,beam_width,heuristic,avg_quality,avg_time
0,1,h2,31.18,17.484907
1,1,h5,32.836667,18.658845
2,1,h8,33.625556,17.463325
3,100,h2,44.906667,1.953888
4,100,h5,49.396667,2.376629
5,100,h8,47.643333,2.249
6,500,h2,48.34,9.534901
7,500,h5,50.92,11.73599
8,500,h8,50.213333,10.860166
9,2000,h2,50.46,37.206547


In [36]:
# TODO: MERGING RESULTS
# Put the single-iteration baseline and the multi-iteration summary side by
# side. Columns ending in "_df1" come from beam_search_bw_heuristic, columns
# ending in "_df2" from imsbsbw_heuristic. The inner join keeps only the
# (beam_width, heuristic) pairs that occur in both tables.
join_keys = ["beam_width", "heuristic"]
merged_df = beam_search_bw_heuristic.merge(
    imsbsbw_heuristic,
    how="inner",
    on=join_keys,
    suffixes=("_df1", "_df2"),
)

merged_df

Unnamed: 0,beam_width,heuristic,avg_quality_df1,avg_time_df1,avg_quality_df2,avg_time_df2
0,100,h5,29.036667,0.082904,49.396667,2.376629
1,100,h8,27.31,0.098187,47.643333,2.249
2,500,h2,29.49,0.35452,48.34,9.534901
3,500,h5,31.743333,0.4368,50.92,11.73599
4,500,h8,31.22,0.438888,50.213333,10.860166
5,2000,h2,30.64,1.362717,50.46,37.206547
6,2000,h5,32.26,1.629815,53.35,44.998005
7,2000,h8,32.666667,1.800382,52.226667,44.935887


In [37]:
#!pip install pandas

In [38]:
### TODO (carried over from the original cell): IMSBS WITH BEAM_WIDTH=1 IS MISSING

# Greedy setting: restrict agg_df to beam_width == 1 and summarise it per
# heuristic and per number of IMBS iterations
greedy_rows = agg_df["beam_width"].eq(1)
imsbs_greedy = agg_df.loc[greedy_rows]

greedy_keys = ["beam_width", "heuristic", "imbs_iters"]
greedy_means = {
    "avg_quality": ("avg_quality", "mean"),
    "avg_time": ("avg_time", "mean"),
}
imsbs_greedy_res = imsbs_greedy.groupby(greedy_keys).agg(**greedy_means).reset_index()

print('IMSBS: bw=1')
imsbs_greedy_res

IMSBS: bw=1


Unnamed: 0,beam_width,heuristic,imbs_iters,avg_quality,avg_time
0,1,h2,50,27.876667,0.034212
1,1,h2,1000,32.02,2.665285
2,1,h2,5000,33.643333,49.755225
3,1,h5,50,29.343333,0.035305
4,1,h5,1000,33.89,2.697691
5,1,h5,5000,35.276667,53.243538
6,1,h8,50,30.27,0.05697
7,1,h8,1000,34.546667,2.967614
8,1,h8,5000,36.06,49.365389
