In [1]:
import mlflow
import pandas as pd

import mlflow
import pandas as pd

def generate_recommendations_table(experiment_ids, group_set="test", group_type="sim", metric="Time/mean"):
    """Build a table of one MLflow metric per (approach, aggregation), one column per dataset.

    For every experiment id, all runs are fetched with ``mlflow.search_runs`` and
    only those whose ``group_set`` and ``group_type`` params equal the requested
    values are kept.

    Parameters
    ----------
    experiment_ids : iterable of str
        MLflow experiment ids to query, one ``search_runs`` call per id.
    group_set : str, default "test"
        Required value of the run's ``group_set`` param.
    group_type : str, default "sim"
        Required value of the run's ``group_type`` param.
    metric : str, default "Time/mean"
        Name of the run metric stored in the table. Runs lacking the metric
        contribute ``0``.

    Returns
    -------
    pandas.DataFrame
        Columns ``Approach`` (``recommender_strategy`` param) and ``Aggregation``
        (``SAE_fusion_strategy`` param, ``'none'`` when absent), followed by one
        column per dataset: ``MovieLens`` and ``LastFM1k`` first, any other
        dataset afterwards in alphabetical order. Rows are sorted by approach,
        then aggregation. When no run matches, an empty frame holding only the
        ``Approach`` and ``Aggregation`` columns is returned.

    Notes
    -----
    If several matching runs share the same approach, aggregation and dataset,
    the run returned last by MLflow overwrites the earlier ones.
    """
    dataset_order = ["MovieLens", "LastFM1k"]
    index_names = ["Approach", "Aggregation"]

    # (approach, aggregation) -> {dataset: metric value}
    records = {}
    for exp_id in experiment_ids:
        runs = mlflow.search_runs(
            experiment_ids=[exp_id],
            output_format="list"
        )
        for run in runs:
            params = run.data.params
            if params.get("group_set") != group_set or params.get("group_type") != group_type:
                continue

            # Runs without a "dataset" param are labelled by their experiment id.
            dataset = params.get("dataset", f"Exp-{exp_id}")
            row_key = (
                params.get("recommender_strategy", None),
                params.get("SAE_fusion_strategy", 'none'),
            )
            records.setdefault(row_key, {})[dataset] = run.data.metrics.get(metric, 0)

    if not records:
        # Without rows there is no two-level index to name; naming it would raise
        # ValueError, so return the empty table shape directly.
        return pd.DataFrame(columns=index_names)

    df = pd.DataFrame.from_dict(records, orient="index")
    df.index.names = index_names

    # Alphabetical columns first, so datasets outside dataset_order keep a
    # deterministic order after the stable sort below.
    df = df.sort_index(axis=1).sort_values(by=index_names)

    cols = sorted(
        df.columns,
        key=lambda name: dataset_order.index(name) if name in dataset_order else len(dataset_order),
    )
    return df[cols].reset_index()

def highlight_top3_dark_to_light(s):
    """Return one CSS style string per element of ``s``, shading the largest values green.

    The two largest entries are taken with ``nlargest(2)`` and de-duplicated.
    Cells equal to the first resulting value get the darker shade, cells equal
    to the second one (if there is one) get the lighter shade, and every other
    cell gets an empty string. Despite the "top3" in the name, only two shades
    are defined, so at most two distinct values are highlighted.
    """
    palette = ['mediumseagreen', 'lightgreen']
    ranked_values = s.nlargest(2).unique()

    def style_for(cell):
        # The first matching rank wins, i.e. the darkest applicable shade.
        for shade, ranked in zip(palette, ranked_values):
            if cell == ranked:
                return f'background-color: {shade}'
        return ''

    return [style_for(cell) for cell in s]

def highlight_bottom3_dark_to_light(s):
    """Return one CSS style string per element of ``s``, shading the smallest values blue.

    The three smallest entries are taken with ``nsmallest(3)`` and de-duplicated.
    Cells equal to the first resulting value get the darkest shade, with
    progressively lighter shades for the following values. Every other cell
    gets an empty string.
    """
    palette = ['mediumblue', 'lightblue', 'paleturquoise']
    ranked_values = s.nsmallest(3).unique()

    def style_for(cell):
        # The first matching rank wins, i.e. the darkest applicable shade.
        for shade, ranked in zip(palette, ranked_values):
            if cell == ranked:
                return f'background-color: {shade}'
        return ''

    return [style_for(cell) for cell in s]

# Comparing with other approaches

## Inference times for **Similar** groups, normalized to the SAE approach

In [6]:
experiments = ['523100174176986081', '333391697323445885']
selected_columns = ["MovieLens", "LastFM1k"]

# Aggregation strategies left out of the timing comparison.
excluded_aggregations = ["topk", "common_features", "wcom", "max"]

table = generate_recommendations_table(experiments)
# Copy the filtered frame so the column assignments below modify an independent
# DataFrame rather than a slice of the original (avoids SettingWithCopyWarning).
table = table[~table['Aggregation'].isin(excluded_aggregations)].copy()

# Reference values: the SAE row's time on each dataset, in selected_columns order.
sae_rows = table.loc[table['Approach'] == "SAE", selected_columns]
if len(sae_rows) != 1:
    # With zero or several SAE rows the positional lookups below would fail or
    # silently normalize against an arbitrary row.
    raise ValueError(
        f"Expected exactly one SAE row to normalize against, found {len(sae_rows)}"
    )
sae_times = sae_rows.values.flatten()
print(sae_times)

# Express every time as a multiple of the SAE time on the same dataset.
table["MovieLens"] = table["MovieLens"] / sae_times[0]
table["LastFM1k"] = table["LastFM1k"] / sae_times[1]

table = table.sort_values(
    by=["Approach","Aggregation"], ascending=True
).drop(columns=["Aggregation"])

[0.00369252 0.00332541]


In [7]:
# Display the current `table` (last expression of the cell is rendered as output).
table

Unnamed: 0,Approach,MovieLens,LastFM1k
0,ADD,16.848631,18.484187
1,ELSA,0.324075,0.812582
2,ELSA_INT,0.756003,0.650647
3,EPFuzzDA,32.119747,35.039343
4,GFAR,46.347114,51.535524
5,LMS,17.249598,18.750383
6,MPL,18.433887,20.2468
7,SAE,1.0,1.0


In [8]:
# Export the timing table to a LaTeX file.
# escape=True so LaTeX special characters in the cell text are escaped: approach
# names such as "ELSA_INT" contain an underscore, which breaks compilation in
# text mode when written unescaped.
table.to_latex(
    "sae_table.tex",
    index=False,
    float_format="%.1f",
    bold_rows=False,
    column_format="l|rr",
    escape=True,
    caption=(
        "Table with inference times for group RS approaches. The time is normalized to the SAE approach for each dataset. "
    ),
    label="tab:time"
)

## Group Recommendations Results for **Random** groups sorted by MovieLens G/mean

In [13]:
# get times as list
sae_times_list = sae_times.values.flatten().tolist()
sae_times_list

[0.002129436607589014, 0.0019456494331825524]

In [14]:
experiments = ['523100174176986081', '333391697323445885']

# FIXME(review): this cell raises TypeError as written. generate_recommendations_table()
# as defined in this notebook accepts no `dataset` keyword. It also returns flat
# per-dataset columns, so the (group type, metric) tuples built below would not
# match any column. Presumably this cell targets a different version of the
# helper; confirm which function is intended and update the call.


table = generate_recommendations_table(experiments, dataset="LastFM1k")


# Column keys to style: every (group type, metric) combination.
selected_columns = []
group_types = ["sim", "random", "outlier"]
for grouptype in group_types:
    for metric in ["G/mean", "U/mean", "U/min"]:
        selected_columns.append((grouptype, metric))
    


# Sort rows by approach and aggregation, shade the largest values of each
# selected column, and render numbers with three decimals.
table.sort_values(
    by=["Approach","Aggregation"], ascending=True
).style.apply(highlight_top3_dark_to_light, subset=selected_columns).format(precision=3)

TypeError: generate_recommendations_table() got an unexpected keyword argument 'dataset'

In [None]:
experiment_ids = ['523100174176986081', '333391697323445885']

# NOTE(review): generate_recommendations_table_with_best is not defined in the
# cells visible here; confirm it is defined or imported before this cell runs.
# Results for "random" groups, rows ordered by MovieLens G/mean, highest first.
df = (
    generate_recommendations_table_with_best(experiment_ids, group_type="random")
    .sort_values(by=[("MovieLens", "G/mean")], ascending=False)
)
df

Unnamed: 0_level_0,index,Approach,Aggregation,LastFM1k,LastFM1k,LastFM1k,LastFM1k,MovieLens,MovieLens,MovieLens,MovieLens
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,G/mean,Pop,U/mean,U/min,G/mean,Pop,U/mean,U/min
0,0,SAE,topk,0.517,0.636,0.756,0.557,0.638,0.56,0.695,0.546
1,1,SAE,average,0.527,0.639,0.761,0.561,0.633,0.546,0.691,0.542
2,2,ADD,,0.502,0.631,0.749,0.544,0.631,0.544,0.691,0.54
3,3,SAE,wcom,0.515,0.658,0.755,0.552,0.625,0.541,0.686,0.534
4,4,EPFuzzDA,,0.489,0.611,0.731,0.545,0.622,0.531,0.678,0.539
5,5,SAE,max,0.527,0.63,0.761,0.567,0.616,0.543,0.681,0.538
6,6,LMS,,0.459,0.59,0.702,0.504,0.606,0.509,0.647,0.506
7,7,ELSA,average,0.434,0.608,0.72,0.467,0.591,0.526,0.676,0.495
8,8,SAE,common_features,0.505,0.667,0.75,0.544,0.561,0.487,0.64,0.492
9,9,ELSA_INT,average,0.31,0.482,0.635,0.351,0.43,0.414,0.582,0.37


## Group Recommendations Results for **Divergent** (`group_type="outlier"`) groups sorted by MovieLens G/mean

In [9]:
experiment_ids = ['523100174176986081', '333391697323445885']

# NOTE(review): generate_recommendations_table_with_best is not defined in the
# cells visible here; confirm it is defined or imported before this cell runs.
# Results for "outlier" groups, rows ordered by MovieLens G/mean, highest first.
df = (
    generate_recommendations_table_with_best(experiment_ids, group_type="outlier")
    .sort_values(by=[("MovieLens", "G/mean")], ascending=False)
)
df

Unnamed: 0_level_0,index,Approach,Aggregation,LastFM1k,LastFM1k,LastFM1k,LastFM1k,MovieLens,MovieLens,MovieLens,MovieLens
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,G/mean,Pop,U/mean,U/min,G/mean,Pop,U/mean,U/min
0,0,SAE,topk,0.409,0.584,0.715,0.447,0.564,0.505,0.679,0.506
1,1,SAE,average,0.45,0.66,0.729,0.488,0.546,0.48,0.669,0.496
2,2,ADD,,0.4,0.587,0.71,0.442,0.544,0.472,0.669,0.491
3,3,SAE,wcom,0.44,0.668,0.727,0.471,0.542,0.486,0.667,0.483
4,4,SAE,max,0.454,0.628,0.725,0.509,0.538,0.478,0.656,0.502
5,5,EPFuzzDA,,0.386,0.558,0.673,0.469,0.502,0.435,0.629,0.478
6,6,LMS,,0.346,0.521,0.622,0.398,0.448,0.399,0.569,0.416
7,7,SAE,common_features,0.428,0.649,0.715,0.472,0.421,0.388,0.577,0.414
8,8,ELSA,average,0.3,0.544,0.657,0.327,0.412,0.384,0.598,0.363
9,9,MPL,,0.19,0.435,0.564,0.368,0.277,0.318,0.49,0.321
