In [None]:
import os
from urllib.parse import urlparse
from urllib.request import url2pathname

import mlflow
import pandas as pd

# MLflow tracking store the notebook reads runs from.
# The MLFLOW_TRACKING_URI environment variable takes precedence so the notebook
# can run on another machine without editing this cell; the fallback is the
# original hard-coded local store.
uri = os.environ.get(
    'MLFLOW_TRACKING_URI',
    'file:///Users/vitkostejn/school/diploma_thesis/code/mlruns',
)
mlflow.set_tracking_uri(uri)


In [None]:
import pickle

def get_metric_data(run, metric='NDCG20'):
    """Load one entry from the ``logs.pkl`` artifact of a run.

    Args:
        run: Object exposing ``run.info.artifact_uri``. The value may be a
            ``file:`` URI or a plain local directory path.
        metric: Key to look up in the unpickled object.

    Returns:
        Whatever ``logs.pkl`` stores under ``metric``.

    Raises:
        FileNotFoundError: If ``logs.pkl`` is not present in the artifact directory.
        KeyError: If ``metric`` is not a key of the unpickled object.
    """
    artifact_uri = run.info.artifact_uri
    parsed = urlparse(artifact_uri)
    if parsed.scheme == 'file':
        # Convert the URI to a filesystem path properly: this decodes
        # percent-escapes (e.g. "%20" for a space) and handles Windows drive
        # letters, neither of which a plain prefix strip does.
        artifact_dir = url2pathname(parsed.path)
    else:
        # No file scheme: treat the value as a local path, as before.
        artifact_dir = artifact_uri
    artifact_path = os.path.join(artifact_dir, 'logs.pkl')
    # NOTE(security): pickle.load can execute arbitrary code. Only point this
    # at artifacts produced by your own, trusted runs.
    with open(artifact_path, 'rb') as f:
        data = pickle.load(f)
    return data[metric]

In [None]:
# Short display labels for the SAE fusion strategies; values not listed here
# are used as-is.
aggregation_mapping = {
    "average": "Avg",
    "max": "Max",
    "common_features": "ComF",
    "wcom": "WCom",
    "topk": "TopK",
}

def get_method_name(row):
    """Build the display name of a recommender strategy.

    Args:
        row: Either a subscriptable record (dict, pandas row, ...) holding
            ``'recommender_strategy'`` and, for the ``"SAE"`` strategy,
            ``'SAE_fusion_strategy'``; or a run-like object that exposes those
            keys through ``row.data.params``.

    Returns:
        The strategy name with underscores replaced by hyphens. For ``"SAE"``
        the fusion strategy label is appended after a hyphen.
    """
    if not hasattr(row, '__getitem__'):
        # Run objects returned by mlflow.search_runs(output_format="list") are
        # not subscriptable; their parameters live in run.data.params.
        row = row.data.params
    method = row['recommender_strategy']
    if method == "SAE":
        aggregation = row['SAE_fusion_strategy']
        method += "-" + aggregation_mapping.get(aggregation, aggregation)
    return method.replace('_', '-')

In [None]:

experiment_id = '523100174176986081'

# Keep only runs whose `group_set` and `user_set` params are both 'test'.
run_filter = "params.group_set = 'test' and params.user_set = 'test'"

# output_format="list" is requested so that later cells can read
# run.data.params and run.info on each result.
runs = mlflow.search_runs(
    experiment_ids=[experiment_id],
    filter_string=run_filter,
    output_format="list",
)

In [None]:
from scipy import stats

# One record per (group type, metric, strategy): the strategy's mean value and
# the p-value of a one-sided paired t-test against the best strategy.
significance = []

group_types = ['sim', 'random', 'outlier']
# Not read by the loop below; presumably the MLflow metric names matching the
# entries of the two lists that follow — TODO confirm.
metrics = ['NDCG20_com', 'NDCG20_min', 'NDCG20_mean', 'Popularity/mean']
# Keys looked up in each run's logs.pkl via get_metric_data.
add_name_metrics = ['Group_NDCG', 'User_NDCG_Mins', 'User_NDCG_Means', 'Popularity']
# Labels stored in the 'Metric' column of the result table.
official_metrics = ['$NDCG_{com}$', '$NDCG_{min}$', '$NDCG_{mean}$', 'Popularity']

for group_type in group_types:
    print(f"Group Type: {group_type.upper()}")
    group_runs = [run for run in runs if run.data.params.get('group_type') == group_type]
    print(f"Number of runs: {len(group_runs)}")
    if not group_runs:
        # Nothing to compare; max()/min() below would raise on an empty dict.
        continue

    # get_method_name subscripts its argument, so pass the params dict rather
    # than the Run object itself.
    strategies = [get_method_name(run.data.params) for run in group_runs]

    for add_name_metric, official_metric in zip(add_name_metrics, official_metrics):
        # Popularity is the only metric where the smallest mean counts as best.
        lower_is_better = official_metric == 'Popularity'

        # get_metric_data loads a single run; gather the values per strategy name.
        metric_data = {
            strategy: get_metric_data(run, metric=add_name_metric)
            for strategy, run in zip(strategies, group_runs)
        }

        pick_best = min if lower_is_better else max
        best_strategy, best_values = pick_best(metric_data.items(), key=lambda item: item[1].mean())
        print(f"Best {add_name_metric} for {group_type} groups: {best_strategy} with mean {best_values.mean():.3f} and std {best_values.std():.3f}")

        # One-sided alternative: the best strategy is better than the current one.
        alternative = 'less' if lower_is_better else 'greater'
        for strategy in strategies:
            curr_results = metric_data[strategy]
            if strategy == best_strategy:
                # A paired test of a sample against itself is degenerate (0/0);
                # record NaN directly instead of triggering a runtime warning.
                t_stat = p_val = float('nan')
            else:
                t_stat, p_val = stats.ttest_rel(best_values.squeeze(), curr_results.squeeze(), alternative=alternative)
            print(f"{strategy}: t = {t_stat:.3f}  p = {p_val:.4f}")
            significance.append({
                'Group Type': group_type,
                'Metric': official_metric,
                'Strategy': strategy,
                'Value': curr_results.mean(),
                'p-value': p_val,
                # The best strategy is never marked as significantly worse than itself.
                'Significant': bool(strategy != best_strategy and p_val <= 0.05),
            })

Group Type: SIM
Number of runs: 13
Best Group_NDCG for sim groups: SAE-TopK with mean 0.639 and std 0.175
MPL: t = 39.420  p = 0.0000
EPFuzzDA: t = 3.794  p = 0.0001
GFAR: t = 26.926  p = 0.0000
LMS: t = 5.889  p = 0.0000
ADD: t = 1.442  p = 0.0749
POPULAR: t = 37.411  p = 0.0000
SAE-WCom: t = 4.800  p = 0.0000
SAE-TopK: t = nan  p = nan
SAE-Max: t = 7.679  p = 0.0000
SAE-ComF: t = 12.313  p = 0.0000
SAE-Avg: t = 1.549  p = 0.0609
ELSA-INT: t = 29.603  p = 0.0000
ELSA: t = 10.837  p = 0.0000
Best User_NDCG_Mins for sim groups: SAE-TopK with mean 0.558 and std 0.125
MPL: t = 30.035  p = 0.0000
EPFuzzDA: t = 2.851  p = 0.0022
GFAR: t = 18.454  p = 0.0000
LMS: t = 10.187  p = 0.0000
ADD: t = 1.157  p = 0.1237
POPULAR: t = 39.956  p = 0.0000
SAE-WCom: t = 4.511  p = 0.0000
SAE-TopK: t = nan  p = nan
SAE-Max: t = 5.299  p = 0.0000
SAE-ComF: t = 13.381  p = 0.0000
SAE-Avg: t = 0.194  p = 0.4232
ELSA-INT: t = 29.505  p = 0.0000
ELSA: t = 16.341  p = 0.0000
Best User_NDCG_Means for sim groups:

In [None]:
# Rows with Significant == False sort first. A stable sort keeps the original
# record order inside each group, so the exported CSV is reproducible.
df = pd.DataFrame(significance).sort_values(by=['Significant'], ascending=True, kind='mergesort')

# Create the output folder if it is missing so the export works on a fresh checkout.
os.makedirs('paper', exist_ok=True)
df.to_csv('paper/significance.csv', index=False)

# Preview as the cell's last expression; placed mid-cell it was evaluated but never shown.
df.head(20)