In [None]:
import mlflow
import pandas as pd

# Placeholder: `...` (Ellipsis) must be replaced with the real MLflow tracking
# URI before this cell is run; the value is then handed to the MLflow client.
uri = ... # Set your MLflow tracking URI here
mlflow.set_tracking_uri(uri)


In [2]:

def generate_recommendations_with_comparision(
    experiment_ids,
    aggregation_function="common_features",
    main_note="sizes_acts",
    note_to_compare="sizes_L2_without_acts",
    group_type="sim"
):
    """Compare two run variants logged in MLflow, side by side.

    For every experiment id the runs are fetched with ``mlflow.search_runs``
    and kept only when their ``SAE_fusion_strategy`` param equals
    ``aggregation_function`` and their ``group_type`` param equals
    ``group_type``. A kept run is labelled "main" when its ``note`` param
    equals ``main_note``, "compare" when it equals ``note_to_compare``, and
    is skipped otherwise. Runs are grouped by (dataset, embedding_dim, top_k);
    if several runs share the same key and label, the last one wins.

    Args:
        experiment_ids: Iterable of MLflow experiment ids to scan.
        aggregation_function: Required value of the ``SAE_fusion_strategy`` param.
        main_note: ``note`` value identifying the variant that is reported.
        note_to_compare: ``note`` value identifying the baseline variant.
        group_type: Required value of the ``group_type`` param.

    Returns:
        pandas.DataFrame with the key columns "Dataset", "Dimensions", "TopK"
        followed by two-level metric columns ``(metric, "% change")`` and
        ``(metric, "main")``, rounded to 2 decimals and sorted by the key
        columns. "% change" is ``(main - compare) / |compare| * 100``.
        When no run matches, an empty frame with only the three key columns
        is returned.
    """
    index_names = ["Dataset", "Dimensions", "TopK"]
    # Logged MLflow metric name -> short label used as the column header.
    metric_labels = {
        "CommonItemsNDCG20/mean": "G/mean",
        "NDCG20/mean": "U/mean",
        "NDCG20/min": "U/min",
        "Popularity/mean": "Pop",
    }

    # (dataset, dim, topk) -> {(label, "main" | "compare"): metric value}
    records = {}
    for exp_id in experiment_ids:
        runs = mlflow.search_runs(
            experiment_ids=[exp_id],
            output_format="list"
        )

        for run in runs:
            params = run.data.params
            note = params.get("note")
            fusion = params.get("SAE_fusion_strategy")
            group = params.get("group_type")

            if fusion != aggregation_function or group != group_type:
                continue

            # Runs without a "dataset" param are bucketed per experiment.
            dataset = params.get("dataset", f"Exp-{exp_id}")
            dim = int(params.get("embedding_dim", 0))
            topk = int(params.get("top_k", 0))

            if note == main_note:
                model_type = "main"
            elif note == note_to_compare:
                model_type = "compare"
            else:
                continue

            row = records.setdefault((dataset, dim, topk), {})
            for metric_name, label in metric_labels.items():
                row[(label, model_type)] = run.data.metrics.get(metric_name)

    if not records:
        # Without any record the frame has a single-level index, so assigning
        # three index names below would raise. Return an empty table instead.
        return pd.DataFrame(columns=index_names)

    df = pd.DataFrame.from_dict(records, orient="index")
    df.index.names = index_names

    # Calculate % differences
    result_cols = []
    for metric in sorted({col[0] for col in df.columns}):
        main_col = (metric, "main")
        compare_col = (metric, "compare")
        percent_col = (metric, "% change")

        if main_col in df.columns and compare_col in df.columns:
            df[percent_col] = ((df[main_col] - df[compare_col]) / df[compare_col].abs()) * 100
            result_cols.extend([main_col, percent_col])
        elif main_col in df.columns:
            # No baseline available for this metric: report the main value only.
            result_cols.append(main_col)

    # Keep only main and percent change columns, sort them by metric
    df = df[result_cols]
    df = df.round(2)
    df = df.sort_index(axis=1, level=0).sort_values(by=index_names)

    return df.reset_index()

# Normalized embeddings

Jedna z moznych veci, ktere mohou v modelech nastat, je, ze velikost sparse embeddingu ma odlisnou distribuci mezi cleny skupiny. Jinymi slovy, nekdo muze mit vetsi hodnoty embeddingu nez nekdo jiny. Pri agregaci by to pak znamenalo, ze nekteri uzivatele budou vysledny embedding ovlivnovat vice nez jini. To muze byt problem, pokud chceme, aby vysledky byly fer pro vsechny uzivatele.

Pojdme nejdrive prozkoumat, zda takovy jev opravdu nastava. Podivame se na distribuci sumy hodnot v embeddingu na vzorku 5000 uzivatelu. Opet vezmeme stejny priklad jako minule, tedy dimenzi 2048 a topk 64. Jak je videt na grafu, histogram sum embeddingu uzivatelu ma priblizne normalni rozdeleni. Ne vsichni uzivatele tedy maji stejne velke hodnoty a normalizace by mohla pomoci udelat doporuceni fairnejsi vuci vsem uzivatelum.


Nyni se jeste pojdme podivat na graf pokud vypneme normalizaci. Jak je videt, zde uz se nejedna o ciste normalni rozdeleni, ale ocasek u vetsich hodnot je mnohem vyraznejsi. I zde by mohla normalizace pomoci, tak aby nebyli nekteri uzivatele vetsiho embeddingu preferovani oproti ostatnim.

Normalizace, kterou chceme pouzit, vydeli embedding jeho L2 normou a vysledek pronasobi prumernou hodnotou embeddingu. Normalizace tedy bude vypadat takto:

\begin{equation*}
\text{normalized\_embedding} = \frac{\text{embedding}}{\|\text{embedding}\|_2} \cdot \text{mean}(\text{embedding})
\end{equation*}

Podivejme se tedy, jak vypadaji doporuceni s normalizovanymi embeddingy. Nejdrive pro common features bez aktivace.




## SAE group recommendation performance for **common features** aggregation function and **similar groups**

Comparison of the base model and the model with normalized embeddings

In [3]:
# Common-features aggregation, "sim" groups: runs noted
# "sizes_L2_without_acts_normalized" (main) vs. "sizes_L2_without_acts" (compare).
experiment_ids = ['333391697323445885', '523100174176986081']
table = generate_recommendations_with_comparision(
    experiment_ids,
    aggregation_function="common_features",
    main_note="sizes_L2_without_acts_normalized",
    note_to_compare="sizes_L2_without_acts",
    group_type="sim"
)
# Per-dataset mean over all rows; note that the Dimensions and TopK key
# columns are averaged as well, so those two output columns carry no meaning.
table.groupby("Dataset").mean()

  table.groupby("Dataset").mean()


Unnamed: 0_level_0,Dimensions,TopK,G/mean,G/mean,Pop,Pop,U/mean,U/mean,U/min,U/min
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,% change,main,% change,main,% change,main,% change,main
Dataset,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
LastFM1k,2389.333333,74.666667,-0.201111,0.568889,0.156667,0.586667,-0.046667,0.793333,-0.266667,0.618889
MovieLens,2389.333333,74.666667,0.175556,0.586667,0.284444,0.46,0.105556,0.662222,0.084444,0.513333


In [9]:
# Common-features aggregation, "random" groups: runs noted
# "sizes_L2_without_acts_normalized" (main) vs. "sizes_L2_without_acts" (compare).
experiment_ids = ['333391697323445885', '523100174176986081']
table = generate_recommendations_with_comparision(
    experiment_ids,
    aggregation_function="common_features",
    main_note="sizes_L2_without_acts_normalized",
    note_to_compare="sizes_L2_without_acts",
    group_type="random"
)
# Per-dataset mean over all rows; note that the Dimensions and TopK key
# columns are averaged as well, so those two output columns carry no meaning.
table.groupby("Dataset").mean()

  table.groupby("Dataset").mean()


Unnamed: 0_level_0,Dimensions,TopK,G/mean,G/mean,Pop,Pop,U/mean,U/mean,U/min,U/min
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,% change,main,% change,main,% change,main,% change,main
Dataset,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
LastFM1k,2389.333333,74.666667,-0.247778,0.478889,0.112222,0.647778,-0.028889,0.737778,-0.355556,0.523333
MovieLens,2389.333333,74.666667,0.295556,0.533333,0.315556,0.48,0.145556,0.618889,0.185556,0.461111


In [10]:
# Common-features aggregation, "outlier" groups: runs noted
# "sizes_L2_without_acts_normalized" (main) vs. "sizes_L2_without_acts" (compare).
experiment_ids = ['333391697323445885', '523100174176986081']
table =generate_recommendations_with_comparision(
    experiment_ids,
    aggregation_function="common_features",
    main_note="sizes_L2_without_acts_normalized",
    note_to_compare="sizes_L2_without_acts",
    group_type="outlier"
)
# Per-dataset mean over all rows; note that the Dimensions and TopK key
# columns are averaged as well, so those two output columns carry no meaning.
table.groupby("Dataset").mean()

  table.groupby("Dataset").mean()


Unnamed: 0_level_0,Dimensions,TopK,G/mean,G/mean,Pop,Pop,U/mean,U/mean,U/min,U/min
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,% change,main,% change,main,% change,main,% change,main
Dataset,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
LastFM1k,2389.333333,74.666667,-0.308889,0.397778,0.2,0.606667,-0.003333,0.693333,-0.506667,0.443333
MovieLens,2389.333333,74.666667,0.507778,0.352222,0.606667,0.36,0.241111,0.521111,0.525556,0.34


Jak je videt, zmena je naprosto minimalni (v prumeru pod 1 %) a v doporucenich nedochazi k zadnemu vyraznemu rozdilu. Nyni se podivejme na average s aktivaci.

## SAE group recommendation performance for **average** aggregation function and **similar groups**

Comparison of the base model and the model with normalized embeddings

In [5]:
# Average aggregation, "sim" groups: runs noted "sizes_L2_with_acts_normalized"
# (main) vs. "sizes_L2_with_acts" (compare). The full per-configuration table
# is the cell output (no per-dataset averaging here).
experiment_ids = ['333391697323445885', '523100174176986081']
generate_recommendations_with_comparision(
    experiment_ids,
    aggregation_function="average",
    main_note="sizes_L2_with_acts_normalized",
    note_to_compare="sizes_L2_with_acts",
    group_type="sim"
)

Unnamed: 0_level_0,Dataset,Dimensions,TopK,G/mean,G/mean,Pop,Pop,U/mean,U/mean,U/min,U/min
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,% change,main,% change,main,% change,main,% change,main
0,LastFM1k,1024,32,-0.34,0.58,0.41,0.58,-0.1,0.8,-0.47,0.62
1,LastFM1k,1024,64,-0.03,0.58,0.37,0.57,-0.03,0.8,-0.35,0.63
2,LastFM1k,1024,128,-0.54,0.58,0.39,0.57,-0.12,0.8,-0.68,0.63
3,LastFM1k,2048,32,-0.57,0.59,0.69,0.58,-0.16,0.8,-0.72,0.63
4,LastFM1k,2048,64,-0.68,0.59,0.55,0.57,-0.19,0.8,-0.79,0.63
5,LastFM1k,2048,128,-0.55,0.59,0.52,0.57,-0.16,0.8,-0.8,0.63
6,LastFM1k,4096,32,-0.7,0.59,0.77,0.58,-0.2,0.81,-0.78,0.64
7,LastFM1k,4096,64,-0.7,0.59,0.48,0.57,-0.2,0.81,-0.84,0.64
8,LastFM1k,4096,128,-0.67,0.59,0.36,0.57,-0.2,0.8,-0.75,0.63
9,MovieLens,1024,32,-0.3,0.64,-0.04,0.5,-0.08,0.7,-0.28,0.55


In [6]:
# Average aggregation, "random" groups: runs noted "sizes_L2_with_acts_normalized"
# (main) vs. "sizes_L2_with_acts" (compare). The full per-configuration table
# is the cell output (no per-dataset averaging here).
experiment_ids = ['333391697323445885', '523100174176986081']
generate_recommendations_with_comparision(
    experiment_ids,
    aggregation_function="average",
    main_note="sizes_L2_with_acts_normalized",
    note_to_compare="sizes_L2_with_acts",
    group_type="random"
)

Unnamed: 0_level_0,Dataset,Dimensions,TopK,G/mean,G/mean,Pop,Pop,U/mean,U/mean,U/min,U/min
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,% change,main,% change,main,% change,main,% change,main
0,LastFM1k,1024,32,-0.66,0.49,0.46,0.64,-0.15,0.75,-0.56,0.54
1,LastFM1k,1024,64,-0.51,0.49,0.35,0.63,-0.07,0.75,-0.66,0.53
2,LastFM1k,1024,128,-0.8,0.49,0.33,0.63,-0.12,0.75,-1.06,0.53
3,LastFM1k,2048,32,-1.08,0.5,0.83,0.65,-0.24,0.75,-1.06,0.54
4,LastFM1k,2048,64,-0.67,0.5,0.64,0.64,-0.1,0.75,-0.79,0.54
5,LastFM1k,2048,128,-1.07,0.5,0.49,0.64,-0.18,0.75,-1.23,0.54
6,LastFM1k,4096,32,-0.8,0.5,0.75,0.65,-0.14,0.76,-0.99,0.54
7,LastFM1k,4096,64,-1.07,0.5,0.53,0.64,-0.19,0.76,-1.12,0.55
8,LastFM1k,4096,128,-0.71,0.5,0.34,0.64,-0.12,0.75,-1.16,0.54
9,MovieLens,1024,32,-0.13,0.63,-0.13,0.55,-0.06,0.69,-0.35,0.53


In [7]:
# Average aggregation, "outlier" groups: runs noted "sizes_L2_with_acts_normalized"
# (main) vs. "sizes_L2_with_acts" (compare). The full per-configuration table
# is the cell output (no per-dataset averaging here).
experiment_ids = ['333391697323445885', '523100174176986081']
generate_recommendations_with_comparision(
    experiment_ids,
    aggregation_function="average",
    main_note="sizes_L2_with_acts_normalized",
    note_to_compare="sizes_L2_with_acts",
    group_type="outlier"
)

Unnamed: 0_level_0,Dataset,Dimensions,TopK,G/mean,G/mean,Pop,Pop,U/mean,U/mean,U/min,U/min
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,% change,main,% change,main,% change,main,% change,main
0,LastFM1k,1024,32,-0.34,0.41,0.98,0.6,0.06,0.72,-1.02,0.45
1,LastFM1k,1024,64,-0.36,0.4,0.86,0.58,0.01,0.71,-0.84,0.43
2,LastFM1k,1024,128,-0.94,0.4,0.88,0.58,0.01,0.71,-1.25,0.43
3,LastFM1k,2048,32,-0.92,0.42,1.59,0.6,-0.04,0.72,-1.5,0.45
4,LastFM1k,2048,64,-0.8,0.41,1.25,0.59,-0.04,0.71,-1.49,0.44
5,LastFM1k,2048,128,-1.04,0.41,1.02,0.58,0.0,0.71,-1.36,0.44
6,LastFM1k,4096,32,-0.9,0.42,1.45,0.6,-0.12,0.72,-0.93,0.45
7,LastFM1k,4096,64,-1.51,0.42,1.0,0.59,-0.2,0.72,-1.61,0.45
8,LastFM1k,4096,128,-1.14,0.41,0.68,0.59,-0.15,0.71,-1.41,0.44
9,MovieLens,1024,32,-1.19,0.54,-0.77,0.48,-0.3,0.66,-0.64,0.48
