In [None]:
import mlflow
import pandas as pd

# `...` (Ellipsis) is only a placeholder: replace it with the address of the MLflow
# tracking server before running, because the table builders below query runs via mlflow.
uri = ... # Set your MLflow tracking URI here
mlflow.set_tracking_uri(uri)

In [3]:
def generate_recommendations_table(experiment_ids, aggregation_function="common_features", note="sizes_acts", group_type="sim"):
    """Build a table of group-recommendation metrics, one row per SAE configuration.

    All runs of the given MLflow experiments are fetched and kept only when
    their ``note``, ``SAE_fusion_strategy`` and ``group_type`` params equal the
    requested values. Runs sharing ``(embedding_dim, top_k)`` are merged into
    one row; if two runs supply the same ``(dataset, metric)`` cell, the run
    returned later by ``mlflow.search_runs`` overwrites the earlier one.

    Args:
        experiment_ids: Iterable of MLflow experiment ids to scan.
        aggregation_function: Required value of the ``SAE_fusion_strategy`` param.
        note: Required value of the ``note`` param.
        group_type: Required value of the ``group_type`` param.

    Returns:
        A DataFrame sorted by ``Dimensions`` and ``TopK`` (emitted as regular
        columns), followed by ``(dataset, metric)`` columns ordered by
        MovieLens, LastFM1k, then any other dataset, and within a dataset by
        G/mean, U/min, U/mean, Pop. When no run matches, an empty frame with
        just the ``Dimensions`` and ``TopK`` columns is returned.
    """
    dataset_order = ["MovieLens", "LastFM1k"]
    metric_order = ["G/mean", "U/min", "U/mean", "Pop"]
    # Short column label -> name under which the metric is logged in MLflow.
    metric_sources = {
        "G/mean": "CommonItemsNDCG20/mean",
        "U/mean": "NDCG20/mean",
        "U/min": "NDCG20/min",
        "Pop": "Popularity/mean",
    }
    required_params = {
        "note": note,
        "SAE_fusion_strategy": aggregation_function,
        "group_type": group_type,
    }

    records = {}
    for exp_id in experiment_ids:
        runs = mlflow.search_runs(
            experiment_ids=[exp_id],
            output_format="list"
        )
        for run in runs:
            params = run.data.params
            if any(params.get(name) != expected for name, expected in required_params.items()):
                continue

            # Runs that did not log a dataset name are labelled by their experiment id.
            dataset = params.get("dataset", f"Exp-{exp_id}")
            row_key = (int(params.get("embedding_dim", 0)), int(params.get("top_k", 0)))

            row = records.setdefault(row_key, {})
            for label, source in metric_sources.items():
                row[(dataset, label)] = run.data.metrics.get(source)

    if not records:
        # from_dict() on an empty mapping produces a single-level index, so naming
        # two index levels below would raise ValueError; return the empty result directly.
        return pd.DataFrame(columns=["Dimensions", "TopK"])

    df = pd.DataFrame.from_dict(records, orient="index")
    df.index.names = ["Dimensions", "TopK"]

    def column_rank(col):
        dataset, metric = col[0], col[1]
        return (
            dataset_order.index(dataset) if dataset in dataset_order else len(dataset_order),
            metric_order.index(metric) if metric in metric_order else len(metric_order),
        )

    # The alphabetical pre-sort keeps unknown datasets/metrics in a deterministic
    # order; the stable custom sort then moves the known ones to the front.
    ordered_cols = sorted(df.sort_index(axis=1).columns, key=column_rank)

    return df.sort_index()[ordered_cols].reset_index()


def generate_recommendations_with_comparision(
    experiment_ids,
    aggregation_function="common_features",
    main_note="sizes_acts",
    note_to_compare="sizes_L2_without_acts",
    group_type="sim"
):
    """Build a table of percentage changes between two variants of the same runs.

    Runs are kept when their ``SAE_fusion_strategy`` and ``group_type`` params
    match and their ``note`` equals ``main_note`` ("main" variant) or
    ``note_to_compare`` ("compare" variant). For every ``(dataset, metric)``
    pair where both variants exist, the reported value is
    ``(main - compare) / |compare| * 100`` rounded to two decimals. A pair
    that only has the main variant is reported as the raw main value.

    Args:
        experiment_ids: Iterable of MLflow experiment ids to scan.
        aggregation_function: Required value of the ``SAE_fusion_strategy`` param.
        main_note: ``note`` value identifying the variant being evaluated.
        note_to_compare: ``note`` value identifying the reference variant.
        group_type: Required value of the ``group_type`` param.

    Returns:
        A DataFrame sorted by ``Dimensions`` and ``TopK`` (emitted as regular
        columns), followed by ``(dataset, metric, "% change")`` columns ordered
        by MovieLens, LastFM1k, then other datasets, and by G/mean, U/min,
        U/mean, Pop within a dataset. When no run matches, an empty frame with
        just the ``Dimensions`` and ``TopK`` columns is returned.
    """
    dataset_order = ["MovieLens", "LastFM1k"]
    metric_order = ["G/mean", "U/min", "U/mean", "Pop"]
    # Short column label -> name under which the metric is logged in MLflow.
    metric_sources = {
        "G/mean": "CommonItemsNDCG20/mean",
        "U/mean": "NDCG20/mean",
        "U/min": "NDCG20/min",
        "Pop": "Popularity/mean",
    }
    # "main" is inserted last so it wins when both notes are the same string.
    variant_by_note = {note_to_compare: "compare", main_note: "main"}

    records = {}
    for exp_id in experiment_ids:
        runs = mlflow.search_runs(
            experiment_ids=[exp_id],
            output_format="list"
        )
        for run in runs:
            params = run.data.params
            if params.get("SAE_fusion_strategy") != aggregation_function or params.get("group_type") != group_type:
                continue

            variant = variant_by_note.get(params.get("note"))
            if variant is None:
                continue

            # Runs that did not log a dataset name are labelled by their experiment id.
            dataset = params.get("dataset", f"Exp-{exp_id}")
            row_key = (int(params.get("embedding_dim", 0)), int(params.get("top_k", 0)))

            row = records.setdefault(row_key, {})
            for label, source in metric_sources.items():
                row[(dataset, label, variant)] = run.data.metrics.get(source)

    if not records:
        # from_dict() on an empty mapping produces a single-level index, so naming
        # two index levels below would raise ValueError; return the empty result directly.
        return pd.DataFrame(columns=["Dimensions", "TopK"])

    df = pd.DataFrame.from_dict(records, orient="index")
    df.index.names = ["Dimensions", "TopK"]

    # Calculate % differences; a zero reference value yields inf/NaN as plain float division does.
    result_cols = []
    for dataset, metric in sorted({(col[0], col[1]) for col in df.columns}):
        main_col = (dataset, metric, "main")
        compare_col = (dataset, metric, "compare")
        percent_col = (dataset, metric, "% change")

        if main_col in df.columns and compare_col in df.columns:
            df[percent_col] = ((df[main_col] - df[compare_col]) / df[compare_col].abs()) * 100
            result_cols.append(percent_col)
        elif main_col in df.columns:
            result_cols.append(main_col)

    def column_rank(col):
        dataset, metric = col[0], col[1]
        return (
            dataset_order.index(dataset) if dataset in dataset_order else len(dataset_order),
            metric_order.index(metric) if metric in metric_order else len(metric_order),
        )

    # result_cols is already alphabetical by (dataset, metric), so the stable sort
    # keeps unknown datasets/metrics in that order behind the known ones.
    ordered_cols = sorted(result_cols, key=column_rank)

    return df[ordered_cols].round(2).sort_index().reset_index()

In [4]:
import mlflow
import pandas as pd

def generate_common_features_table(experiment_ids, note="sizes_acts"):
    """Build a table of the ``common_features/mean`` metric per SAE configuration.

    Rows are keyed by ``(embedding_dim, top_k)``; columns are
    ``(dataset, group type)`` pairs restricted to the datasets MovieLens and
    LastFM1k and the group types Sim, Rand and Outlier, in that order. Cells
    for which no run logged the metric are NaN, and runs of any other dataset
    or group type are dropped by the final reindex.

    Args:
        experiment_ids: Iterable of MLflow experiment ids to scan.
        note: Required value of the ``note`` run param.

    Returns:
        A DataFrame sorted by ``Dimensions`` and ``TopK`` (emitted as regular
        columns). When no run matches, an empty frame with just the
        ``Dimensions`` and ``TopK`` columns is returned.
    """
    records = {}

    for exp_id in experiment_ids:
        runs = mlflow.search_runs(
            experiment_ids=[exp_id],
            filter_string=f"params.note = '{note}'",
            output_format="list"
        )

        for run in runs:
            params = run.data.params

            # The filter_string already asks for this note; the re-check keeps the
            # result correct even if the backend returns additional runs.
            if params.get("note") != note:
                continue

            value = run.data.metrics.get("common_features/mean", None)
            if value is None:
                continue

            # Runs that did not log a dataset name are labelled by their experiment id.
            dataset = params.get("dataset", f"Exp-{exp_id}")
            # "sim" -> "Sim", "random" -> "Rand", "outlier" -> "Outlier".
            group_label = params.get("group_type", "unknown").replace("random", "Rand").capitalize()
            row_key = (int(params.get("embedding_dim", 0)), int(params.get("top_k", 0)))

            records.setdefault(row_key, {})[(dataset, group_label)] = value

    if not records:
        # from_dict() on an empty mapping produces a single-level index, so naming
        # two index levels below would raise ValueError; return the empty result directly.
        return pd.DataFrame(columns=["Dimensions", "TopK"])

    df = pd.DataFrame.from_dict(records, orient="index")
    df.index.names = ["Dimensions", "TopK"]

    # Reorder columns into the fixed dataset / group-type layout.
    datasets = ["MovieLens", "LastFM1k"]
    subcols = ["Sim", "Rand", "Outlier"]
    col_order = pd.MultiIndex.from_tuples([(d, s) for d in datasets for s in subcols])

    return df.reindex(columns=col_order).sort_index().reset_index()

# Turn off activation

Podle predchozich vysledku, predevsim v tabulce ..., bylo videt, ze pocet common features napric skupinou je, predevsim pro male topk, velmi nizky (v jednotkach). Je tedy mozne, ze neziskavame vsechny spolecne rysy, coz ma za nasledek nizsi performance.

Nase idea, jak by slo toto teoreticky vyresit, je pro inferenci vypnout aktivacni funkci topk tak, abychom ziskali vice features pro jednotlive uzivatele. Idea je takova, ze aktivace topk vybira opravdu ty nejdulezitejsi rysy, ktere specifikuji daneho uzivatele. Muze se ale stat, ze ackoliv jsou nektere rysy velmi podobne, nejsou definovany tou samou featurou, a tedy je nikdy nemuzeme ziskat. Pokud tedy aktivaci vypneme, mohli bychom ziskat vice rysu, a tedy spojit i rysy, ktere uzivatele sdileji.

Nechte me jen pripomenout, ze i kdyz vypneme aktivaci topk, stale je zapnuta aktivace ReLU. Neznamena to tedy, ze by byly aktivovany vsechny (nebo temer vsechny) rysy, ale pouze ty, ktere maji pozitivni hodnotu.

Pojdme se tedy nejdrive kouknout, kolik nenulovych feature ziskame pro jednotlive uzivatele, pokud aktivaci vypneme. Jako priklad jsme vzali sparse autoencoder s dimenzi 2048 a topk 64. Vzali jsme nahodne 5000 uzivatelu a vypocitali jsme pro ne, kolik rysu je nenulovych. Vysledky jsou videt v nasledujicim grafu.

Je videt, ze prumerne je aktivovano 242 neuronu, minimalne 129 a maximalne 425. Graf pripomina gaussovske rozdeleni. Pro dimenzi 2048 tedy ziskavame okolo 1/10 rysu.

Nyni se podivame co to dela s poctem common features napric skupinou.

## Common activated dimensions across group members

In [5]:
# MLflow experiments holding the common-feature statistics.
experiments = ['962723054918039068', '228719589483846826']
# Only runs whose `note` param is "sizes_L2_without_acts" are tabulated; values are
# rounded to one decimal for display.
table = generate_common_features_table(experiments, note="sizes_L2_without_acts").round(1)
table

Unnamed: 0_level_0,Dimensions,TopK,MovieLens,MovieLens,MovieLens,LastFM1k,LastFM1k,LastFM1k
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Sim,Rand,Outlier,Sim,Rand,Outlier
0,1024,32,23.5,18.4,12.2,174.8,126.5,109.9
1,1024,64,41.0,35.5,26.5,133.3,95.3,86.4
2,1024,128,70.1,66.2,58.6,109.3,79.2,68.2
3,2048,32,84.5,69.8,63.9,352.1,258.0,209.4
4,2048,64,40.4,32.9,23.0,315.1,226.3,193.3
5,2048,128,65.6,58.1,41.5,167.3,113.6,97.3
6,4096,32,253.5,222.7,197.1,482.7,348.5,269.7
7,4096,64,70.1,54.0,42.2,568.6,414.1,334.3
8,4096,128,43.5,32.8,18.5,457.4,321.0,278.6


In [6]:
# Export the common-features table as a LaTeX table.
# NOTE(review): "sae_table.tex" is also the target of the later recommendation-table
# export cell, so whichever cell runs last overwrites the other's file.
# NOTE(review): the caption calls the third group type "divergent (Div)" while the
# exported column header is "Outlier" -- confirm which label the document should use.
# NOTE(review): bold_rows presumably has no effect together with index=False -- confirm.
table.to_latex(
    "sae_table.tex",
    index=False,
    float_format="%.1f",
    bold_rows=True,
    column_format="ll|ccc|ccc",
    escape=False,
    caption="Table of mean activated dimension in the sparse embedding that are shared across all group members when the TopK activation function is deactivated for similar (Sim), random (Rand) and divergent (Div) groups",
    label="tab:activation:common-features"
)

Vypnuti aktivace zapricinilo, ze mame mnohem vice spolecnych feature. Stale plati, ze cim je skupina diverznejsi, tim mene ma spolecnych rysu. To plati bez vyjimky pro vsechny SAE a oba datasety. Nicmene jiz nevidime zadny trend v zavislosti na dimenzi a topk. Nelze odhadnout, kolik bude mit dany SAE spolecnych rysu.

Jako velmi zajimavy take beru fakt, ze pro dimenzi 2048 a topk 64, ktery vidite nahore, mame pro sim skupiny vice spolecnych rysu, nez je prumer pres uzivatele. Znamena to, ze dani uzivatele museji mit nadprumerny pocet spolecnych rysu.

Nyni se podivame, co to provede s doporucovanim pro skupiny. Nejprve se podivame na common features.

## SAE group recommendation performance for common features aggregation function and similar groups

Comparing the base variant with the variant where we turned off the activation function.

compare - base variant with activation function
main - variant without activation function

In [7]:
# MLflow experiments holding the group-recommendation runs tabulated below.
experiment_ids = ['333391697323445885', '523100174176986081']

def format_latex(df, highlight_max_cols=None, highlight_min_cols=None, round_digits=3):
    """Return a copy of ``df`` whose cells are strings ready for a LaTeX table.

    In highlighted columns every value is formatted with ``round_digits``
    decimals; the best value is wrapped in ``\\textbf{}`` and the second best in
    ``\\underline{}``. "Best" means largest for ``highlight_max_cols`` and
    smallest for ``highlight_min_cols``. Values tied with the best are all
    bolded. All remaining columns are converted with plain ``str`` formatting.

    Args:
        df: Input frame; it is not modified.
        highlight_max_cols: Column labels in which larger is better.
        highlight_min_cols: Column labels in which smaller is better.
        round_digits: Number of decimals used in highlighted columns.

    Returns:
        A new DataFrame with the same shape and labels as ``df`` and string cells.
    """
    formatted_df = df.copy()
    highlight_max_cols = highlight_max_cols or []
    highlight_min_cols = highlight_min_cols or []

    def highlight(column, ranked):
        # nlargest/nsmallest return fewer than two values for short or mostly-NaN
        # columns, so each rank is looked up defensively instead of indexing blindly.
        best = ranked[0] if len(ranked) > 0 else None
        runner_up = ranked[1] if len(ranked) > 1 else None

        def format_cell(val):
            text = f"{val:.{round_digits}f}"
            if best is not None and val == best:
                return f"\\textbf{{{text}}}"
            if runner_up is not None and val == runner_up:
                return f"\\underline{{{text}}}"
            return text

        return column.apply(format_cell)

    for col in df.columns:
        col_values = df[col]

        if col in highlight_max_cols:
            formatted_df[col] = highlight(col_values, col_values.nlargest(2).tolist())
        elif col in highlight_min_cols:
            formatted_df[col] = highlight(col_values, col_values.nsmallest(2).tolist())
        else:
            formatted_df[col] = col_values.apply(lambda val: f"{val}")

    return formatted_df

# Columns where the largest value is the best one (NDCG-based metrics) ...
highlight_max_cols = [('MovieLens', 'G/mean'), ('MovieLens', 'U/min'), ('MovieLens', 'U/mean'), ('LastFM1k', 'G/mean'), ('LastFM1k', 'U/min'), ('LastFM1k', 'U/mean')]
# ... and columns where the smallest value is the one highlighted (popularity).
highlight_min_cols = [('MovieLens', 'Pop'), ('LastFM1k', 'Pop')]

# Common-features aggregation first.
table = generate_recommendations_table(experiment_ids, aggregation_function="common_features", note="sizes_L2_without_acts")
# NOTE(review): formatted_table is computed here, but the cell displays the raw `table`
# below -- confirm whether the formatted version was meant to be shown.
formatted_table = format_latex(
    table,
    highlight_max_cols=highlight_max_cols,
    highlight_min_cols=highlight_min_cols,
    round_digits=2
)
table

Unnamed: 0_level_0,Dimensions,TopK,MovieLens,MovieLens,MovieLens,MovieLens,LastFM1k,LastFM1k,LastFM1k,LastFM1k
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,G/mean,U/min,U/mean,Pop,G/mean,U/min,U/mean,Pop
0,1024,32,0.572468,0.500797,0.652702,0.467407,0.569658,0.618195,0.794603,0.581029
1,1024,64,0.584847,0.511633,0.663367,0.463238,0.567779,0.617902,0.794181,0.566396
2,1024,128,0.587235,0.51621,0.665646,0.453806,0.564924,0.614681,0.791259,0.56829
3,2048,32,0.586328,0.517296,0.663008,0.44687,0.57069,0.619441,0.794472,0.608829
4,2048,64,0.574459,0.505063,0.65493,0.450728,0.575675,0.62342,0.797806,0.582828
5,2048,128,0.584072,0.514205,0.663787,0.453288,0.569443,0.618002,0.794346,0.571757
6,4096,32,0.589128,0.517902,0.664986,0.463377,0.562452,0.613267,0.789805,0.618032
7,4096,64,0.602207,0.52374,0.672483,0.470159,0.573436,0.622059,0.795743,0.598407
8,4096,128,0.586471,0.510768,0.661652,0.464431,0.57765,0.627625,0.798515,0.575709


In [8]:
# Percentage change of the variant without TopK activation ("main") relative to the
# variant with it ("compare"), for common-features aggregation on similar groups.
experiment_ids = ['333391697323445885', '523100174176986081']
table = generate_recommendations_with_comparision(
    experiment_ids,
    aggregation_function="common_features",
    main_note="sizes_L2_without_acts",
    note_to_compare="sizes_L2_with_acts",
    group_type="sim"
)

# Column keys have three levels here: (dataset, metric, "% change").
highlight_max_cols = [('MovieLens', 'G/mean', r"% change"), ('MovieLens', 'U/min', r"% change"), ('MovieLens', 'U/mean', r"% change"), ('LastFM1k', 'G/mean', r"% change"), ('LastFM1k', 'U/min', r"% change"), ('LastFM1k', 'U/mean', r"% change")]
highlight_min_cols = [('MovieLens', 'Pop', r"% change"), ('LastFM1k', 'Pop', r"% change")]
# `table` is rebound to the string-formatted frame; the numeric values are not kept.
# NOTE(review): the values were already rounded to 2 decimals by the generator, so
# round_digits=3 only appends a trailing zero (e.g. "28.840").
table = format_latex(
    table,
    highlight_max_cols=highlight_max_cols,
    highlight_min_cols=highlight_min_cols,
    round_digits=3
)
table

Unnamed: 0_level_0,Dimensions,TopK,MovieLens,MovieLens,MovieLens,MovieLens,LastFM1k,LastFM1k,LastFM1k,LastFM1k
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,G/mean,U/min,U/mean,Pop,G/mean,U/min,U/mean,Pop
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,% change,% change,% change,% change,% change,% change,% change,% change
0,1024,32,28.840,25.420,17.900,7.620,35.390,27.820,18.100,3.520
1,1024,64,12.570,11.020,8.720,6.040,23.810,17.990,11.180,\textbf{0.100}
2,1024,128,8.780,7.700,5.640,\underline{5.660},10.690,7.230,4.640,1.140
3,2048,32,\underline{40.290},\underline{35.970},\underline{25.160},7.670,\underline{48.250},\underline{37.170},\underline{25.070},19.100
4,2048,64,18.830,16.720,11.910,8.260,29.950,22.710,14.300,5.720
5,2048,128,10.380,8.800,6.020,6.150,13.680,9.590,6.330,2.200
6,4096,32,\textbf{47.840},\textbf{44.270},\textbf{30.410},9.850,\textbf{51.190},\textbf{41.070},\textbf{28.840},28.740
7,4096,64,23.010,21.740,14.440,\textbf{1.850},34.860,26.280,16.630,11.430
8,4096,128,15.930,13.010,8.880,5.970,19.260,15.100,8.600,\underline{0.720}


In [9]:
# `table` was produced by format_latex(), so its best cells already contain LaTeX markup
# (\textbf{...}, \underline{...}). With escape=True that markup would be escaped and
# printed literally, so the cells are written raw and the only header label that needs
# escaping ("% change") is escaped by hand.
# NOTE(review): "sae_table.tex" is also written by the earlier common-features export
# cell; this call overwrites that file.
latex_table = table.rename(
    columns=lambda label: label.replace("%", r"\%") if isinstance(label, str) else label
)
latex_table.to_latex(
    "sae_table.tex",
    index=False,
    # All cells are strings at this point, so float_format does not change the output.
    float_format="%.1f",
    bold_rows=True,
    column_format="ll|rrrr|rrrr",
    escape=False,
    caption=(
        "Table showing the percentage change in recommendation metrics for approaches without TopK activation, compared to the TopK-activated approach. The Common Features aggregation function and similar groups are used. "
        "'G/mean' shows the percentage change in mean NDCG@20 using ground-truth recommendations seen by all group members. "
        "'U/min' shows the change in the mean of the minimum NDCG@20 across group members. "
        "'U/mean' shows the change in the mean of the average NDCG@20 across group members. "
        "'Pop' shows the change in the mean popularity of recommended items."
    ),
    label="tab:activation:recommendation:common-features:sim"
)

In [10]:
# Same comparison (without vs. with TopK activation, common-features aggregation),
# now for random groups.
experiment_ids = ['333391697323445885', '523100174176986081']
table = generate_recommendations_with_comparision(
    experiment_ids,
    aggregation_function="common_features",
    main_note="sizes_L2_without_acts",
    note_to_compare="sizes_L2_with_acts",
    group_type="random"
)
table

Unnamed: 0_level_0,Dimensions,TopK,MovieLens,MovieLens,MovieLens,MovieLens,LastFM1k,LastFM1k,LastFM1k,LastFM1k
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,G/mean,U/min,U/mean,Pop,G/mean,U/min,U/mean,Pop
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,% change,% change,% change,% change,% change,% change,% change,% change
0,1024,32,44.68,40.13,30.34,15.15,86.96,72.46,57.49,42.36
1,1024,64,20.66,19.13,13.64,8.87,53.74,44.07,33.05,20.86
2,1024,128,17.3,15.13,10.44,9.48,25.96,18.38,15.43,11.46
3,2048,32,73.43,65.82,45.13,20.1,144.28,118.21,92.37,80.88
4,2048,64,34.33,29.06,21.13,14.74,79.53,63.92,52.15,42.73
5,2048,128,16.89,15.68,10.21,9.75,37.96,27.29,22.03,18.28
6,4096,32,97.28,88.52,60.08,29.57,195.76,163.42,128.29,116.34
7,4096,64,37.24,34.62,24.86,9.3,105.38,84.34,66.46,57.23
8,4096,128,24.3,21.12,14.61,10.32,58.9,44.21,33.65,24.27


In [11]:
# Same comparison (without vs. with TopK activation, common-features aggregation),
# now for outlier groups.
experiment_ids = ['333391697323445885', '523100174176986081']
table = generate_recommendations_with_comparision(
    experiment_ids,
    aggregation_function="common_features",
    main_note="sizes_L2_without_acts",
    note_to_compare="sizes_L2_with_acts",
    group_type="outlier"
)
table

Unnamed: 0_level_0,Dimensions,TopK,MovieLens,MovieLens,MovieLens,MovieLens,LastFM1k,LastFM1k,LastFM1k,LastFM1k
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,G/mean,U/min,U/mean,Pop,G/mean,U/min,U/mean,Pop
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,% change,% change,% change,% change,% change,% change,% change,% change
0,1024,32,136.83,112.09,69.06,23.84,260.28,205.3,147.82,113.28
1,1024,64,51.39,43.86,31.12,18.99,142.23,103.21,77.76,52.14
2,1024,128,35.81,32.18,20.17,16.38,60.45,39.46,31.66,24.67
3,2048,32,289.55,241.08,116.6,37.09,773.08,570.54,317.07,273.02
4,2048,64,86.91,70.03,44.09,23.42,352.41,268.36,173.19,146.18
5,2048,128,35.97,29.12,20.51,15.8,102.94,70.3,54.66,46.84
6,4096,32,462.07,355.21,166.42,66.86,1428.77,917.99,423.7,362.35
7,4096,64,149.39,134.17,79.38,25.48,428.09,316.32,205.77,176.58
8,4096,128,43.0,36.85,24.11,15.49,183.75,127.38,93.33,74.63


## SAE group recommendation performance for **average** aggregation function and similar groups

Comparing the base variant with the variant where we turned off the activation function.

compare - base variant with activation function
main - variant without activation function

In [15]:
# Percentage change without vs. with TopK activation for the "average" aggregation
# function on similar groups.
experiment_ids = ['333391697323445885', '523100174176986081']
table = generate_recommendations_with_comparision(
    experiment_ids,
    aggregation_function="average",
    main_note="sizes_L2_without_acts",
    note_to_compare="sizes_L2_with_acts",
    group_type="sim"
)
table

Unnamed: 0_level_0,Dimensions,TopK,MovieLens,MovieLens,MovieLens,MovieLens,LastFM1k,LastFM1k,LastFM1k,LastFM1k
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,G/mean,U/min,U/mean,Pop,G/mean,U/min,U/mean,Pop
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,% change,% change,% change,% change,% change,% change,% change,% change
0,1024,32,-3.48,-2.74,-2.09,-8.65,-1.73,-0.84,-0.92,4.54
1,1024,64,-1.24,-0.95,-0.88,-3.82,0.24,0.82,0.03,-0.52
2,1024,128,-0.43,-0.36,-0.2,-0.83,0.49,0.71,0.07,-1.38
3,2048,32,-16.84,-13.75,-9.96,-23.62,-5.22,-4.34,-2.12,10.97
4,2048,64,-5.77,-4.59,-3.32,-11.33,-1.72,-1.2,-0.77,4.75
5,2048,128,-2.24,-1.76,-1.45,-5.84,-0.39,0.64,-0.22,-1.48
6,4096,32,-18.93,-15.86,-11.34,-21.34,-8.0,-6.46,-3.27,12.82
7,4096,64,-13.75,-11.12,-8.13,-19.63,-4.96,-4.07,-2.08,9.44
8,4096,128,-5.79,-4.23,-3.31,-9.73,-1.36,-0.45,-0.7,1.74


In [None]:
# Column-wise mean over all rows; Dimensions and TopK are ordinary columns after
# reset_index(), so they are averaged as well and those two entries carry no meaning.
table.mean()

Dimensions                      2389.333333
TopK                              74.666667
MovieLens   G/mean  % change      -7.607778
            U/min   % change      -6.151111
            U/mean  % change      -4.520000
            Pop     % change     -11.643333
LastFM1k    G/mean  % change      -2.516667
            U/min   % change      -1.687778
            U/mean  % change      -1.108889
            Pop     % change       4.542222
dtype: float64

In [17]:
# Column-wise standard deviation over all rows (Dimensions/TopK entries are not meaningful).
table.std()

Dimensions                      1354.624671
TopK                              42.332021
MovieLens   G/mean  % change       7.033455
            U/min   % change       5.855866
            U/mean  % change       4.172589
            Pop     % change       8.106966
LastFM1k    G/mean  % change       2.896079
            U/min   % change       2.632227
            U/mean  % change       1.141845
            Pop     % change       5.462231
dtype: float64

In [18]:
# Column-wise maximum, i.e. the largest % change per metric across all rows.
table.max()

Dimensions                      4096.00
TopK                             128.00
MovieLens   G/mean  % change      -0.43
            U/min   % change      -0.36
            U/mean  % change      -0.20
            Pop     % change      -0.83
LastFM1k    G/mean  % change       0.49
            U/min   % change       0.82
            U/mean  % change       0.07
            Pop     % change      12.82
dtype: float64

In [19]:
# Column-wise minimum, i.e. the smallest % change per metric across all rows.
table.min()

Dimensions                      1024.00
TopK                              32.00
MovieLens   G/mean  % change     -18.93
            U/min   % change     -15.86
            U/mean  % change     -11.34
            Pop     % change     -23.62
LastFM1k    G/mean  % change      -8.00
            U/min   % change      -6.46
            U/mean  % change      -3.27
            Pop     % change      -1.48
dtype: float64

## SAE group recommendation performance for **average** aggregation function and **random groups**

In [28]:
# Percentage change without vs. with TopK activation for the "average" aggregation
# function on random groups.
experiment_ids = ['333391697323445885', '523100174176986081']
table = generate_recommendations_with_comparision(
    experiment_ids,
    aggregation_function="average",
    main_note="sizes_L2_without_acts",
    note_to_compare="sizes_L2_with_acts",
    group_type="random"
)
table

Unnamed: 0_level_0,Dimensions,TopK,MovieLens,MovieLens,MovieLens,MovieLens,LastFM1k,LastFM1k,LastFM1k,LastFM1k
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,G/mean,U/min,U/mean,Pop,G/mean,U/min,U/mean,Pop
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,% change,% change,% change,% change,% change,% change,% change,% change
0,1024,32,-7.29,-6.23,-4.25,-11.84,0.35,0.56,-0.2,2.95
1,1024,64,-2.73,-2.4,-1.48,-4.89,0.06,0.64,-0.2,-0.95
2,1024,128,-0.57,-0.45,-0.39,-1.11,-0.7,-0.21,-0.42,-1.64
3,2048,32,-26.33,-23.55,-15.63,-29.89,-1.15,-1.36,-0.76,7.28
4,2048,64,-10.53,-9.31,-6.44,-14.92,1.11,1.48,0.1,3.51
5,2048,128,-4.53,-4.25,-2.88,-7.39,-0.73,1.02,-0.7,-2.01
6,4096,32,-26.95,-23.37,-15.87,-25.9,-2.95,-2.99,-1.42,8.29
7,4096,64,-21.58,-18.51,-12.86,-24.71,-0.71,-0.77,-0.66,6.72
8,4096,128,-9.0,-7.63,-5.11,-11.99,0.8,1.59,-0.17,1.08


In [29]:
# Column-wise mean of the random-group table (Dimensions/TopK entries are not meaningful).
table.mean()

Dimensions                      2389.333333
TopK                              74.666667
MovieLens   G/mean  % change     -12.167778
            U/min   % change     -10.633333
            U/mean  % change      -7.212222
            Pop     % change     -14.737778
LastFM1k    G/mean  % change      -0.435556
            U/min   % change      -0.004444
            U/mean  % change      -0.492222
            Pop     % change       2.803333
dtype: float64

## SAE group recommendation performance for **average** aggregation function and **divergent groups**

In [30]:
# Percentage change without vs. with TopK activation for the "average" aggregation
# function on outlier groups (called "divergent groups" in the section heading).
experiment_ids = ['333391697323445885', '523100174176986081']
table = generate_recommendations_with_comparision(
    experiment_ids,
    aggregation_function="average",
    main_note="sizes_L2_without_acts",
    note_to_compare="sizes_L2_with_acts",
    group_type="outlier"
)
table

Unnamed: 0_level_0,Dimensions,TopK,MovieLens,MovieLens,MovieLens,MovieLens,LastFM1k,LastFM1k,LastFM1k,LastFM1k
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,G/mean,U/min,U/mean,Pop,G/mean,U/min,U/mean,Pop
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,% change,% change,% change,% change,% change,% change,% change,% change
0,1024,32,-26.48,-22.58,-13.89,-29.98,4.3,4.08,0.45,5.86
1,1024,64,-13.06,-10.64,-6.51,-15.13,0.45,1.61,-0.32,-1.33
2,1024,128,-4.09,-3.36,-2.11,-4.68,-2.62,-1.55,-0.92,-3.28
3,2048,32,-62.8,-54.63,-35.26,-59.7,6.46,5.52,0.73,13.36
4,2048,64,-34.54,-29.67,-18.55,-37.36,6.27,6.31,0.74,6.21
5,2048,128,-19.13,-15.6,-9.91,-20.86,-1.9,1.2,-1.33,-4.23
6,4096,32,-56.01,-48.38,-31.08,-50.98,4.97,4.74,-0.07,14.56
7,4096,64,-48.79,-40.59,-26.39,-46.99,6.1,6.78,0.42,12.17
8,4096,128,-24.49,-19.55,-12.55,-26.23,3.12,4.86,-0.18,2.06


In [31]:
# Column-wise mean of the outlier-group table (Dimensions/TopK entries are not meaningful).
table.mean()

Dimensions                      2389.333333
TopK                              74.666667
MovieLens   G/mean  % change     -32.154444
            U/min   % change     -27.222222
            U/mean  % change     -17.361111
            Pop     % change     -32.434444
LastFM1k    G/mean  % change       3.016667
            U/min   % change       3.727778
            U/mean  % change      -0.053333
            Pop     % change       5.042222
dtype: float64

## Kontrola, ze common features taky nefailuji

In [22]:
# Sanity check: re-runs the common-features comparison for random groups with the same
# arguments as the earlier random-group cell; the result is displayed, not stored.
experiment_ids = ['333391697323445885', '523100174176986081']
generate_recommendations_with_comparision(
    experiment_ids,
    aggregation_function="common_features",
    main_note="sizes_L2_without_acts",
    note_to_compare="sizes_L2_with_acts",
    group_type="random"
)

Unnamed: 0_level_0,Dimensions,TopK,MovieLens,MovieLens,MovieLens,MovieLens,LastFM1k,LastFM1k,LastFM1k,LastFM1k
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,G/mean,U/min,U/mean,Pop,G/mean,U/min,U/mean,Pop
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,% change,% change,% change,% change,% change,% change,% change,% change
0,1024,32,44.68,40.13,30.34,15.15,86.96,72.46,57.49,42.36
1,1024,64,20.66,19.13,13.64,8.87,53.74,44.07,33.05,20.86
2,1024,128,17.3,15.13,10.44,9.48,25.96,18.38,15.43,11.46
3,2048,32,73.43,65.82,45.13,20.1,144.28,118.21,92.37,80.88
4,2048,64,34.33,29.06,21.13,14.74,79.53,63.92,52.15,42.73
5,2048,128,16.89,15.68,10.21,9.75,37.96,27.29,22.03,18.28
6,4096,32,97.28,88.52,60.08,29.57,195.76,163.42,128.29,116.34
7,4096,64,37.24,34.62,24.86,9.3,105.38,84.34,66.46,57.23
8,4096,128,24.3,21.12,14.61,10.32,58.9,44.21,33.65,24.27


In [23]:
# Sanity check: re-runs the common-features comparison for outlier groups with the same
# arguments as the earlier outlier-group cell; the result is displayed, not stored.
experiment_ids = ['333391697323445885', '523100174176986081']
generate_recommendations_with_comparision(
    experiment_ids,
    aggregation_function="common_features",
    main_note="sizes_L2_without_acts",
    note_to_compare="sizes_L2_with_acts",
    group_type="outlier"
)

Unnamed: 0_level_0,Dimensions,TopK,MovieLens,MovieLens,MovieLens,MovieLens,LastFM1k,LastFM1k,LastFM1k,LastFM1k
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,G/mean,U/min,U/mean,Pop,G/mean,U/min,U/mean,Pop
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,% change,% change,% change,% change,% change,% change,% change,% change
0,1024,32,136.83,112.09,69.06,23.84,260.28,205.3,147.82,113.28
1,1024,64,51.39,43.86,31.12,18.99,142.23,103.21,77.76,52.14
2,1024,128,35.81,32.18,20.17,16.38,60.45,39.46,31.66,24.67
3,2048,32,289.55,241.08,116.6,37.09,773.08,570.54,317.07,273.02
4,2048,64,86.91,70.03,44.09,23.42,352.41,268.36,173.19,146.18
5,2048,128,35.97,29.12,20.51,15.8,102.94,70.3,54.66,46.84
6,4096,32,462.07,355.21,166.42,66.86,1428.77,917.99,423.7,362.35
7,4096,64,149.39,134.17,79.38,25.48,428.09,316.32,205.77,176.58
8,4096,128,43.0,36.85,24.11,15.49,183.75,127.38,93.33,74.63
