In [24]:
import pandas as pd
import numpy as np
from src.data.metrics import get_metrics

In [2]:
# Metric columns that are summarised throughout the notebook.
metrics = ["top_1_accuracy", "top_5_accuracy", "macro_f1"]

# One JSON record per line; the path is relative to the notebook's directory.
df_all = pd.read_json("../reports/all_models.jsonl", orient="records", lines=True)

# The return value is not used; presumably this call writes the metric columns
# selected below onto df_all -- TODO confirm in src.data.metrics.
get_metrics(df_all, metrics)

# Mean and standard deviation of every metric per experimental configuration.
# Aggregating with a list of functions yields two-level columns
# (metric, statistic); reset_index turns the group keys back into columns.
config_keys = ["model_cls", "dataset", "loc_levels", "n_users"]
per_config = df_all.groupby(config_keys)[metrics]
dfm = per_config.agg(["mean", "std"]).reset_index()

In [9]:
# Relabel every aggregated row whose loc_levels is 2 as loc_levels 1.
# NOTE(review): this edits dfm in place, and the reason why 2 and 1 are treated
# as the same setting is not visible in this notebook -- confirm.
dfm.loc[dfm["loc_levels"].eq(2), "loc_levels"] = 1

In [10]:
# Keep the aggregates for the 150- and 800-user settings, index them by
# (loc_levels, dataset, model_cls) and scale every statistic by 100.
# NOTE(review): 150 and 800 are presumably the user counts reported per
# dataset -- confirm, and consider naming them in a configuration cell.
dft = (
    dfm[dfm["n_users"].isin([150, 800])]
    # level=0 drops by the top column level; a plain label drop on this
    # non-lexsorted column MultiIndex makes pandas emit a warning.
    .drop(columns=["n_users"], level=0)
    .set_index(["loc_levels", "dataset", "model_cls"])
    .sort_index()
    # Only the mean/std metric columns remain, so this rescales all of them.
    .mul(100)
)
dft

  dft = dft.drop(columns=["n_users"])


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,top_1_accuracy,top_1_accuracy,top_5_accuracy,top_5_accuracy,macro_f1,macro_f1
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,mean,std,mean,std,mean,std
loc_levels,dataset,model_cls,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
1,foursquare_NYC,BiTULER,60.118266,0.044682,67.200823,0.043064,57.829871,0.080872
1,foursquare_NYC,DeepTUL,58.716064,0.07353,65.484008,0.04568,56.600267,0.10486
1,foursquare_NYC,MainTUL,55.666961,0.103056,62.610017,0.078141,53.012503,0.113486
1,foursquare_NYC,T3S,52.98409,0.056556,60.282597,0.052468,49.495928,0.047051
1,foursquare_NYC,TULERG,58.866673,0.061295,65.740468,0.066904,56.061457,0.068486
1,foursquare_NYC,TULERL,58.469078,0.086665,65.279493,0.050472,55.598652,0.097819
1,foursquare_NYC,TULHOR,53.853442,0.079035,61.13006,0.057145,50.397196,0.081979
1,foursquare_NYC,TULVAE,59.786011,0.256308,66.768271,0.240706,57.323329,0.312511
1,foursquare_TKY,BiTULER,61.280729,0.05822,73.083263,0.06544,58.955797,0.028116
1,foursquare_TKY,DeepTUL,59.143229,0.048352,70.660077,0.063159,56.901586,0.083056


In [49]:
# Models kept in the final tables; rows for any other model_cls are left out.
reported_models = ["BiTULER", "TULVAE", "DeepTUL", "MainTUL", "T3S", "TULHOR"]

dfl = (
    # Keep only the "mean" statistic, leaving one column per metric.
    dft.xs("mean", axis=1, level=1, drop_level=True)
    .rename(
        columns={
            "top_1_accuracy": "Acc@1",
            "top_5_accuracy": "Acc@5",
            "macro_f1": "Macro F1",
        }
    )
    # Spread the datasets across the columns. pivot_table uses its default
    # aggregation (mean) when several rows share the same
    # (loc_levels, model_cls, dataset) combination.
    .pivot_table(index=["loc_levels", "model_cls"], columns="dataset")
    # Dataset becomes the outer column level, the metric the inner one.
    .reorder_levels([1, 0], axis=1)
    .sort_index(axis=1)
    # Row filter on the second index level (model_cls).
    .loc[lambda frame: np.isin(frame.index.get_level_values(1), reported_models)]
    .rename(
        columns={
            "foursquare_NYC": "Foursquare-NYC",
            "foursquare_TKY": "Foursquare-TKY",
            "geolife": "GeoLife",
        }
    )
    .rename_axis(["Emb. Levels", "Model"], axis=0)
)

In [50]:
def highlight_max(s):
    """Build per-entry CSS that bolds the largest value(s) of a Series.

    Every entry equal to ``s.max()`` gets ``"font-weight: bold"``; all other
    entries get an empty string. Ties are all highlighted.

    Args:
        s: The Series to inspect.

    Returns:
        A list of CSS strings, one per entry of ``s``, in the same order.
    """
    peak = s.max()
    return ["font-weight: bold" if value == peak else "" for value in s]


# Rows for level 1 of the first index level, in the row order of the table.
df1 = dfl.xs(1, level=0).loc[
    ["BiTULER", "TULVAE", "DeepTUL", "MainTUL", "T3S", "TULHOR"]
]

# Two-decimal formatting; highlight_max is applied with Styler.apply's default
# axis, and convert_css translates the resulting CSS into LaTeX commands.
styled_level1 = df1.style.format(precision=2).apply(highlight_max)
latex_output = styled_level1.to_latex(hrules=True, convert_css=True)
print(latex_output)


\begin{tabular}{lrrrrrrrrr}
\toprule
dataset & \multicolumn{3}{r}{Foursquare-NYC} & \multicolumn{3}{r}{Foursquare-TKY} & \multicolumn{3}{r}{GeoLife} \\
 & Acc@1 & Acc@5 & Macro F1 & Acc@1 & Acc@5 & Macro F1 & Acc@1 & Acc@5 & Macro F1 \\
Model &  &  &  &  &  &  &  &  &  \\
\midrule
BiTULER & \bfseries 60.12 & \bfseries 67.20 & \bfseries 57.83 & \bfseries 61.28 & \bfseries 73.08 & \bfseries 58.96 & \bfseries 37.56 & 70.85 & 26.69 \\
TULVAE & 59.79 & 66.77 & 57.32 & 54.19 & 64.92 & 49.85 & 37.08 & 70.45 & 25.25 \\
DeepTUL & 58.72 & 65.48 & 56.60 & 59.14 & 70.66 & 56.90 & 36.32 & \bfseries 72.64 & \bfseries 29.82 \\
MainTUL & 55.67 & 62.61 & 53.01 & 56.81 & 69.18 & 54.09 & 34.00 & 70.26 & 21.76 \\
T3S & 52.98 & 60.28 & 49.50 & 53.65 & 66.30 & 50.38 & 35.25 & 71.11 & 21.52 \\
TULHOR & 53.85 & 61.13 & 50.40 & 54.39 & 67.24 & 51.06 & 34.65 & 72.46 & 24.92 \\
\bottomrule
\end{tabular}



In [77]:
# Same model rows as df1, taken from level 4 of the first index level.
df2 = dfl.xs(4, level=0).loc[
    ["BiTULER", "TULVAE", "DeepTUL", "MainTUL", "T3S", "TULHOR"]
]

# Cell-wise change from the level-1 table (df1) to the level-4 table (df2).
df_diff = df2.sub(df1)

# Signed two-decimal display on a red-yellow-green gradient. axis=None applies
# one colour scale to the whole table, pinned to the range [-12, 12].
style = df_diff.style.format("{:+.2f}").background_gradient(
    cmap="RdYlGn", vmin=-12, vmax=12, axis=None, text_color_threshold=0.2
)
style


dataset,Foursquare-NYC,Foursquare-NYC,Foursquare-NYC,Foursquare-TKY,Foursquare-TKY,Foursquare-TKY,GeoLife,GeoLife,GeoLife
Unnamed: 0_level_1,Acc@1,Acc@5,Macro F1,Acc@1,Acc@5,Macro F1,Acc@1,Acc@5,Macro F1
Model,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
BiTULER,1.49,3.58,1.71,1.44,2.87,1.44,8.72,6.98,6.26
TULVAE,1.79,3.73,2.04,2.9,4.29,3.22,8.01,6.57,4.74
DeepTUL,1.06,3.04,1.26,0.94,2.44,0.95,8.59,6.31,5.97
MainTUL,1.44,4.33,1.52,1.64,3.49,1.56,8.19,6.18,5.5
T3S,1.71,3.13,2.1,1.71,2.84,1.9,6.98,4.8,5.18
TULHOR,1.71,3.16,2.11,1.86,2.75,2.1,8.2,5.03,5.68


In [78]:
# Emit the coloured difference table as LaTeX source; convert_css turns the
# gradient's CSS into \cellcolor / \color commands.
print(style.to_latex(convert_css=True, hrules=True))

\begin{tabular}{lrrrrrrrrr}
\toprule
dataset & \multicolumn{3}{r}{Foursquare-NYC} & \multicolumn{3}{r}{Foursquare-TKY} & \multicolumn{3}{r}{GeoLife} \\
 & Acc@1 & Acc@5 & Macro F1 & Acc@1 & Acc@5 & Macro F1 & Acc@1 & Acc@5 & Macro F1 \\
Model &  &  &  &  &  &  &  &  &  \\
\midrule
BiTULER & {\cellcolor[HTML]{E8F59F}} \color[HTML]{000000} +1.49 & {\cellcolor[HTML]{BFE47A}} \color[HTML]{000000} +3.58 & {\cellcolor[HTML]{E3F399}} \color[HTML]{000000} +1.71 & {\cellcolor[HTML]{E8F59F}} \color[HTML]{000000} +1.44 & {\cellcolor[HTML]{CFEB85}} \color[HTML]{000000} +2.87 & {\cellcolor[HTML]{E8F59F}} \color[HTML]{000000} +1.44 & {\cellcolor[HTML]{36A657}} \color[HTML]{000000} +8.72 & {\cellcolor[HTML]{6BBF64}} \color[HTML]{000000} +6.98 & {\cellcolor[HTML]{7FC866}} \color[HTML]{000000} +6.26 \\
TULVAE & {\cellcolor[HTML]{E2F397}} \color[HTML]{000000} +1.79 & {\cellcolor[HTML]{BDE379}} \color[HTML]{000000} +3.73 & {\cellcolor[HTML]{DFF293}} \color[HTML]{000000} +2.04 & {\cellcolor[HTML]{CFEB85}}