In [1]:
## RQ4

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
import functools
import os
import pickle
import sys

import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.lines as mlines
import matplotlib.transforms as mtransforms
import numpy as np
import pandas as pd
import scipy.stats as ss
import scipy.special as ssp

from dotenv import load_dotenv

load_dotenv()
project_root = os.environ["PROJECT_ROOT"]
sys.path.append(project_root)

In [4]:
import src.modules.result_analysis.loading as result_loading
import src.modules.result_analysis.model_standardization as ms

In [5]:
figures_root = os.path.join(project_root, "latex", "figures")
os.makedirs(figures_root, exist_ok=True)

In [6]:
plt.rc('font', size=20)
plt.rc('text', usetex=True)
plt.rc('text.latex', preamble=r'\usepackage{amsmath,amssymb,bm,bbm,lmodern}')

In [7]:
bins_mass_cols = [f"bins_mass_{x}" for x in range(10)]

In [8]:
NUM_FOLDS = 10

data_path_templates = {
    "MF_128": os.path.join(project_root, "logs", "LBD_results", "MF_128", "MF_128-{}-0", "export"),
    "CMF_128": os.path.join(project_root, "logs", "LBD_results", "CMF_128", "CMF_128-{}-0", "export"),
    "OrdRec-UI_512": os.path.join(project_root, "logs", "LBD_results", "OrdRec-UI_512", "OrdRec-UI_512-{}-0", "export"),
    "LBDS_512_sum_ab": os.path.join(project_root, "logs", "LBD_results", "LBDS_512_sum_ab", "LBDS_512_sum_ab-{}-0", "export"),
    "LBDA_512_sum_ab": os.path.join(project_root, "logs", "LBD_results", "LBDA_512_sum_ab", "LBDA_512_sum_ab-{}-0", "export")
}

print("Loading data")
data = {k: [result_loading.path_to_df(v.format(i)) for i in range(NUM_FOLDS)] for k, v in data_path_templates.items()}
print("Standardising")
confidence_models = {k: [ms.standardise_model(k, df) for df in dfs] for k, dfs in data.items()}

Loading data
Standardising


In [9]:
def user_best_single_fold_single_repeat(df, true_min_rating):
    best_user_scores = df[["score"]].loc[df.groupby("uid")["score"].apply(lambda x: np.random.choice(x.index[x == x.max()]))]
    order_sample = best_user_scores.sort_values("score").index[::-1]
    success = df.loc[order_sample]["rating"] >= true_min_rating
    return success

In [10]:
def single_fold(mass, rating, true_min_rating, repeats, uid=None, round_digits=6, verbose=True, single_repeat_fn=user_best_single_fold_single_repeat):
    df = pd.DataFrame.from_dict({"score": mass, "rating": rating})
    if uid is not None:
        df["uid"] = uid
    df["score"].round(round_digits)
    successes = []
    iterator = tqdm(range(repeats)) if verbose else range(repeats)
    for i in iterator:
        successes.append(single_repeat_fn(df, true_min_rating))
    return successes

In [11]:
def get_mass(df, name, pred_min_rating):
    if name.startswith("LBDA") or name.startswith("OrdRec"):
        return ordrec_cdf(df[bins_mass_cols].values, pred_min_rating)
    if name.startswith("LBD"):
        return beta_cdf(df["alpha"], df["beta"], pred_min_rating)
    return norm_cdf(df["mean"], np.sqrt(df["var"]), pred_min_rating)

In [12]:
def save_legend(path, ncol, fontsize=18, line_params=[]):
    figlegend = plt.figure(figsize=(0.5, 0.5))
    figlegend.legend(handles=[mlines.Line2D([], [], **line) for line in line_params],
               fontsize=fontsize,
               loc='center',
               ncol=ncol,
               frameon=False,
               borderaxespad=0,
               borderpad=0,
               labelspacing=0.2,
               columnspacing=1.)
    figlegend.savefig(path, bbox_inches='tight')

In [13]:
import pickle
from tqdm import tqdm
pred_min_rating = 4.5
true_min_rating = pred_min_rating
round_to = 6
rating_to_beta = lambda x: x/5-0.1
rating_to_norm = lambda x: x-0.25
rating_to_ordrec = lambda x: int(x*2)-1
repeats = 1
beta_cdf = lambda a, b, x: 1 - ss.beta(a,b).cdf(rating_to_beta(x))
norm_cdf = lambda mean, std, x: 1 - ss.norm(mean, std).cdf(rating_to_norm(x))
ordrec_cdf = lambda bin_mass, x: np.flip(np.cumsum(np.flip(bin_mass, axis=-1), axis=-1), axis=-1)[:,rating_to_ordrec(x)]

load_cum = 0  # Set this to 1 if want to load the previously saved results
save_cum = 1  # Set this to 0 if do not want to save the current results

num_folds = 10
cum_path = "RQ4_cum_precision@1"
cum = {}
successes = {k: [[]]*num_folds for k in data}

if load_cum:
    with open(cum_path, "rb") as input_file:
        cum = pickle.load(input_file)

for k,z,c,l in zip(["MF_128", "CMF_128", "OrdRec-UI_512", "LBDS_512_sum_ab", "LBDA_512_sum_ab"], [0, 1, 3, 4, 5], ["black", "C1", "C2", "C3", "C0"], ["MF", "CMF", "OUI", "LBDS", "LBDA"]):
    dfs = data[k]

    if save_cum:
        for fold in range(num_folds):
            df = dfs[fold]
            m = get_mass(df, k, pred_min_rating)
            successes[k][fold] = single_fold(m, df["rating"], true_min_rating, repeats, uid=df["uid"], round_digits=round_to, verbose=True, single_repeat_fn=user_best_single_fold_single_repeat)
        shortest_fold_len = np.min([len(successes[k][j][0]) for j in range(num_folds)])  # Different folds have different number of interactions
        successes[k] = [[inner[:shortest_fold_len] for inner in outer] for outer in successes[k]]
        cum[k] = np.array(successes[k]).cumsum(axis=-1)

if save_cum:
    with open(cum_path, "wb") as output_file:
        pickle.dump(cum, output_file)

100%|██████████| 1/1 [00:04<00:00,  4.81s/it]
100%|██████████| 1/1 [00:04<00:00,  4.77s/it]
100%|██████████| 1/1 [00:04<00:00,  4.70s/it]
100%|██████████| 1/1 [00:04<00:00,  4.67s/it]
100%|██████████| 1/1 [00:04<00:00,  4.66s/it]
100%|██████████| 1/1 [00:04<00:00,  4.69s/it]
100%|██████████| 1/1 [00:04<00:00,  4.73s/it]
100%|██████████| 1/1 [00:04<00:00,  4.62s/it]
100%|██████████| 1/1 [00:04<00:00,  4.67s/it]
100%|██████████| 1/1 [00:04<00:00,  4.69s/it]
100%|██████████| 1/1 [00:04<00:00,  4.76s/it]
100%|██████████| 1/1 [00:04<00:00,  4.76s/it]
100%|██████████| 1/1 [00:04<00:00,  4.67s/it]
100%|██████████| 1/1 [00:04<00:00,  4.73s/it]
100%|██████████| 1/1 [00:04<00:00,  4.66s/it]
100%|██████████| 1/1 [00:04<00:00,  4.78s/it]
100%|██████████| 1/1 [00:04<00:00,  4.77s/it]
100%|██████████| 1/1 [00:04<00:00,  4.64s/it]
100%|██████████| 1/1 [00:04<00:00,  4.73s/it]
100%|██████████| 1/1 [00:04<00:00,  4.65s/it]
100%|██████████| 1/1 [00:04<00:00,  4.66s/it]
100%|██████████| 1/1 [00:04<00:00,

In [14]:
figsize = (7.68,4.8)

### Figure 4

In [18]:
N = 52777
num_sampled = 100
ds = {}
fig, ax = plt.subplots(figsize = figsize)
x = np.unique(np.floor(np.logspace(np.log10(100), np.log10(N-1), num_sampled)).astype(int))
# x = np.arange(0, N, 1).astype(int)
for k,z,c,l in zip(["MF_128", "CMF_128", "OrdRec-UI_512", "LBDS_512_sum_ab", "LBDA_512_sum_ab"], [0, 1, 3, 4, 5], ["grey", "C2", "C0", "C1", "C3"], ["MF", "CMF", "OUI", "LBDS", "LBDA"]):
    d = (cum[k][:,:,:N]/np.arange(1, N+1))
    ds[k] = d
    y = d.mean(axis=(0,1))
    y_min = np.min(d, axis=(0,1))
    y_max = np.max(d, axis=(0,1))
    ax.plot(x+1, y[x], label=l, c=c)
    ax.fill_between(x, y_min[x], y_max[x], color=c, alpha=0.3)
ax.set_ylim(0.6, 1)
ax.set_xlim(100, N)
ax.set_xscale('log')
plt.savefig(f"{figures_root}/{cum_path}.pdf", format="pdf", pad_inches=0, dpi=300, bbox_inches="tight")

RuntimeError: latex was not able to process the following string:
b'lp'

Here is the full command invocation and its output:

latex -interaction=nonstopmode --halt-on-error --output-directory=tmpbkilry3y cebbd2118aa9b0e8268656ea497b427a.tex

This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) (preloaded format=latex)
 restricted \write18 enabled.
entering extended mode
(./cebbd2118aa9b0e8268656ea497b427a.tex
LaTeX2e <2023-11-01> patch level 1
L3 programming layer <2024-02-20>
(/usr/local/texlive/2024basic/texmf-dist/tex/latex/base/article.cls
Document Class: article 2023/05/17 v1.4n Standard LaTeX document class
(/usr/local/texlive/2024basic/texmf-dist/tex/latex/base/size10.clo))

! LaTeX Error: File `type1cm.sty' not found.

Type X to quit or <RETURN> to proceed,
or enter new name. (Default extension: sty)

Enter file name: 
! Emergency stop.
<read *> 
         
l.7 \usepackage
               {type1ec}^^M
No pages of output.
Transcript written on tmpbkilry3y/cebbd2118aa9b0e8268656ea497b427a.log.




Error in callback <function _draw_all_if_interactive at 0x13e513a30> (for post_execute), with arguments args (),kwargs {}:


RuntimeError: latex was not able to process the following string:
b'lp'

Here is the full command invocation and its output:

latex -interaction=nonstopmode --halt-on-error --output-directory=tmp8ikl8n4j cebbd2118aa9b0e8268656ea497b427a.tex

This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) (preloaded format=latex)
 restricted \write18 enabled.
entering extended mode
(./cebbd2118aa9b0e8268656ea497b427a.tex
LaTeX2e <2023-11-01> patch level 1
L3 programming layer <2024-02-20>
(/usr/local/texlive/2024basic/texmf-dist/tex/latex/base/article.cls
Document Class: article 2023/05/17 v1.4n Standard LaTeX document class
(/usr/local/texlive/2024basic/texmf-dist/tex/latex/base/size10.clo))

! LaTeX Error: File `type1cm.sty' not found.

Type X to quit or <RETURN> to proceed,
or enter new name. (Default extension: sty)

Enter file name: 
! Emergency stop.
<read *> 
         
l.7 \usepackage
               {type1ec}^^M
No pages of output.
Transcript written on tmp8ikl8n4j/cebbd2118aa9b0e8268656ea497b427a.log.




RuntimeError: latex was not able to process the following string:
b'lp'

Here is the full command invocation and its output:

latex -interaction=nonstopmode --halt-on-error --output-directory=tmpw8ssvoxb cebbd2118aa9b0e8268656ea497b427a.tex

This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) (preloaded format=latex)
 restricted \write18 enabled.
entering extended mode
(./cebbd2118aa9b0e8268656ea497b427a.tex
LaTeX2e <2023-11-01> patch level 1
L3 programming layer <2024-02-20>
(/usr/local/texlive/2024basic/texmf-dist/tex/latex/base/article.cls
Document Class: article 2023/05/17 v1.4n Standard LaTeX document class
(/usr/local/texlive/2024basic/texmf-dist/tex/latex/base/size10.clo))

! LaTeX Error: File `type1cm.sty' not found.

Type X to quit or <RETURN> to proceed,
or enter new name. (Default extension: sty)

Enter file name: 
! Emergency stop.
<read *> 
         
l.7 \usepackage
               {type1ec}^^M
No pages of output.
Transcript written on tmpw8ssvoxb/cebbd2118aa9b0e8268656ea497b427a.log.




<Figure size 768x480 with 1 Axes>

In [24]:
N = 52777
S = 1
log = True
x = np.unique(np.floor(np.logspace(np.log10(100), np.log10(N-1), num_sampled)).astype(int))
fig, ax = plt.subplots(figsize=figsize)
for k,z,c,l in zip(["MF_128", "CMF_128", "OrdRec-UI_512", "LBDS_512_sum_ab", "LBDA_512_sum_ab"], [0, 1, 3, 4, 5], ["grey", "C2", "C0", "C1", "C3"], ["MF", "CMF", "OUI", "LBDS", "LBDA"]):
    d = (cum[k]/cum["MF_128"])
    y = d.mean(axis=(0,1))
    y_min = np.min(d, axis=(0,1))
    y_max = np.max(d, axis=(0,1))
    ax.plot(x, y[x], label=l, c=c)
    ax.fill_between(x, y_min[x], y_max[x], color=c, alpha=0.3)
ax.set_ylim(0.99, 1.17)
ax.set_xlim(100, N)
ax.set_xscale('log')
plt.savefig(f"{figures_root}/{cum_path}_relative.pdf", format="pdf", pad_inches=0, dpi=300, bbox_inches="tight")

  d = (cum[k]/cum["MF_128"])
  d = (cum[k]/cum["MF_128"])


RuntimeError: latex was not able to process the following string:
b'lp'

Here is the full command invocation and its output:

latex -interaction=nonstopmode --halt-on-error --output-directory=tmpcatixeps cebbd2118aa9b0e8268656ea497b427a.tex

This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) (preloaded format=latex)
 restricted \write18 enabled.
entering extended mode
(./cebbd2118aa9b0e8268656ea497b427a.tex
LaTeX2e <2023-11-01> patch level 1
L3 programming layer <2024-02-20>
(/usr/local/texlive/2024basic/texmf-dist/tex/latex/base/article.cls
Document Class: article 2023/05/17 v1.4n Standard LaTeX document class
(/usr/local/texlive/2024basic/texmf-dist/tex/latex/base/size10.clo))

! LaTeX Error: File `type1cm.sty' not found.

Type X to quit or <RETURN> to proceed,
or enter new name. (Default extension: sty)

Enter file name: 
! Emergency stop.
<read *> 
         
l.7 \usepackage
               {type1ec}^^M
No pages of output.
Transcript written on tmpcatixeps/cebbd2118aa9b0e8268656ea497b427a.log.




Error in callback <function _draw_all_if_interactive at 0x13e513a30> (for post_execute), with arguments args (),kwargs {}:


RuntimeError: latex was not able to process the following string:
b'lp'

Here is the full command invocation and its output:

latex -interaction=nonstopmode --halt-on-error --output-directory=tmpmk2110jw cebbd2118aa9b0e8268656ea497b427a.tex

This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) (preloaded format=latex)
 restricted \write18 enabled.
entering extended mode
(./cebbd2118aa9b0e8268656ea497b427a.tex
LaTeX2e <2023-11-01> patch level 1
L3 programming layer <2024-02-20>
(/usr/local/texlive/2024basic/texmf-dist/tex/latex/base/article.cls
Document Class: article 2023/05/17 v1.4n Standard LaTeX document class
(/usr/local/texlive/2024basic/texmf-dist/tex/latex/base/size10.clo))

! LaTeX Error: File `type1cm.sty' not found.

Type X to quit or <RETURN> to proceed,
or enter new name. (Default extension: sty)

Enter file name: 
! Emergency stop.
<read *> 
         
l.7 \usepackage
               {type1ec}^^M
No pages of output.
Transcript written on tmpmk2110jw/cebbd2118aa9b0e8268656ea497b427a.log.




RuntimeError: latex was not able to process the following string:
b'lp'

Here is the full command invocation and its output:

latex -interaction=nonstopmode --halt-on-error --output-directory=tmp4hb18821 cebbd2118aa9b0e8268656ea497b427a.tex

This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) (preloaded format=latex)
 restricted \write18 enabled.
entering extended mode
(./cebbd2118aa9b0e8268656ea497b427a.tex
LaTeX2e <2023-11-01> patch level 1
L3 programming layer <2024-02-20>
(/usr/local/texlive/2024basic/texmf-dist/tex/latex/base/article.cls
Document Class: article 2023/05/17 v1.4n Standard LaTeX document class
(/usr/local/texlive/2024basic/texmf-dist/tex/latex/base/size10.clo))

! LaTeX Error: File `type1cm.sty' not found.

Type X to quit or <RETURN> to proceed,
or enter new name. (Default extension: sty)

Enter file name: 
! Emergency stop.
<read *> 
         
l.7 \usepackage
               {type1ec}^^M
No pages of output.
Transcript written on tmp4hb18821/cebbd2118aa9b0e8268656ea497b427a.log.




<Figure size 768x480 with 1 Axes>

In [25]:
plotting_params = [
    {
        "color": "grey",
        "linestyle": "-",
        "linewidth": 10,
        "markersize": 10,
        "fillstyle": "none",
        "label": "MF"
    },
    {
        "color": "C2",
        "linestyle": "-",
        "linewidth": 10,
        "markersize": 10,
        "fillstyle": "none",
        "label": "CMF"
    },
    {
        "color": "C0",
        "linestyle": "-",
        "linewidth": 10,
        "markersize": 10,
        "fillstyle": "none",
        "label": "OrdRec-UI"
    },
        {
        "color": "C1",
        "linestyle": "-",
        "linewidth": 10,
        "markersize": 10,
        "fillstyle": "none",
        "label": "LBD-S"
    },
        {
        "color": "C3",
        "linestyle": "-",
        "linewidth": 10,
        "markersize": 10,
        "fillstyle": "none",
        "label": "LBD-A"
    },
]

In [26]:
save_legend(f"{figures_root}/RQ4_legend.pdf", 5, line_params=plotting_params)

RuntimeError: latex was not able to process the following string:
b'lp'

Here is the full command invocation and its output:

latex -interaction=nonstopmode --halt-on-error --output-directory=tmp6ogl2amf 89071ab9f1d39f0d307bed35d4b82930.tex

This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) (preloaded format=latex)
 restricted \write18 enabled.
entering extended mode
(./89071ab9f1d39f0d307bed35d4b82930.tex
LaTeX2e <2023-11-01> patch level 1
L3 programming layer <2024-02-20>
(/usr/local/texlive/2024basic/texmf-dist/tex/latex/base/article.cls
Document Class: article 2023/05/17 v1.4n Standard LaTeX document class
(/usr/local/texlive/2024basic/texmf-dist/tex/latex/base/size10.clo))

! LaTeX Error: File `type1cm.sty' not found.

Type X to quit or <RETURN> to proceed,
or enter new name. (Default extension: sty)

Enter file name: 
! Emergency stop.
<read *> 
         
l.7 \usepackage
               {type1ec}^^M
No pages of output.
Transcript written on tmp6ogl2amf/89071ab9f1d39f0d307bed35d4b82930.log.




Error in callback <function _draw_all_if_interactive at 0x13e513a30> (for post_execute), with arguments args (),kwargs {}:


RuntimeError: latex was not able to process the following string:
b'lp'

Here is the full command invocation and its output:

latex -interaction=nonstopmode --halt-on-error --output-directory=tmpzbkrplqh 89071ab9f1d39f0d307bed35d4b82930.tex

This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) (preloaded format=latex)
 restricted \write18 enabled.
entering extended mode
(./89071ab9f1d39f0d307bed35d4b82930.tex
LaTeX2e <2023-11-01> patch level 1
L3 programming layer <2024-02-20>
(/usr/local/texlive/2024basic/texmf-dist/tex/latex/base/article.cls
Document Class: article 2023/05/17 v1.4n Standard LaTeX document class
(/usr/local/texlive/2024basic/texmf-dist/tex/latex/base/size10.clo))

! LaTeX Error: File `type1cm.sty' not found.

Type X to quit or <RETURN> to proceed,
or enter new name. (Default extension: sty)

Enter file name: 
! Emergency stop.
<read *> 
         
l.7 \usepackage
               {type1ec}^^M
No pages of output.
Transcript written on tmpzbkrplqh/89071ab9f1d39f0d307bed35d4b82930.log.




<Figure size 50x50 with 0 Axes>

### Table 4

In [35]:
pos = [100, 316,1000,3162,10000,31620]
pos = [100, 320,1000,3200,10000,32000] #TH
stat_sign = ss.wilcoxon(ds["LBDA_512_sum_ab"][:,:,pos], ds["OrdRec-UI_512"][:,:,pos], alternative="greater", axis=(0,1)); print(stat_sign)
stat_sign = ss.wilcoxon(ds["OrdRec-UI_512"][:,:,pos], ds["LBDS_512_sum_ab"][:,:,pos], alternative="greater", axis=(0,1)); print(stat_sign)
stat_sign = ss.wilcoxon(ds["LBDS_512_sum_ab"][:,:,pos], ds["OrdRec-UI_512"][:,:,pos], alternative="greater", axis=(0,1)); print(stat_sign)

WilcoxonResult(statistic=array([33.5, 42. , 48. ,  0. ,  0. ,  0. ]), pvalue=array([0.01488644, 0.01027193, 0.01832896, 0.99746898, 0.99746898,
       0.99746898]))
WilcoxonResult(statistic=array([28., 34., 49., 55., 55., 55.]), pvalue=array([0.00887796, 0.012442  , 0.01410568, 0.00253102, 0.00253102,
       0.00253102]))
WilcoxonResult(statistic=array([0., 2., 6., 0., 0., 0.]), pvalue=array([0.99112204, 0.987558  , 0.98589432, 0.99746898, 0.99746898,
       0.99746898]))


In [36]:
res = {k: " &  ".join([f"{v1:.3f} $\pm$ {v2:.3f}" for v1,v2 in zip(d.mean(axis=(0,1))[pos], d.std(axis=(0,1))[pos])]) + "\\\\" for k, d in ds.items()}
for r,v in res.items():
    print(v)

0.784 $\pm$ 0.027 &  0.789 $\pm$ 0.024 &  0.798 $\pm$ 0.013 &  0.798 $\pm$ 0.008 &  0.793 $\pm$ 0.006 &  0.679 $\pm$ 0.003\\
0.845 $\pm$ 0.031 &  0.843 $\pm$ 0.017 &  0.850 $\pm$ 0.011 &  0.837 $\pm$ 0.009 &  0.815 $\pm$ 0.007 &  0.698 $\pm$ 0.003\\
0.957 $\pm$ 0.018 &  0.948 $\pm$ 0.012 &  0.931 $\pm$ 0.004 &  0.909 $\pm$ 0.003 &  0.861 $\pm$ 0.003 &  0.745 $\pm$ 0.003\\
0.915 $\pm$ 0.027 &  0.932 $\pm$ 0.010 &  0.925 $\pm$ 0.006 &  0.899 $\pm$ 0.003 &  0.844 $\pm$ 0.003 &  0.731 $\pm$ 0.003\\
0.980 $\pm$ 0.014 &  0.963 $\pm$ 0.012 &  0.939 $\pm$ 0.008 &  0.887 $\pm$ 0.007 &  0.848 $\pm$ 0.004 &  0.732 $\pm$ 0.002\\
