## RQ3

In [19]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [20]:
import os
import sys

import matplotlib.lines as mlines
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
import numpy as np
import pandas as pd
import scipy.stats as ss

from sklearn.metrics import ndcg_score

from dotenv import load_dotenv

load_dotenv()
project_root = os.environ["PROJECT_ROOT"]
sys.path.append(project_root)

In [21]:
import src.modules.result_analysis.loading as result_loading
import src.modules.result_analysis.model_standardization as ms

In [22]:
figures_root = os.path.join(project_root, "latex", "figures")
os.makedirs(figures_root, exist_ok=True)

In [23]:
plt.rc('font', size=20)
plt.rc('text', usetex=True)
plt.rc('text.latex', preamble=r'\usepackage{amsmath,amssymb,bm,bbm,lmodern}')

In [24]:
def loglik(df):
    return np.log(np.take_along_axis((df[bins_mass_cols].values+1e-6)/(1.+1e-5), (df["rating"]*2-1).astype(int).values[:,None], axis=1)).sum()

In [25]:
bins_mass_cols = [f"bins_mass_{x}" for x in range(10)]
figsize = (6.095, 3.047)

In [26]:
NUM_FOLDS = 10

data_path_templates = {
    "CMF_128": os.path.join(project_root, "logs", "LBD_results", "CMF_128", "CMF_128-{}-0", "export"),
    "OrdRec-U_512": os.path.join(project_root, "logs", "LBD_results", "OrdRec-U_512", "OrdRec-U_512-{}-0", "export"), #TH
    "OrdRec-UI_512": os.path.join(project_root, "logs", "LBD_results", "OrdRec-UI_512", "OrdRec-UI_512-{}-0", "export"),
    "LBDS_512_sum_ab": os.path.join(project_root, "logs", "LBD_results", "LBDS_512_sum_ab", "LBDS_512_sum_ab-{}-0", "export"),
    "LBDA_512_sum_ab": os.path.join(project_root, "logs", "LBD_results", "LBDA_512_sum_ab", "LBDA_512_sum_ab-{}-0", "export"),
}

print("Loading data")
data = {k: [result_loading.path_to_df(v.format(i)) for i in range(NUM_FOLDS)] for k, v in data_path_templates.items()}
print("Standardising")
confidence_models = {k: [ms.standardise_model(k, df) for df in dfs] for k, dfs in data.items()}

Loading data
Standardising


In [27]:
corr_types = {"linear": lambda a,b: np.corrcoef(a,b)[0,1], "rank": lambda a,b: ss.kendalltau(a,b).statistic}
corr = {k: {m: [] for m in confidence_models} for k in corr_types}
for m, dfs in confidence_models.items():
    for k, corr_fn in corr_types.items():
        for df in dfs:
            corr[k][m].append(corr_fn(np.abs(df["err_mean"]), df["var"]))

In [28]:
# Linear Correlation
alternative = "greater"
metric = corr["linear"]
keys = list(metric.keys())
stat_sign = np.zeros((len(metric), len(metric)))
for i, k in enumerate(keys):
    for j, k2 in enumerate(keys):
        if i == j:
            continue
        stat_sign[i,j] = ss.wilcoxon(metric[k], metric[k2], alternative=alternative).pvalue
print("Linear Correlation")
for m, v in metric.items():
    print(f"{m}: {np.mean(v)} ({np.std(v)})")
print(f"\n(i,j) is p-value for alternative hypothesis that i is {alternative} than j.")
print(pd.DataFrame(stat_sign, index=keys, columns=keys))

Linear Correlation
CMF_128: 0.09328665252860693 (0.000990296356587272)
OrdRec-U_512: 0.15691181082642391 (0.0008132380657691038)
OrdRec-UI_512: 0.17516792911619994 (0.0008625270693139012)
LBDS_512_sum_ab: 0.28901988238306386 (0.0012061603931369973)
LBDA_512_sum_ab: 0.32493451705608944 (0.0019791117415829114)

(i,j) is p-value for alternative hypothesis that i is greater than j.
                  CMF_128  OrdRec-U_512  OrdRec-UI_512  LBDS_512_sum_ab  \
CMF_128          0.000000      1.000000       1.000000         1.000000   
OrdRec-U_512     0.000977      0.000000       1.000000         1.000000   
OrdRec-UI_512    0.000977      0.000977       0.000000         1.000000   
LBDS_512_sum_ab  0.000977      0.000977       0.000977         0.000000   
LBDA_512_sum_ab  0.000977      0.000977       0.000977         0.000977   

                 LBDA_512_sum_ab  
CMF_128                      1.0  
OrdRec-U_512                 1.0  
OrdRec-UI_512                1.0  
LBDS_512_sum_ab             

In [29]:
# Rank Correlation
alternative = "greater"
metric = corr["rank"]
keys = list(metric.keys())
stat_sign = np.zeros((len(metric), len(metric)))
for i, k in enumerate(keys):
    for j, k2 in enumerate(keys):
        if i == j:
            continue
        stat_sign[i,j] = ss.wilcoxon(metric[k], metric[k2], alternative=alternative).pvalue
print("Rank Correlation")
for m, v in metric.items():
    print(f"{m}: {np.mean(v)} ({np.std(v)})")
print(f"\n(i,j) is p-value for alternative hypothesis that i is {alternative} than j.")
print(pd.DataFrame(stat_sign, index=keys, columns=keys))

Rank Correlation
CMF_128: 0.052425820506652045 (0.0006799255076461247)
OrdRec-U_512: 0.09387730961427639 (0.0006442537681640701)
OrdRec-UI_512: 0.11195706791857667 (0.0013141482229959273)
LBDS_512_sum_ab: 0.18146497378014406 (0.0010254855388187659)
LBDA_512_sum_ab: 0.2103252934695885 (0.0008841942245131027)

(i,j) is p-value for alternative hypothesis that i is greater than j.
                  CMF_128  OrdRec-U_512  OrdRec-UI_512  LBDS_512_sum_ab  \
CMF_128          0.000000      1.000000       1.000000         1.000000   
OrdRec-U_512     0.000977      0.000000       1.000000         1.000000   
OrdRec-UI_512    0.000977      0.000977       0.000000         1.000000   
LBDS_512_sum_ab  0.000977      0.000977       0.000977         0.000000   
LBDA_512_sum_ab  0.000977      0.000977       0.000977         0.000977   

                 LBDA_512_sum_ab  
CMF_128                      1.0  
OrdRec-U_512                 1.0  
OrdRec-UI_512                1.0  
LBDS_512_sum_ab              

In [12]:
confidence_models = {k: pd.concat(v) for k, v in confidence_models.items()}

### Figure 3

In [13]:
def postprocess_fig(fig, ax, xlabel=None, ylabel=None, title=None, xscale="linear", yscale="linear"):
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.set_xscale(xscale)
    ax.set_yscale(yscale)
    return fig
    
def save_fig(fig, path, *args, **kwargs):
    fig.savefig(path, *args, format="pdf", bbox_inches="tight", pad_inches=0, **kwargs)
    
def save_legend(path, ncol, fontsize=18, line_params=[]):
    figlegend = plt.figure(figsize=(0.5, 0.5))
    figlegend.legend(handles=[mlines.Line2D([], [], **line) for line in line_params],
               fontsize=fontsize,
               loc='center',
               ncol=ncol,
               frameon=False,
               borderaxespad=0,
               borderpad=0,
               labelspacing=0.2,
               columnspacing=1.)
    figlegend.savefig(path, bbox_inches='tight')

In [14]:
models = ["CMF_128", "OrdRec-UI_512", "LBDA_512_sum_ab"]
colors = ["C2", "C0", "C3"]

In [15]:
fig_axes = []

In [None]:
df = confidence_models["LBDA_512_sum_ab"]
fig, ax = plt.subplots(figsize=figsize)
fig_axes.append((fig, ax))
disp = lambda: df["var"]
err = lambda: np.abs(df["err_mean"])
non_outliers = np.ones_like(df["rating"], dtype=bool) 
num_bins = 1000

for i, (model, c) in enumerate(zip(models, colors)):
    df = confidence_models[model]
    non_outliers = (disp() <= disp().quantile(0.999)) & (disp() >= disp().quantile(0.0001))
    x, y = disp()[non_outliers], err()[non_outliers]
    _, x_bins = np.histogram(x, bins=num_bins)
    x_values, y_values = x_bins[:-1], y.groupby(pd.cut(x, x_bins)).mean().values
    ax.scatter(x_values, y_values, label=model, c=c)
    
line_params = [
        {
        "color": "C2",
        "linestyle": "-",
        "linewidth": 10,
        "markersize": 1,
        "fillstyle": "none",
        "label": "CMF"
    },
            {
        "color": "C0",
        "linestyle": "-",
        "linewidth": 10,
        "markersize": 1,
        "fillstyle": "none",
        "label": "OrdRec-UI"
    }, 
        {
        "color": "C3",
        "linestyle": "-",
        "linewidth": 10,
        "markersize": 1,
        "fillstyle": "none",
        "label": "LBD-A"
    }
]
ncol=1
fontsize=18
plt.gca().yaxis.set_ticks_position('both')
ax.set_ylim((0, ax.get_ylim()[1]))

  x_values, y_values = x_bins[:-1], y.groupby(pd.cut(x, x_bins)).mean().values
  x_values, y_values = x_bins[:-1], y.groupby(pd.cut(x, x_bins)).mean().values
  x_values, y_values = x_bins[:-1], y.groupby(pd.cut(x, x_bins)).mean().values


(0.0, 1.5104379545897246)

Error in callback <function _draw_all_if_interactive at 0x1099bfa30> (for post_execute), with arguments args (),kwargs {}:


RuntimeError: latex was not able to process the following string:
b'lp'

Here is the full command invocation and its output:

latex -interaction=nonstopmode --halt-on-error --output-directory=tmp0yfr5i_h cebbd2118aa9b0e8268656ea497b427a.tex

This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) (preloaded format=latex)
 restricted \write18 enabled.
entering extended mode
(./cebbd2118aa9b0e8268656ea497b427a.tex
LaTeX2e <2023-11-01> patch level 1
L3 programming layer <2024-02-20>
(/usr/local/texlive/2024basic/texmf-dist/tex/latex/base/article.cls
Document Class: article 2023/05/17 v1.4n Standard LaTeX document class
(/usr/local/texlive/2024basic/texmf-dist/tex/latex/base/size10.clo))

! LaTeX Error: File `type1cm.sty' not found.

Type X to quit or <RETURN> to proceed,
or enter new name. (Default extension: sty)

Enter file name: 
! Emergency stop.
<read *> 
         
l.7 \usepackage
               {type1ec}^^M
No pages of output.
Transcript written on tmp0yfr5i_h/cebbd2118aa9b0e8268656ea497b427a.log.




RuntimeError: latex was not able to process the following string:
b'lp'

Here is the full command invocation and its output:

latex -interaction=nonstopmode --halt-on-error --output-directory=tmpsc814fsy cebbd2118aa9b0e8268656ea497b427a.tex

This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) (preloaded format=latex)
 restricted \write18 enabled.
entering extended mode
(./cebbd2118aa9b0e8268656ea497b427a.tex
LaTeX2e <2023-11-01> patch level 1
L3 programming layer <2024-02-20>
(/usr/local/texlive/2024basic/texmf-dist/tex/latex/base/article.cls
Document Class: article 2023/05/17 v1.4n Standard LaTeX document class
(/usr/local/texlive/2024basic/texmf-dist/tex/latex/base/size10.clo))

! LaTeX Error: File `type1cm.sty' not found.

Type X to quit or <RETURN> to proceed,
or enter new name. (Default extension: sty)

Enter file name: 
! Emergency stop.
<read *> 
         
l.7 \usepackage
               {type1ec}^^M
No pages of output.
Transcript written on tmpsc814fsy/cebbd2118aa9b0e8268656ea497b427a.log.




<Figure size 609.5x304.7 with 1 Axes>

In [17]:
xlim1, ylim1 = ax.get_xlim(), ax.get_ylim()

In [None]:
postprocess_params = {
}
save_params = {
    "path": f"{figures_root}/RQ3_var_vs_mae.pdf",
    "dpi": 300,    
}
save_fig(postprocess_fig(fig, ax, **postprocess_params), **save_params)

RuntimeError: latex was not able to process the following string:
b'lp'

Here is the full command invocation and its output:

latex -interaction=nonstopmode --halt-on-error --output-directory=tmpr2yfugd5 cebbd2118aa9b0e8268656ea497b427a.tex

This is pdfTeX, Version 3.141592653-2.6-1.40.26 (TeX Live 2024) (preloaded format=latex)
 restricted \write18 enabled.
entering extended mode
(./cebbd2118aa9b0e8268656ea497b427a.tex
LaTeX2e <2023-11-01> patch level 1
L3 programming layer <2024-02-20>
(/usr/local/texlive/2024basic/texmf-dist/tex/latex/base/article.cls
Document Class: article 2023/05/17 v1.4n Standard LaTeX document class
(/usr/local/texlive/2024basic/texmf-dist/tex/latex/base/size10.clo))

! LaTeX Error: File `type1cm.sty' not found.

Type X to quit or <RETURN> to proceed,
or enter new name. (Default extension: sty)

Enter file name: 
! Emergency stop.
<read *> 
         
l.7 \usepackage
               {type1ec}^^M
No pages of output.
Transcript written on tmpr2yfugd5/cebbd2118aa9b0e8268656ea497b427a.log.




In [None]:
fig, ax = plt.subplots(figsize=figsize)
disp = lambda: df["var"]
err = lambda: np.abs(df["err_mean"])
non_outliers = np.ones_like(df["rating"], dtype=bool) 
num_bins = 1000

for i, (model, c) in enumerate(zip(models, colors)):
    df = confidence_models[model]
    non_outliers = (disp() <= disp().quantile(0.999)) & (disp() >= disp().quantile(0.0001))
    x, y = disp()[non_outliers], err()[non_outliers]
    _, x_bins = np.histogram(x, bins=num_bins)
    x_bins2 = np.arange(num_bins)/num_bins
    groups = y.groupby(pd.cut(x, x_bins)).mean()
    x_values, y_values = x_bins2, groups.values
    ax.scatter(x_values, y_values, label=model, c=c)
plt.gca().yaxis.set_ticks_position('both')
ax.set_ylim(ylim1)

In [None]:
postprocess_params = {
}
save_params = {
    "path": f"{figures_root}/RQ3_var_vs_mae_rescaled_minmax.pdf",
    "dpi": 300,    
}
save_fig(postprocess_fig(fig, ax, **postprocess_params), **save_params)

In [None]:
fig, ax = plt.subplots(figsize=figsize)
disp = lambda: df["var"]
err = lambda: np.abs(df["err_mean"])
num_bins = 1000
non_outliers = np.ones_like(df["rating"], dtype=bool) 

for i, (model, c) in enumerate(zip(models, colors)):    
    df = confidence_models[model]
    non_outliers = (disp() <= disp().quantile(0.999)) & (disp() >= disp().quantile(0.001))
    x, y = disp()[non_outliers], err()[non_outliers]
    x_bins = np.array([pd.qcut(x, num_bins, duplicates="drop").values.categories[0].left] + [_.right for _ in pd.qcut(x,num_bins,duplicates="drop").values.categories])
    x_bins2 = np.array([pd.qcut(x, num_bins, duplicates="drop", labels=np.arange(num_bins)).values.categories[0]] + [_ for _ in pd.qcut(x,num_bins,duplicates="drop", labels=np.arange(num_bins)).values.categories])
    x_values, y_values = x_bins[:-1], y.groupby(pd.cut(x, x_bins)).mean().values
    conf = y.groupby(pd.cut(x, x_bins)).std().values
    if model.startswith("OrdRec"):
        ordrec_vals = y_values
    ax.scatter(np.arange(num_bins)/num_bins, y_values, label=model, c=c, alpha=1.)
plt.gca().yaxis.set_ticks_position('both')
ax.set_ylim(ylim1)

In [None]:
postprocess_params = {
}
save_params = {
    "path": f"{figures_root}/RQ3_var_vs_mae_rescaled_quantiles.pdf",
    "dpi": 300,    
}
save_fig(postprocess_fig(fig, ax, **postprocess_params), **save_params)

In [None]:
plotting_params = [
          {
        "color": "C2",
        "linestyle": "-",
        "linewidth": 10,
        "markersize": 10,
        "fillstyle": "none",
        "label": "CMF"
    }, {
        "color": "C0",
        "linestyle": "-",
        "linewidth": 10,
        "markersize": 10,
        "fillstyle": "none",
        "label": "OrdRec-UI"
    }, {
        "color": "C3",
        "linestyle": "-",
        "linewidth": 10,
        "markersize": 10,
        "fillstyle": "none",
        "label": "LBD-A"
    }
]

In [None]:
save_legend(f"{figures_root}/RQ3_legend.pdf", 3, line_params=plotting_params)

### Var vs mean pred

In [None]:
models = ["CMF_128", "OrdRec-UI_512", "LBDA_512_sum_ab"]
colors = ["C2", "C0", "C3"]

In [None]:
fig_axes = []

In [None]:
df = confidence_models["LBDA_512_sum_ab"]
fig, ax = plt.subplots(figsize=figsize)
fig_axes.append((fig, ax))
disp = lambda: df["var"]
err = lambda: df["mean"]
non_outliers = np.ones_like(df["rating"], dtype=bool) 
num_bins = 1000

for i, (model, c) in enumerate(zip(models, colors)):
    df = confidence_models[model]
    non_outliers = (disp() <= disp().quantile(0.999)) & (disp() >= disp().quantile(0.0001))
    x, y = disp()[non_outliers], err()[non_outliers]
    _, x_bins = np.histogram(x, bins=num_bins)
    x_values, y_values = x_bins[:-1], y.groupby(pd.cut(x, x_bins)).mean().values
    ax.scatter(x_values, y_values, label=model, c=c)
ax.set_xlim(xlim1)
ax.xaxis.set_major_locator(ticker.MultipleLocator(0.5))
plt.gca().yaxis.set_ticks_position('both')

In [None]:
ylim2 = ax.get_ylim()

In [None]:
postprocess_params = {
}
save_params = {
    "path": f"{figures_root}/RQ3_var_vs_pred_mean.pdf",
    "dpi": 300,    
}
save_fig(postprocess_fig(fig, ax, **postprocess_params), **save_params)

In [None]:
fig, ax = plt.subplots(figsize=figsize)
disp = lambda: df["var"]
err = lambda: df["mean"]

num_bins = 1000

for i, (model, c) in enumerate(zip(models, colors)):
    df = confidence_models[model]
    non_outliers = (disp() <= disp().quantile(0.999)) & (disp() >= disp().quantile(0.0001))
    x, y = disp()[non_outliers], err()[non_outliers]
    _, x_bins = np.histogram(x, bins=num_bins)
    x_bins2 = np.arange(num_bins)/num_bins
    groups = y.groupby(pd.cut(x, x_bins)).mean()
    x_values, y_values = x_bins2, groups.values
    ax.scatter(x_values, y_values, label=model, c=c)
ax.xaxis.set_major_locator(ticker.MultipleLocator(0.25))
plt.gca().yaxis.set_ticks_position('both')
ax.set_ylim(ylim2)

In [None]:
postprocess_params = {
}
save_params = {
    "path": f"{figures_root}/RQ3_var_vs_pred_mean_rescaled_minmax.pdf",
    "dpi": 300,    
}
save_fig(postprocess_fig(fig, ax, **postprocess_params), **save_params)

In [None]:
fig, ax = plt.subplots(figsize=figsize)
disp = lambda: df["var"]
err = lambda: df["mean"]
num_bins = 1000

for i, (model, c) in enumerate(zip(models, colors)):
    non_outliers = (disp() <= disp().quantile(0.999)) & (disp() >= disp().quantile(0.0001))
    df = confidence_models[model]
    x, y = disp()[non_outliers], err()[non_outliers]
    x_bins = np.array([pd.qcut(x, num_bins, duplicates="drop").values.categories[0].left] + [_.right for _ in pd.qcut(x,num_bins,duplicates="drop").values.categories])
    num_bins_kept = len(pd.qcut(x, num_bins, duplicates="drop").values.categories)
    x_bins2 = pd.qcut(x, num_bins, duplicates="drop", labels=np.arange(num_bins_kept)/num_bins_kept).values.categories
    x_values, y_values = x_bins[:-1], y.groupby(pd.cut(x, x_bins)).mean().values
    
    conf = y.groupby(pd.cut(x, x_bins)).std().values
    ax.scatter(x_bins2, y_values, label=model, c=c)
plt.gca().yaxis.set_ticks_position('both')

ax.set_ylim(ylim2)

In [None]:
postprocess_params = {
}
save_params = {
    "path": f"{figures_root}/RQ3_var_vs_pred_mean_rescaled_quantiles.pdf",
    "dpi": 300,    
}
save_fig(postprocess_fig(fig, ax, **postprocess_params), **save_params)