In [1]:
# Move the kernel's working directory one level up (the output below shows the resulting path).
# NOTE(review): this is not idempotent — re-running the cell climbs another level.
# Presumably the notebook lives one folder below the project root so that `util` is importable; confirm.
%cd ..

/Users/philipphager/Developer/ultr-cm-vs-ips


In [2]:
import altair as alt
#from altair_saver import save
import pandas as pd
from util import load_experiment

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Name of the experiment whose results are loaded for this notebook.
EXPERIMENT_NAME = "dataset_size"

# `load_experiment` hands back three frames: baseline, validation and test results.
baseline_df, val_df, test_df = load_experiment(EXPERIMENT_NAME)

In [4]:
# Give every baseline row a single model label so it can be plotted and grouped
# alongside the rows of `test_df`, which carry their own `model` values.
baseline_df = baseline_df.assign(model="Production Ranker")

In [8]:
# Short display names for the models, keyed by the names stored in the results.
model2name = {
    "Neural PBM - Unbiased": "PBM - Unbiased",
    "Neural PBM - Biased": "PBM - Biased",
    "Neural PBM - Estimated bias": "PBM - Estimated Bias",
    "Pointwise IPS - Unbiased": "Point. IPS - Unbiased",
    "Pointwise IPS - Biased": "Point. IPS - Biased",
}

# `Series.map` with a dict turns every value that is not a key into NaN. Mapping the
# column a second time (i.e. re-running this cell) would therefore erase all labels,
# because the short names are not keys. Letting the short names map to themselves makes
# the cell safe to re-run; names that are in neither set still become NaN, as before.
_model_lookup = {**model2name, **{short: short for short in model2name.values()}}
test_df["model"] = test_df["model"].map(_model_lookup)

In [9]:
def plot(dataset_df, legend=True, width=250, height=225, metric="average_relevant_position", title="", y=[0, 1.0], clip=False, label_y=True):
    """Layer a line, point markers and an error band of `metric` over `n_sessions`.

    Args:
        dataset_df: Data handed to every Altair layer; the encodings read the
            columns `n_sessions`, `model` and the one named by `metric`.
        legend: Accepted by the signature but not read anywhere in the body.
            Colour legends are always disabled; the shape encoding keeps
            Altair's default legend behaviour.
        width: Width given to the line layer's chart.
        height: Height given to the line layer's chart.
        metric: Column to plot. Lines and points encode `mean(metric)`; the
            error band encodes the raw column.
        title: Chart title, set on the line layer.
        y: Domain of the y scale, set on the line layer only.
        clip: Forwarded to the `clip` option of all three marks.
        label_y: When true the y axis is titled with `metric`; otherwise an
            explicit `None` title is passed.

    Returns:
        The line, point and error-band charts combined with `alt.layer`.
    """
    mean_of_metric = f"mean({metric})"

    def sessions_x():
        # Log-scaled x encoding shared by all layers (fresh object per layer).
        return alt.X(
            "n_sessions",
            scale=alt.Scale(type="log"),
            title="Number of Train Queries",
            axis=alt.Axis(format="~s"),
        )

    def model_color():
        # Colour by model, without a colour legend.
        return alt.Color("model", legend=None)

    def hover_fields():
        # Tooltip shown on lines and points.
        return [f"count({metric})", "n_sessions", mean_of_metric]

    y_title = metric if label_y else None

    line_layer = (
        alt.Chart(dataset_df, width=width, height=height, title=title)
        .mark_line(clip=clip)
        .encode(
            x=sessions_x(),
            y=alt.Y(mean_of_metric, scale=alt.Scale(zero=False, domain=y), title=y_title),
            color=model_color(),
            tooltip=hover_fields(),
        )
    )

    point_layer = (
        alt.Chart(dataset_df)
        .mark_point(clip=clip, size=50)
        .encode(
            x=sessions_x(),
            y=alt.Y(mean_of_metric, scale=alt.Scale(zero=False)),
            shape=alt.Shape("model"),
            color=model_color(),
            tooltip=hover_fields(),
        )
    )

    # The band is computed from the raw metric values; its extent is left at
    # the library default.
    band_layer = (
        alt.Chart(dataset_df)
        .mark_errorband(opacity=0.5, clip=clip)
        .encode(
            x=sessions_x(),
            y=alt.Y(metric, scale=alt.Scale(zero=False)),
            color=model_color(),
        )
    )

    return alt.layer(line_layer, point_layer, band_layer)

# One frame per dataset: that dataset's test results followed by its baseline rows.
yahoo_df, istella_df, mslr_df = (
    pd.concat([
        test_df[test_df.dataset == dataset_name],
        baseline_df[baseline_df.dataset == dataset_name],
    ])
    for dataset_name in ("Yahoo", "Istella-S", "MSLR-Web30K")
)

# Options common to all three panels.
panel_defaults = dict(metric="nDCG@10", clip=True)

# One panel per dataset, each with its own y-range; only the left-most panel
# carries a y-axis title.
mslr_panel = plot(mslr_df, legend=False, title="MSLR-WEB30K", y=[0.2, .5], label_y=True, **panel_defaults)
istella_panel = plot(istella_df, legend=False, title="Istella", y=[0.5, 0.75], label_y=False, **panel_defaults)
yahoo_panel = plot(yahoo_df, legend=True, title="Yahoo", y=[0.59, 0.75], label_y=False, **panel_defaults)

# Figure-wide typography and layout.
legend_style = dict(
    orient="bottom",
    title=None,
    labelFont="serif",
    labelFontSize=14,
    columnPadding=20,
)
title_style = dict(
    fontSize=14,
    fontWeight="normal",
    font="serif",
)
axis_style = dict(
    titlePadding=10,
    titleFontSize=14,
    titleFontWeight="normal",
    titleFont="serif",
    labelFontSize=10,
    labelFontWeight="normal",
    labelFont="serif",
    tickCount=6,
)

chart = (
    (mslr_panel | istella_panel | yahoo_panel)
    .configure_legend(**legend_style)
    .configure_title(**title_style)
    .configure_axis(**axis_style)
)

# PDF export is switched off; the matching `altair_saver` import is commented out too.
#save(chart, "figures/results.pdf")
chart

In [10]:
# Non-null entries per column for every (model, n_sessions) group of the MSLR frame.
mslr_group_counts = mslr_df.groupby(["model", "n_sessions"]).count()
mslr_group_counts

Unnamed: 0_level_0,Unnamed: 1_level_0,ARP,test_ndcg@1,nDCG@5,nDCG@10,nDCG,epoch,dir,random_state,experiment_name,trainer__target_,...,model__target_,model_layers,model_dropouts,model_activation__target_,model_n_results,model_n_features,model_loss__target_,model_optimizer,model_learning_rate,model_position_bias
model,n_sessions,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
Point. IPS - Unbiased,100,10,10,10,10,10,10,10,10,10,10,...,10,10,10,10,10,10,10,10,10,10
Point. IPS - Unbiased,1000,10,10,10,10,10,10,10,10,10,10,...,10,10,10,10,10,10,10,10,10,10
Point. IPS - Unbiased,10000,10,10,10,10,10,10,10,10,10,10,...,10,10,10,10,10,10,10,10,10,10
Point. IPS - Unbiased,100000,10,10,10,10,10,10,10,10,10,10,...,10,10,10,10,10,10,10,10,10,10
Point. IPS - Unbiased,1000000,10,10,10,10,10,10,10,10,10,10,...,10,10,10,10,10,10,10,10,10,10
Point. IPS - Unbiased,10000000,6,6,6,6,6,6,6,6,6,6,...,6,6,6,6,6,6,6,6,6,6
Production Ranker,100,10,10,10,10,10,0,0,10,10,10,...,10,10,10,10,10,10,10,10,10,10
Production Ranker,1000,10,10,10,10,10,0,0,10,10,10,...,10,10,10,10,10,10,10,10,10,10
Production Ranker,10000,10,10,10,10,10,0,0,10,10,10,...,10,10,10,10,10,10,10,10,10,10
Production Ranker,100000,10,10,10,10,10,0,0,10,10,10,...,10,10,10,10,10,10,10,10,10,10


# altair_saver-0.5.0
#!pip install altair_saver
# !pip install selenium
!conda install -c conda-forge vega-cli vega-lite-cli -y
# altair_saver