!mamba install -c conda-forge "vegafusion-python-embed>=1.4.0" "vegafusion>=1.4.0" -y

In [1]:
import glob
import os

import altair as alt
import pandas as pd

from pathlib import Path
from theme import theme

In [2]:
# Route chart data through the "vegafusion" data transformer.
alt.data_transformers.enable("vegafusion")

# Register the theme imported from the local `theme` module under a single
# name and activate it; the final expression is left bare so the cell shows
# the resulting registry state.
theme_name = "latex"
alt.themes.register(theme_name, theme)
alt.themes.enable(theme_name)

ThemeRegistry.enable('latex')

# Load Results

In [100]:
# Root folder of the experiment outputs; `load_data` looks for run directories
# under `directory / <dataset name>`.
directory = Path("outputs")
# Dataset names to load; each entry is passed to `load_data` when the result
# frames are built.
data = ["baidu", "ltr", "uva"]

In [17]:
def run_complete(file: Path):
    """Tell whether `file` is a run directory holding all three result files.

    Args:
        file: Candidate path to check.

    Returns:
        True only if `file` is a directory and `val.parquet`,
        `test_click.parquet` and `test_rel.parquet` all exist inside it;
        False otherwise.
    """
    if not file.is_dir():
        return False

    expected_files = ("val.parquet", "test_click.parquet", "test_rel.parquet")
    return all((file / name).exists() for name in expected_files)

def parse_model_name(path: Path):
    """Parse a run directory name of the form ``k1=v1,k2=v2,...`` into a dict.

    Args:
        path: Path whose last component (``path.name``) encodes the run
            options as comma-separated ``key=value`` entries.

    Returns:
        Dict mapping every option name to its value; both are strings.

    Raises:
        ValueError: If a comma-separated entry contains no ``=``.
    """
    options = {}

    for option in path.name.split(","):
        # Split on the first "=" only, so a value that itself contains "="
        # is kept intact instead of breaking the key/value unpacking.
        key, separator, value = option.partition("=")
        if not separator:
            raise ValueError(
                f"Malformed option {option!r} in run name {path.name!r}: expected key=value"
            )
        options[key] = value

    return options

def parse_result_file(run: Path, file: str):
    """Load one result file of a run and tag every row with the run's metadata.

    Args:
        run: Run directory; its name is parsed with `parse_model_name`.
        file: Name of the parquet file inside `run`.

    Returns:
        The loaded DataFrame with four columns set: `run` (the directory
        name) plus `model`, `data` and `random_state`, taken as strings from
        the options parsed out of the directory name.

    Raises:
        KeyError: If the directory name lacks a `model`, `data` or
            `random_state` option.
    """
    options = parse_model_name(run)
    result_df = pd.read_parquet(run / file)

    return result_df.assign(
        run=run.name,
        model=options["model"],
        data=options["data"],
        random_state=options["random_state"],
    )

def load_data(data, file: str):
    """Load `file` from every complete run of one dataset into a single frame.

    Args:
        data: Dataset name; runs are looked up under `directory / data`.
        file: Name of the parquet file to read from each run directory.

    Returns:
        Concatenation of the per-run frames produced by `parse_result_file`,
        in sorted run-directory order.

    Raises:
        ValueError: If the dataset folder contains no complete run.
    """
    data_path = directory / data
    # iterdir() yields entries in filesystem-dependent order; sorting makes
    # the row order of the result reproducible across machines.
    runs = sorted(f for f in data_path.iterdir() if run_complete(f))
    print(f"Loaded {len(runs)} run(s) for {data}")

    if not runs:
        # pd.concat([]) would also raise ValueError, but without saying which
        # dataset folder was empty.
        raise ValueError(f"No complete runs found in {data_path}")

    return pd.concat([parse_result_file(run, file) for run in runs])

In [35]:
# One frame of "test_rel.parquet" rows per dataset, stacked into a single frame.
rel_frames = [load_data(dataset, "test_rel.parquet") for dataset in data]
rel_df = pd.concat(rel_frames)
rel_df.head()

Loaded 55 run(s) for baidu
Loaded 55 run(s) for ltr
Loaded 55 run(s) for uva


Unnamed: 0,dcg@01,dcg@03,dcg@05,dcg@10,frequency_bucket,mrr@10,ndcg@10,query_id,run,model,data,random_state
0,3.0,4.5,8.675295,13.93903,8,1.0,0.4734,1,"data=baidu,es_patience=5,logging=True,max_epoc...",ips-listwise,baidu,1906
1,0.0,0.0,0.0,0.0,9,0.0,0.0,2,"data=baidu,es_patience=5,logging=True,max_epoc...",ips-listwise,baidu,1906
2,1.0,6.916508,6.916508,9.765993,3,1.0,0.264485,3,"data=baidu,es_patience=5,logging=True,max_epoc...",ips-listwise,baidu,1906
3,0.0,0.5,0.5,1.718555,8,0.333333,0.145399,4,"data=baidu,es_patience=5,logging=True,max_epoc...",ips-listwise,baidu,1906
4,0.0,3.5,3.5,5.708254,6,0.333333,0.324325,5,"data=baidu,es_patience=5,logging=True,max_epoc...",ips-listwise,baidu,1906


# Plot Ranking Results

In [140]:
# Display label for each model id (the `model` option parsed from the run
# directory name). The ranking plot keeps only models listed here and uses
# the insertion order of this dict as the x-axis order.
model2name = {
    "naive-pointwise": "Point. Naive",
    "pbm-pointwise": "Point. Two-Tower",
    "regression-em": "Point. RegressionEM",
    "ips-pointwise": "Point. IPS",
    "naive-listwise": "List. Naive",
    "ips-listwise": "List. IPS",
    "dla": "List. DLA",
    "naive-lambda-rank": "LambdaRank Naive",
    "pairwise-debias": "LambdaRank PairD",
}

# Bar color (hex) for each model id: blues for the "Point." models, oranges
# for the "List." models, greens for the LambdaRank models.
model2color = {
    "naive-pointwise": "#3182bd",
    "pbm-pointwise": "#6baed6",
    "regression-em": "#9ecae1",
    "ips-pointwise": "#c6dbef",
    "naive-listwise": "#e6550d",
    "ips-listwise": "#fd8d3c",
    "dla": "#fdae6b",
    "naive-lambda-rank": "#31a354",
    "pairwise-debias": "#74c476",
}

# Display label for each dataset id; the insertion order is used as the
# facet column order.
data2name = {
    "baidu": "Baidu BERT Embeddings",
    "uva": "Our BERT Embeddings",
    "ltr": "LTR Features"
}

# Column of `rel_df` that the ranking plot aggregates and displays.
metric = "dcg@10"

In [186]:
# Display order of the x-axis (models) and of the facet columns (datasets),
# taken from the insertion order of the label dictionaries.
ranking_model_order = list(model2name.values())
ranking_data_order = list(data2name.values())

# Mean of `metric` over all rows of each (dataset, model, seed) group.
metric_df = (
    rel_df.groupby(["data", "model", "random_state"])
    .aggregate({metric: "mean"})
    .reset_index()
)
# Keep only models that have a display label. The explicit copy makes the
# column assignments below act on an independent frame instead of a filtered
# selection, which avoids pandas' SettingWithCopyWarning.
metric_df = metric_df[metric_df["model"].isin(list(model2name))].copy()
# The color lookup uses the model id, so it must run before ids become labels.
metric_df["color"] = metric_df["model"].map(model2color)
metric_df["model"] = metric_df["model"].map(model2name)
metric_df["data"] = metric_df["data"].map(data2name)

base = alt.Chart(metric_df, width=275)

# Bar height: mean over the per-seed rows of each model.
bars = base.mark_bar().encode(
    x=alt.X("model", title=None, sort=ranking_model_order, scale=alt.Scale(paddingOuter=0.1)).axis(labelAngle=45, offset=2),
    y=alt.Y(f"mean({metric})").scale(zero=False),
    color=alt.Color("model", title=None, legend=None).scale(range={"field": "color"}),
    tooltip=["model", f"mean({metric}):Q"],
)

# Confidence-interval error bars computed from the same per-seed rows.
error = base.mark_errorbar(extent="ci").encode(
    x=alt.X("model", sort=ranking_model_order),
    y=alt.Y(metric, title=metric.upper()),
    strokeWidth=alt.value(4)
)

# Reference value drawn as a dashed horizontal rule, looked up per metric so
# that switching `metric` neither points the rule at a missing column nor
# draws a dcg@10 value on another metric's scale.
# NOTE(review): 6.5 is carried over unchanged from the previously hard-coded
# dcg@10 baseline; its origin is not documented in this notebook - confirm.
baseline_values = {"dcg@10": 6.5}

layers = bars + error
if metric in baseline_values:
    baseline = alt.Chart(pd.DataFrame({metric: [baseline_values[metric]]})).mark_rule(strokeDash=[4,2], opacity=0.2).encode(
        y=alt.Y(metric, title=metric.upper()),
        color=alt.value("#393b79"),
        strokeWidth=alt.value(2)
    )
    layers = layers + baseline

chart = layers.facet(
    column=alt.Column("data", title="", sort=ranking_data_order),
    spacing=10,
)

chart

In [168]:
svg_file = Path("figures/") / f"{metric}.svg"
pdf_file = Path("figures/") / f"{metric}.pdf"
# to .svg
chart.save(svg_file)
# to .pdf
!rsvg-convert -f pdf {svg_file} > {pdf_file}

# Plot Click Prediction

In [190]:
# One frame of "test_click.parquet" rows per dataset, stacked into a single frame.
click_frames = [load_data(dataset, "test_click.parquet") for dataset in data]
click_df = pd.concat(click_frames)
click_df.head()

Loaded 55 run(s) for baidu
Loaded 55 run(s) for ltr
Loaded 55 run(s) for uva


Unnamed: 0,BC_dcg@01,BC_dcg@03,BC_dcg@05,BC_dcg@10,BC_mrr@10,BC_ndcg@10,loss,nll,query_id,run,model,data,random_state
0,1.0,1.335689,1.488982,1.547669,1.0,0.991052,0.887683,0.26901,22618,"data=baidu,es_patience=5,logging=True,max_epoc...",ips-listwise,baidu,1906
1,0.414214,1.175104,1.262859,1.434971,0.5,0.863773,1.805299,0.573614,572293,"data=baidu,es_patience=5,logging=True,max_epoc...",ips-listwise,baidu,1906
2,0.414214,1.175104,1.258817,1.431645,0.5,0.861771,2.25022,0.484514,516399,"data=baidu,es_patience=5,logging=True,max_epoc...",ips-listwise,baidu,1906
3,0.414214,0.569263,0.676938,0.832772,0.0,0.949864,2.08002,0.255486,551606,"data=baidu,es_patience=5,logging=True,max_epoc...",ips-listwise,baidu,1906
4,0.090508,0.481808,0.932702,1.092055,0.2,0.657357,2.224518,0.423497,285250,"data=baidu,es_patience=5,logging=True,max_epoc...",ips-listwise,baidu,1906


In [191]:
# Shorter dataset labels for the click-prediction plot; the insertion order is
# used as the facet column order.
# NOTE(review): this rebinds `data2name` (and `model2name` below), which the
# ranking-plot cells also read. Re-running those cells after this one picks up
# these labels instead of the ones defined for the ranking plot.
data2name = {
    "baidu": "Baidu BERT",
    "uva": "Our BERT",
    "ltr": "LTR"
}

# Display label for each model id in the click-prediction plot; the insertion
# order is used as the x-axis order.
model2name = {
    "naive-pointwise": "Naive",
    "pbm-pointwise": "Two-Tower",
    "regression-em": "RegressionEM",
    "ips-pointwise": "IPS",
    "naive-listwise": "List. Naive",
    "ips-listwise": "List. IPS",
    "dla": "List. DLA",
    "naive-lambda-rank": "LambdaRank Naive",
    "pairwise-debias": "LambdaRank PairD",
}

In [195]:
# Models shown in the click-prediction plot, selected by model id. Filtering
# on ids before they are replaced by display labels keeps the selection
# independent of which label mapping `model2name` is currently bound to;
# matching on labels silently produced an empty plot under other labels.
click_models = ["naive-pointwise", "pbm-pointwise", "regression-em", "ips-pointwise"]
click_model_order = list(model2name.values())
click_data_order = list(data2name.values())

# Mean negative log-likelihood of each (dataset, model, seed) group.
metric_df = (
    click_df.groupby(["data", "model", "random_state"])
    .aggregate({"nll": "mean"})
    .reset_index()
)
# NOTE(review): groups whose mean nll is 1 or more are dropped; the reason for
# this threshold is not documented in this notebook - confirm.
# The explicit copy lets the column assignments below act on an independent
# frame rather than on a filtered selection.
metric_df = metric_df[(metric_df["nll"] < 1) & metric_df["model"].isin(click_models)].copy()
# The color lookup uses the model id, so it must run before ids become labels.
metric_df["color"] = metric_df["model"].map(model2color)
metric_df["model"] = metric_df["model"].map(model2name)
metric_df["data"] = metric_df["data"].map(data2name)

base = alt.Chart(metric_df, width=100)

# Bar height: mean over the per-seed rows of each model.
bars = base.mark_bar().encode(
    x=alt.X("model", title=None, sort=click_model_order, scale=alt.Scale(paddingOuter=0.1)).axis(labelAngle=45, offset=2),
    y=alt.Y("mean(nll)", title="Negative Log-Likelihood").scale(zero=False),
    color=alt.Color("model", title=None, legend=None).scale(range={"field": "color"}),
)

# Confidence-interval error bars computed from the same per-seed rows.
error = base.mark_errorbar(extent="ci").encode(
    x=alt.X("model", title=None, sort=click_model_order, scale=alt.Scale(paddingOuter=0.1)).axis(labelAngle=45, offset=2),
    y=alt.Y("nll", title="Negative Log-Likelihood"),
    strokeWidth=alt.value(4)
)

chart = (bars + error).facet(
    column=alt.Column("data", title="", sort=click_data_order),
    spacing=5
).configure_legend(
    orient="top",
)

chart

In [196]:
svg_file = Path("figures/") / f"nll.svg"
pdf_file = Path("figures/") / f"nll.pdf"
# to .svg
chart.save(svg_file)
# to .pdf
!rsvg-convert -f pdf {svg_file} > {pdf_file}