In [1]:
import altair as alt
import pandas as pd

from pathlib import Path
from theme import theme

In [2]:
# Register the theme object imported from the local `theme` module under a
# short key, then activate it by that same key.
theme_key = "latex"
alt.themes.register(theme_key, theme)
alt.themes.enable(theme_key)

ThemeRegistry.enable('latex')

In [3]:
# Output folder for exported figures, and the file stem (without extension)
# used for this notebook's figure.
directory = Path("figures/")
name = "position-bias"

## Figure 2: Position bias estimates on Baidu ULTR

In [203]:
# Maps the method identifier used in the data files to the label shown in the
# figure. Insertion order is significant: the cells below derive both the
# legend order and the z-index ranking from `method2name.values()`.
method2name = dict(
    [
        ("ctr", "CTR"),
        ("RegressionEM", "REM"),
        ("global_all_pairs", "All Pairs"),
        ("adjacent_chain", "Adjacent Chain"),
        ("pivot_one", "Pivot One"),
    ]
)

In [204]:
# Load every CSV in ../propensities and stack them into one long frame.
# In each file the second column is named after the method that produced it;
# that column is renamed to "examination" and the method's display label is
# stored in a new "name" column.
path = Path("../propensities")

# Sorted so the row order of the combined frame does not depend on the order
# in which the filesystem happens to list the files.
csv_files = sorted(path.glob("*.csv"))
if not csv_files:
    # Without this guard pd.concat fails with an opaque "No objects to
    # concatenate"; the usual cause is a wrong working directory.
    raise FileNotFoundError(f"No propensity CSV files found in {path.resolve()}")

dfs = []
for file in csv_files:
    curve = pd.read_csv(file)

    method = curve.columns[1]
    if method not in method2name:
        raise KeyError(f"{file.name}: unknown method column {method!r}")

    # Build a new frame per file rather than mutating the one just read.
    dfs.append(
        curve.rename(columns={method: "examination"}).assign(name=method2name[method])
    )

df = pd.concat(dfs)
df.head()

Unnamed: 0,position,examination,name
0,1,1.0,All Pairs
1,2,0.673779,All Pairs
2,3,0.414474,All Pairs
3,4,0.293205,All Pairs
4,5,0.207862,All Pairs


## RegressionEM

In [208]:
# Collect every exam.parquet one level below outputs/propensity-estimation,
# stack them, rename the "model" column to "name", and translate its values
# to display labels through `method2name`.
files = [*Path("outputs/propensity-estimation").glob("*/exam.parquet")]
rem_df = (
    pd.concat([pd.read_parquet(parquet_file) for parquet_file in files])
    .rename(columns={"model": "name"})
    .assign(name=lambda frame: frame["name"].map(method2name))
)
rem_df.head()

Unnamed: 0,name,position,examination
0,REM,1,1.0
1,REM,2,0.638155
2,REM,3,0.416706
3,REM,4,0.357961
4,REM,5,0.226802


In [209]:
# Combine the CSV-based curves with the RegressionEM curves and keep only
# positions 1-10. The result is bound to a new name instead of being written
# back to `df`, so re-running this cell cannot append `rem_df` a second time
# (duplicated rows would distort the confidence band in the figure).
combined_df = pd.concat([df, rem_df])
combined_df = combined_df[combined_df["position"] <= 10]

# The CTR curve is plotted separately from the other methods, so split it off.
is_ctr = combined_df["name"] == "CTR"
ctr_df = combined_df[is_ctr].copy()
harvesting_df = combined_df[~is_ctr].copy()

# Legend / color order follows the insertion order of `method2name`.
order = list(method2name.values())

In [210]:
# Rank each display label by its position in `method2name`, attach that rank
# to every row as "z-index", and order the rows from highest rank to lowest.
# NOTE(review): presumably this controls which lines are drawn on top in the
# layered chart — confirm against the rendered figure.
method2zindex = dict(zip(method2name.values(), range(len(method2name))))
harvesting_df = harvesting_df.assign(
    **{"z-index": harvesting_df["name"].map(method2zindex)}
).sort_values("z-index", ascending=False)

In [214]:
# Three layers share one figure:
#   ctr        - the CTR curve as a dashed line,
#   harvesting - the per-position mean of every other method,
#   error      - a confidence-interval band around those curves.
fig_width = 425
x_label = "Position"
y_label = "Normalized Propensity"
band_colors = ["#393b79", "#31a354", "#e6550d", "#fd8d3c", "#fdae6b",]

ctr = (
    alt.Chart(ctr_df, width=fig_width)
    .mark_line(size=2, opacity=1, point=True, strokeDash=[8,2])
    .encode(
        x=alt.X("position:Q", title=x_label).scale(domain=(1, 10)),
        y=alt.Y("examination", title=y_label),
    )
)

harvesting = (
    alt.Chart(harvesting_df, width=fig_width)
    .mark_line(size=2, opacity=0.8, point=True)
    .encode(
        x=alt.X("position:Q", title=x_label).scale(domain=(1, 10)),
        y=alt.Y("mean(examination)", title=y_label),
        color=alt.Color("name", title="", sort=order).scale(domain=order),
    )
)

error = (
    alt.Chart(harvesting_df, width=fig_width)
    .mark_errorband(opacity=0.5, extent="ci")
    .encode(
        x=alt.X("position:Q", title=x_label),
        y=alt.Y("examination", title=y_label),
        color=alt.Color("name", title="", sort=order).scale(
            domain=order, range=band_colors
        ),
    )
)

# Layer the three charts, then apply figure-wide point size and legend placement.
layered = ctr + harvesting + error
chart = layered.configure_point(size=30).configure_legend(
    orient="top",
    direction="horizontal",
)

chart

In [215]:
# Export targets inside `directory`: the SVG written by Altair and the PDF
# produced from it.
svg_file = directory.joinpath(f"{name}.svg")
pdf_file = directory.joinpath(f"{name}.pdf")

In [216]:
# to .svg
chart.save(svg_file)
# to .pdf
!rsvg-convert -f pdf {svg_file} > {pdf_file}