In [1]:
# Move one directory up so that the `src` package imported below and the
# relative `figures/` output paths used by the plotting cells resolve.
%cd ..

/Users/philipphager/Developer/ultr-cm-vs-ips


In [148]:
import altair as alt
import torch
import pandas as pd
import torch.nn.functional as F


from altair_saver import save
from src.loss import BinaryCrossEntropy

In [145]:
from torch import nn
from typing import Optional

class BinaryCrossEntropyWithLogits(nn.Module):
    """Pointwise binary cross-entropy with optional inverse-propensity weighting.

    NOTE: despite the class name, `forward` applies `torch.log` directly to
    `y_predict`, so predictions must already be probabilities in [0, 1],
    not raw logits.
    """

    def forward(
        self,
        y_predict: torch.Tensor,
        y_true: torch.Tensor,
        n: torch.Tensor,
        position_bias: Optional[torch.Tensor] = None,
        clip: Optional[float] = None,
        eps: float = 1e-10,
    ) -> torch.Tensor:
        """
        Binary Cross-Entropy with IPS as in Bekker2019, Saito2020, Oosterhuis2022
        https://arxiv.org/pdf/2203.17118.pdf
        https://arxiv.org/pdf/1909.03601.pdf

        Args:
            y_predict: Tensor of size (n_batch, n_results) with predicted relevance
                (probabilities, since the log is taken directly)
            y_true: Tensor of size (n_batch, n_results) with ground_truth scores
            n: Not used by this loss; accepted so the call signature matches the
                other loss invoked in this notebook as `loss_fn(y_predict, y_true, n)`
            position_bias: Tensor of size (n_results) with propensities per rank;
                defaults to all ones (no correction)
            clip: Min propensity used to clip position_bias
            eps: Min value to avoid ln(0) = -inf

        Returns:
            Scalar tensor: the loss summed over dim 1 and averaged over the batch
        """
        if position_bias is None:
            position_bias = torch.ones_like(y_true)

        if clip is not None:
            position_bias = position_bias.clip(min=clip)

        position_bias = position_bias.type_as(y_predict)

        # Propensity-corrected target; computed once and reused in both terms.
        weighted_true = y_true / position_bias

        loss = -(
            weighted_true * torch.log(y_predict.clip(min=eps))
            + (1 - weighted_true) * torch.log((1 - y_predict).clip(min=eps))
        )

        return loss.sum(dim=1).mean()

In [146]:
# Sweep the IPS-corrected loss over a grid of predicted relevance values,
# once per examination probability 1/1 ... 1/10.
ips_fn = BinaryCrossEntropyWithLogits()
y = 0.5
o = 1 / torch.arange(1, 11)
n = torch.tensor([1])
relevance_grid = torch.arange(0, 101) / 100
rows = []

for o_k in o:
    # Click probability used as the target: y scaled by the examination probability.
    c_d = (y * o_k).reshape(1, 1)
    propensity = o_k.unsqueeze(0)

    for relevance in relevance_grid:
        loss = ips_fn(relevance.reshape(1, 1), c_d, n, propensity)
        rows.append({
            "examination": float(o_k),
            "y_predict": float(relevance),
            "y_true": y,
            "loss": float(loss),
            "model": "IPS"
        })

ips_df = pd.DataFrame(rows)
ips_df.head()

Unnamed: 0,examination,y_predict,y_true,loss,model
0,1.0,0.0,0.5,11.512925,IPS
1,1.0,0.01,0.5,2.30761,IPS
2,1.0,0.02,0.5,1.966113,IPS
3,1.0,0.03,0.5,1.768509,IPS
4,1.0,0.04,0.5,1.629849,IPS


In [150]:
# Loss object imported from src.loss; the sweep below calls it as
# cm_fn(c_predict, c_d, n) on predicted vs. target click probabilities.
cm_fn = BinaryCrossEntropy()

In [116]:
# Sweep the click-model loss over a grid of predicted relevance values,
# once per examination probability 1/1 ... 1/10.
y = 0.5
o = 1 / torch.arange(1, 11)
n = torch.tensor([1])
relevance_grid = torch.arange(0, 101) / 100
rows = []

for o_k in o:
    # Target click probability: y scaled by the examination probability.
    c_d = (y * o_k).reshape(1, 1)

    for relevance in relevance_grid:
        # The click model compares click probabilities, so the prediction is
        # scaled by the examination probability as well.
        c_predict = (relevance * o_k).reshape(1, 1)
        loss = cm_fn(c_predict, c_d, n)
        rows.append({
            "examination": float(o_k),
            "y_predict": float(relevance),
            "y_true": y,
            "loss": float(loss),
            "model": "CM"
        })

cm_df = pd.DataFrame(rows)
cm_df.head()

Unnamed: 0,examination,y_predict,y_true,loss,model
0,1.0,0.0,0.5,11.512925,CM
1,1.0,0.01,0.5,2.30761,CM
2,1.0,0.02,0.5,1.966113,CM
3,1.0,0.03,0.5,1.768509,CM
4,1.0,0.04,0.5,1.629849,CM


In [142]:
def _loss_curves_chart(df, chart_title, y_title):
    """Build one panel: loss curves per examination probability plus their minima.

    Args:
        df: DataFrame with columns "examination", "y_predict" and "loss".
        chart_title: Title shown above the panel.
        y_title: Title of the y axis.

    Returns:
        Layered Altair chart (line layer + one circle per curve at its lowest loss).
    """
    # Row with the smallest loss for every examination value.
    minima = df.sort_values(["examination", "loss"]).groupby(["examination"]).head(1)

    curves = alt.Chart(df, width=300, height=250, title=chart_title).mark_line(clip=True).encode(
        x=alt.X("y_predict", title="Estimated Relevance P(ŷ = 1 | d)"),
        y=alt.Y("loss", scale=alt.Scale(domain=(0, 2.0)), title=y_title),
        # Same legend title in both panels; previously only the first panel set
        # it and the second fell back to the raw field name "examination".
        color=alt.Color("examination:Q", title="P(O = 1 | k)"),
    )
    markers = alt.Chart(minima).mark_circle(size=40, opacity=1.0).encode(
        x="y_predict",
        y=alt.Y("loss"),
    )

    return curves + markers


cm_chart = _loss_curves_chart(
    cm_df,
    chart_title="Neural PBM",
    y_title="Click Model - Binary Cross Entropy",
)

ips_chart = _loss_curves_chart(
    ips_df,
    chart_title="Pointwise IPS",
    y_title="IPS-Corrected Binary Cross Entropy Loss",
)

chart = (cm_chart | ips_chart).configure_legend(
    titleFont="serif",
    titleFontSize=14,
    labelFont="serif",
    labelFontSize=14,
    columnPadding=20,
).configure_title(
    fontSize=14,
    fontWeight="normal",
    font="serif"
).configure_axis(
    titlePadding=10,
    titleFontSize=14,
    titleFontWeight="normal",
    titleFont="serif",
    labelFontSize=10,
    labelFontWeight="normal",
    labelFont="serif",
    tickCount=8
)

save(chart, "figures/loss.pdf")
chart

In [396]:
def train_cm(x, y, o, name):
    """Evaluate the click-model loss on a grid of shared relevance predictions.

    No optimisation is performed: for every candidate value in 0.00 ... 1.00
    the same prediction is used for two result slots, multiplied by `o`, and
    compared against the target `y * o` with `BinaryCrossEntropy`.

    Args:
        x: Not used; kept so existing call sites keep working.
        y: Tensor of relevance values, multiplied with `o` to form the target.
        o: Tensor of examination probabilities, broadcast against `y` and the
            two-slot prediction.
        name: Label stored in the "model" column of every row.

    Returns:
        DataFrame with one row per candidate prediction and the columns
        "examination", "y_predict", "y_true", "loss" and "model".
    """
    cm_fn = BinaryCrossEntropy()
    c_d = y * o
    # Constant across the sweep, so build it once.
    n = torch.tensor([2])
    rows = []

    for y_predict in torch.arange(0, 101) / 100:
        # Same candidate relevance for both slots.
        y_predict = y_predict.repeat((2,))
        c_predict = y_predict * o

        loss = cm_fn(c_predict, c_d, n)
        rows.append({
            "examination": o.numpy(),
            "y_predict": float(y_predict[0]),
            "y_true": y.numpy(),
            "loss": float(loss),
            "model": name
        })

    return pd.DataFrame(rows)
    
# Item A alone: relevance 0.1, examination probability 1.0.
cm_item_a = train_cm(
    x=torch.tensor([
        [1, 0],
    ]).float(),
    y=torch.tensor([
        0.1
    ]),
    o=torch.tensor([
        [1.0],
    ]),
    name="y = 0.1, o = 1.0"
)

# Item B alone: relevance 0.9, examination probability 0.1.
# The label previously read "y = 0.8", which contradicted the y value passed here.
cm_item_b = train_cm(
    x=torch.tensor([
        [1, 0],
    ]).float(),
    y=torch.tensor([
        0.9
    ]),
    o=torch.tensor([
        [0.1],
    ]),
    name="y = 0.9, o = 0.1"
)

# Both items together, sharing one predicted relevance value.
cm_item_combined = train_cm(
    x=torch.tensor([
        [1, 0],
        [1, 0]
    ]).float(),
    y=torch.tensor([
        0.1, 0.9
    ]),
    o=torch.tensor([
        [1.0, 0.1],
    ]),
    name="Combined"
)

In [417]:
# Stack the three click-model sweeps and pick the lowest-loss row per sweep.
source = pd.concat([cm_item_a, cm_item_b, cm_item_combined])
loss_columns = source[["model", "y_predict", "loss"]]
min_source = loss_columns.sort_values(["model", "loss"]).groupby(["model"]).head(1)

# The "Combined" curve gets the [0, 0] dash pattern (no gaps, presumably
# rendered solid); the single-item curves get the [2, 2] dashed pattern.
cm_line_style = alt.condition(
    alt.datum.model == "Combined",
    alt.value([0, 0]),
    alt.value([2, 2]),
)

cm_lines = alt.Chart(source, width=300, height=250, title="Neural PBM").mark_line(clip=True).encode(
    x=alt.X("y_predict", title="Estimated Relevance P(ŷ = 1 | d)"),
    y=alt.Y("loss", scale=alt.Scale(domain=(0, 2.0)), title="Click Model - Binary Cross Entropy"),
    color=alt.Color("model", legend=None),
    strokeDash=cm_line_style,
)

# One marker per sweep at its minimum; legends are suppressed in this panel.
cm_minima = alt.Chart(min_source).mark_point(size=40, opacity=1.0).encode(
    x="y_predict",
    y=alt.Y("loss"),
    color=alt.Color("model", legend=None),
    shape=alt.Shape("model", legend=None),
    tooltip="y_predict"
)

cm_chart = (cm_lines + cm_minima).resolve_scale(
    color="independent",
    shape="independent"
)

cm_chart

In [401]:
def train_ips(x, y, o, name):
    """Evaluate the IPS-corrected loss on a grid of shared relevance predictions.

    No optimisation is performed: for every candidate value in 0.00 ... 1.00
    the same prediction is used for two result slots and scored with
    `BinaryCrossEntropyWithLogits` against the target `y * o`, passing `o`
    as the position bias.

    Args:
        x: Not used; kept so existing call sites keep working.
        y: Tensor of relevance values, multiplied with `o` to form the target.
        o: Tensor of examination probabilities, also passed as the propensities.
        name: Label stored in the "model" column of every row.

    Returns:
        DataFrame with one row per candidate prediction and the columns
        "examination", "y_predict", "y_true", "loss" and "model".
    """
    ips_fn = BinaryCrossEntropyWithLogits()
    c_d = y * o
    # Constant across the sweep, so build it once.
    n = torch.tensor([2])
    rows = []

    for y_predict in torch.arange(0, 101) / 100:
        # Same candidate relevance for both slots; the IPS loss scores the
        # relevance prediction directly, so it is not multiplied by `o`.
        y_predict = y_predict.repeat((2,))

        loss = ips_fn(y_predict, c_d, n, o)
        rows.append({
            "examination": o.numpy(),
            "y_predict": float(y_predict[0]),
            "y_true": y.numpy(),
            "loss": float(loss),
            "model": name
        })

    return pd.DataFrame(rows)
    
# Item A alone: relevance 0.1, examination probability 1.0.
ips_item_a = train_ips(
    x=torch.tensor([
        [1, 0],
    ]).float(),
    y=torch.tensor([
        0.1
    ]),
    o=torch.tensor([
        [1.0],
    ]),
    name="y = 0.1, o = 1.0"
)

# Item B alone: relevance 0.9, examination probability 0.1.
# The label previously read "y = 0.8", which contradicted the y value passed here.
ips_item_b = train_ips(
    x=torch.tensor([
        [1, 0],
    ]).float(),
    y=torch.tensor([
        0.9
    ]),
    o=torch.tensor([
        [0.1],
    ]),
    name="y = 0.9, o = 0.1"
)

# Both items together, sharing one predicted relevance value.
ips_item_combined = train_ips(
    x=torch.tensor([
        [1, 0],
        [1, 0]
    ]).float(),
    y=torch.tensor([
        0.1, 0.9
    ]),
    o=torch.tensor([
        [1.0, 0.1],
    ]),
    name="Combined"
)

In [415]:
# Stack the three IPS sweeps and pick the lowest-loss row per sweep.
source = pd.concat([ips_item_a, ips_item_b, ips_item_combined])
loss_columns = source[["model", "y_predict", "loss"]]
min_source = loss_columns.sort_values(["model", "loss"]).groupby(["model"]).head(1)

# The "Combined" curve gets the [0, 0] dash pattern (no gaps, presumably
# rendered solid); the single-item curves get the [2, 2] dashed pattern.
ips_line_style = alt.condition(
    alt.datum.model == "Combined",
    alt.value([0, 0]),
    alt.value([2, 2]),
)

ips_lines = alt.Chart(source, width=300, height=250, title="Pointwise IPS").mark_line(clip=True).encode(
    x=alt.X("y_predict", title="Estimated Relevance P(ŷ = 1 | d)"),
    y=alt.Y("loss", scale=alt.Scale(domain=(0, 2.0)), title="IPS-Corrected Binary Cross Entropy Loss"),
    color=alt.Color("model", legend=None),
    strokeDash=ips_line_style,
)

# One marker per sweep at its minimum; this layer keeps its colour and shape
# legends, unlike the line layer.
ips_minima = alt.Chart(min_source).mark_point(size=40, opacity=1.0).encode(
    x="y_predict",
    y=alt.Y("loss"),
    color="model",
    shape="model",
    tooltip="loss"
)

ips_chart = (ips_lines + ips_minima).resolve_scale(
    color="independent",
    shape="independent"
)

ips_chart

In [414]:
# Typography settings for the side-by-side figure, grouped per chart element.
legend_style = dict(
    orient="right",
    title=None,
    labelFont="serif",
    labelFontSize=14,
)
title_style = dict(
    fontSize=14,
    fontWeight="normal",
    font="serif",
)
axis_style = dict(
    titlePadding=5,
    titleFontSize=14,
    titleFontWeight="normal",
    titleFont="serif",
    labelFontSize=10,
    labelFontWeight="normal",
    labelFont="serif",
    tickCount=8,
)

# Click-model panel on the left, IPS panel on the right.
side_by_side = cm_chart | ips_chart
chart = (
    side_by_side
    .configure_legend(**legend_style)
    .configure_title(**title_style)
    .configure_axis(**axis_style)
)

save(chart, "figures/loss_bias.pdf")
chart