In [1]:
import altair as alt
import pandas as pd
import seaborn as sns

import torch
import torch.nn.functional as F
from torch import nn

  from .autonotebook import tqdm as notebook_tqdm


In [21]:
class BinaryCrossEntropy(nn.Module):
    """Binary cross-entropy between predicted probabilities and (soft) targets."""

    def forward(
        self,
        y_predict: torch.Tensor,
        y_true: torch.Tensor,
        eps: float = 1e-10,
    ) -> torch.Tensor:
        """
        Compute the mean binary cross-entropy over all elements.

        Args:
            y_predict: Predicted probabilities (no sigmoid is applied here).
            y_true: Targets, broadcastable against `y_predict`.
            eps: Lower bound applied before taking the log to avoid ln(0) = -inf.

        Returns:
            Scalar tensor with the loss averaged over every element.
        """
        # Clip each log argument separately so that predictions of exactly
        # 0 or 1 still produce a finite loss.
        log_positive = torch.log(y_predict.clip(min=eps))
        log_negative = torch.log((1 - y_predict).clip(min=eps))

        loss = -(y_true * log_positive + (1 - y_true) * log_negative)
        return loss.mean()

In [66]:
class CM(nn.Module):
    """
    Sigmoid of a single linear layer, scaled by a fixed factor `p`.

    NOTE(review): "CM" presumably stands for "click model"; in this notebook
    `p` is the same factor the training targets are multiplied with.

    Args:
        p: Scale applied to the sigmoid output (float or broadcastable tensor).
        in_features: Size of the last dimension of the input.
    """

    def __init__(self, p, in_features: int = 5):
        super().__init__()
        self.linear = nn.Linear(in_features, 1)
        self.p = p

    def forward(self, x):
        # Drop only the trailing singleton produced by the linear layer; a bare
        # squeeze() would also remove a batch (or list) dimension of size 1.
        score = torch.sigmoid(self.linear(x)).squeeze(-1)
        return score * self.p

In [4]:
import torch

from pytorchltr.evaluation import arp, ndcg


def get_metrics(
    y_predict: torch.Tensor, y_true: torch.Tensor, n: torch.Tensor, prefix: str = ""
):
    """
    Collect batch-averaged ranking metrics in a dictionary.

    Calls `arp` and `ndcg` (imported from `pytorchltr.evaluation`) with
    `(y_predict, y_true, n)`, takes the mean of each result and detaches it.

    Args:
        y_predict: Predicted scores, forwarded as the first metric argument.
        y_true: Ground-truth labels, forwarded as the second metric argument.
        n: Forwarded as the third metric argument
            (presumably the number of results per query; verify against pytorchltr).
        prefix: String prepended to every key of the returned dictionary.

    Returns:
        Dict with keys `arp`, `ndcg@1`, `ndcg@5`, `ndcg@10` and `ndcg`
        (each prefixed with `prefix`) mapping to detached scalar tensors.
    """

    def _batch_mean(per_query: torch.Tensor) -> torch.Tensor:
        # Average over the batch and cut the result from the autograd graph.
        return per_query.mean().detach()

    metrics = {}
    metrics[f"{prefix}arp"] = _batch_mean(arp(y_predict, y_true, n))
    metrics[f"{prefix}ndcg@1"] = _batch_mean(ndcg(y_predict, y_true, n, k=1))
    metrics[f"{prefix}ndcg@5"] = _batch_mean(ndcg(y_predict, y_true, n, k=5))
    metrics[f"{prefix}ndcg@10"] = _batch_mean(ndcg(y_predict, y_true, n, k=10))
    metrics[f"{prefix}ndcg"] = _batch_mean(ndcg(y_predict, y_true, n))
    return metrics

In [67]:
# Fix the RNG so the sampled labels and the model initialisation are
# reproducible on Restart & Run All.
torch.manual_seed(0)

n_results = 100
epochs = 100
lr = 0.1

# Integer labels in {0, ..., 4}; their one-hot encoding is the only feature.
# num_classes is pinned so x always has 5 columns, even if a label is never drawn.
relevance = torch.randint(5, (3, n_results))
x = F.one_hot(relevance, num_classes=5).float()
# Map the integer labels to target probabilities in [0.1, 1.0].
y = 0.1 + 0.9 * (2 ** relevance - 1) / (2 ** 4 - 1)

rows = []

for eta in range(10):
    grad = []

    # Alternative, rank-dependent scale that was tried instead of a scalar:
    # p = (1 / (1 + torch.arange(n_results))) ** eta
    p = 1 / (1 + eta)
    c = y * p

    model = CM(p)
    loss = BinaryCrossEntropy()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    for epoch in range(epochs):
        y_predict = model(x)
        batch_loss = loss(y_predict, c)

        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
        # Store a detached copy: `.grad` may be reused in place by later
        # zero_grad()/backward() calls, which would make every stored entry
        # alias the same buffer.
        grad.append(model.linear.weight.grad.detach().clone().squeeze())

    grad_abs = torch.stack(grad).abs()
    rows.append({
        "eta": eta,
        "grad_mean": float(grad_abs.mean()),
        "grad_std": float(grad_abs.std()),
    })

    # Evaluate the unscaled sigmoid scores. The linear layer returns
    # (3, n_results, 1); drop the trailing axis so the metrics receive one
    # score per result, matching the shape of y.
    with torch.no_grad():
        y_predict = torch.sigmoid(model.linear(x)).squeeze(-1)
    print(get_metrics(y_predict, y, torch.full((3,), n_results)))

{'arp': tensor(29.2660), 'ndcg@1': tensor(1.), 'ndcg@5': tensor(1.), 'ndcg@10': tensor(1.), 'ndcg': tensor(1.)}
{'arp': tensor(34.3368), 'ndcg@1': tensor(0.4340), 'ndcg@5': tensor(0.4340), 'ndcg@10': tensor(0.4340), 'ndcg': tensor(0.7128)}
{'arp': tensor(36.8582), 'ndcg@1': tensor(1.), 'ndcg@5': tensor(1.), 'ndcg@10': tensor(1.), 'ndcg': tensor(0.9326)}
{'arp': tensor(39.4568), 'ndcg@1': tensor(0.4340), 'ndcg@5': tensor(0.4340), 'ndcg@10': tensor(0.4340), 'ndcg': tensor(0.6326)}
{'arp': tensor(42.3736), 'ndcg@1': tensor(0.2142), 'ndcg@5': tensor(0.2142), 'ndcg@10': tensor(0.2142), 'ndcg': tensor(0.5186)}
{'arp': tensor(36.4034), 'ndcg@1': tensor(1.), 'ndcg@5': tensor(1.), 'ndcg@10': tensor(1.), 'ndcg': tensor(0.9313)}
{'arp': tensor(36.4034), 'ndcg@1': tensor(1.), 'ndcg@5': tensor(1.), 'ndcg@10': tensor(1.), 'ndcg': tensor(0.9313)}
{'arp': tensor(50.6860), 'ndcg@1': tensor(0.0718), 'ndcg@5': tensor(0.0718), 'ndcg@10': tensor(0.0718), 'ndcg': tensor(0.3864)}
{'arp': tensor(54.9894), 'nd

In [227]:
class CM(nn.Module):
    """
    Sigmoid of a single linear layer (wrapped in `nn.Sequential`), scaled by `p`.

    This redefinition replaces any earlier `CM` in the kernel and defaults to
    2 input features.

    Args:
        p: Scale applied to the sigmoid output (float or broadcastable tensor).
        in_features: Size of the last dimension of the input.
    """

    def __init__(self, p, in_features: int = 2):
        super().__init__()
        self.linear = nn.Sequential(nn.Linear(in_features, 1))
        self.p = p

    def forward(self, x):
        # Drop only the trailing singleton produced by the linear layer; a bare
        # squeeze() would also remove a batch dimension of size 1.
        score = torch.sigmoid(self.linear(x)).squeeze(-1)
        return score * self.p

In [261]:
# The random data set-up that used to open this cell was overwritten a few
# lines later, and its `rows = []` wiped the sweep results that the chart cell
# reads. Only the hand-built toy example below is used.
epochs = 10
lr = 0.1

# Two items with identical features ...
x = torch.tensor([
    [1, 0],
    [1, 0]
]).float()

# ... but different targets ...
y = torch.tensor([
    0.1, 0.9,
])

# ... and very different scales applied to those targets.
p = torch.tensor([
    [1.0, 0.0001],
])


c = y * p

model = CM(p)
loss = BinaryCrossEntropy()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

for epoch in range(epochs):
    y_predict = model(x)

    batch_loss = loss(y_predict, c)

    optimizer.zero_grad()
    batch_loss.backward()
    optimizer.step()

# Compare the unscaled sigmoid scores with the targets y.
y_predict = torch.sigmoid(model.linear(x)).squeeze(-1)
(y - y_predict) ** 2, y_predict

(tensor([4.0771e-04, 6.0810e-01], grad_fn=<PowBackward0>),
 tensor([0.1202, 0.1202], grad_fn=<SqueezeBackward0>))

In [194]:
class BinaryCrossEntropyWithLogits(nn.Module):
    """Inverse-propensity-scored binary cross-entropy computed from logits."""

    def forward(
        self,
        y_predict: torch.Tensor,
        y_true: torch.Tensor,
        position_bias: torch.Tensor,
        eps: float = 1e-10,
        clip: float = 0.0,
    ) -> torch.Tensor:
        """
        Binary Cross-Entropy with IPS as in Bekker2019, Saito2020, Oosterhuis2022
        https://arxiv.org/pdf/2203.17118.pdf
        https://arxiv.org/pdf/1909.03601.pdf

        Args:
            y_predict: Tensor of size (n_batch, n_results) with predicted logits;
                the sigmoid is applied inside this method
            y_true: Tensor of size (n_batch, n_results) with ground_truth scores
            position_bias: Tensor of size (n_results) with propensities per rank
            eps: Min value to avoid ln(0) = -inf
            clip: Min propensity used to clip position_bias; the default 0.0
                disables clipping

        Returns:
            Loss summed over dim 1 and averaged over the remaining batch dimension
        """
        y_predict = torch.sigmoid(y_predict)
        position_bias = position_bias.type_as(y_predict)
        if clip > 0:
            # Bound the inverse-propensity weights from above.
            position_bias = position_bias.clip(min=clip)

        # The weight exceeds 1 whenever y_true > position_bias, which makes the
        # (1 - weight) factor of the negative term negative.
        weight = y_true / position_bias

        loss = -(
            weight * torch.log(y_predict.clip(min=eps))
            + (1 - weight) * torch.log((1 - y_predict).clip(min=eps))
        )

        return loss.sum(dim=1).mean()

In [207]:
class IPS(nn.Module):
    """
    Single linear layer followed by an ELU, returning one score per input row.

    Args:
        in_features: Size of the last dimension of the input.
    """

    def __init__(self, in_features: int = 2):
        super().__init__()
        self.linear = nn.Linear(in_features, 1)
        # Build the activation once instead of on every forward pass.
        self.activation = nn.ELU()

    def forward(self, x):
        x = self.linear(x)
        # Drop only the trailing singleton produced by the linear layer; a bare
        # squeeze() would also remove a batch dimension of size 1.
        return self.activation(x).squeeze(-1)

In [257]:
# The random data set-up that used to open this cell was overwritten a few
# lines later, and its `rows = []` wiped the sweep results that the chart cell
# reads. Only the hand-built toy example below is used.
epochs = 100
lr = 0.1

# Two items with identical features ...
x = torch.tensor([
    [1, 0],
    [1, 0]
]).float()

# ... but different targets ...
y = torch.tensor([
    0.1, 0.9,
])

# ... and very different propensities.
p = torch.tensor([
    [1.0, 0.0001],
])

c = y * p

model = IPS()
loss = BinaryCrossEntropyWithLogits()
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

for epoch in range(epochs):
    y_predict = model(x)

    # The loss divides the scaled targets c by p again (inverse propensity scoring).
    batch_loss = loss(y_predict, c, p)

    optimizer.zero_grad()
    batch_loss.backward()
    optimizer.step()

# Score with the full model so the ELU used during training is also applied
# here; the loss applies the sigmoid to exactly this output.
y_predict = torch.sigmoid(model(x))
(y - y_predict) ** 2, y_predict

(tensor([0.1605, 0.1595], grad_fn=<PowBackward0>),
 tensor([0.5007, 0.5007], grad_fn=<SqueezeBackward0>))

In [11]:
# Line chart of the mean absolute weight gradient recorded for each eta.
alt.Chart(data=pd.DataFrame(rows)).mark_line().encode(
    alt.X("eta"),
    alt.Y("grad_mean"),
)