# Simulate a Dataset
* Assume we have a query $x$, to which we elicit different responses $y_i$.
* The responses are e.g. the letters from A to Z, so $n = 26$.
* They are ordered in a random, but specific order.
* We draw 100 pairwise comparisons
* The query $x$ is assumed to be constant, so we will disregard it in the simulation.
* The reward $r(y, x)$ becomes $r(y)$. Its value is hard-coded to be $1/position(y)$, e.g. if C is the 24th letter in the order, then
$$
r(C) = 1 / 24
$$

In [102]:
import string
import random

# The 26 uppercase letters serve as the candidate responses y_i.
letters = list(string.ascii_uppercase)

In [103]:
# Fix a reproducible random ranking of the letters.
random.seed(0)
shuffled = letters.copy()
random.shuffle(shuffled)

# Ground-truth reward: the letter at 1-based rank k receives r = 1 / k.
letter2r = dict(
    zip(shuffled, (1.0 / rank for rank in range(1, len(shuffled) + 1)))
)

# Sanity checks pinned to the ordering produced by seed 0.
assert letter2r["A"] == 0.5
assert letter2r["B"] == 1.0 / 23

In [69]:
letter2r

{'O': 1.0,
 'A': 0.5,
 'X': 0.3333333333333333,
 'S': 0.25,
 'G': 0.2,
 'F': 0.16666666666666666,
 'H': 0.14285714285714285,
 'K': 0.125,
 'W': 0.1111111111111111,
 'U': 0.1,
 'E': 0.09090909090909091,
 'C': 0.08333333333333333,
 'V': 0.07692307692307693,
 'D': 0.07142857142857142,
 'R': 0.06666666666666667,
 'L': 0.0625,
 'T': 0.058823529411764705,
 'J': 0.05555555555555555,
 'Z': 0.05263157894736842,
 'P': 0.05,
 'Q': 0.047619047619047616,
 'I': 0.045454545454545456,
 'B': 0.043478260869565216,
 'N': 0.041666666666666664,
 'Y': 0.04,
 'M': 0.038461538461538464}

In [73]:
from math import exp

def p_star(y1, y2, rewards=None) -> float:
    """Return the Bradley-Terry probability that ``y1`` is preferred over ``y2``.

    Mathematically this is ``exp(r1) / (exp(r1) + exp(r2))``, i.e. the logistic
    sigmoid of ``r1 - r2``. It is evaluated in a form that never exponentiates
    a positive number, so large reward gaps cannot raise ``OverflowError``.

    Args:
        y1: Key of the first response.
        y2: Key of the second response.
        rewards: Optional mapping from response to reward. When omitted, the
            module-level ``letter2r`` table is used.

    Returns:
        A probability in ``[0, 1]``.

    Raises:
        KeyError: If ``y1`` or ``y2`` is missing from the reward table.
    """
    table = letter2r if rewards is None else rewards
    gap = table[y1] - table[y2]
    # Only exponentiate non-positive values: exp() can underflow to 0.0 but
    # never overflow on this path.
    if gap >= 0:
        return 1.0 / (1.0 + exp(-gap))
    scaled = exp(gap)
    return scaled / (1.0 + scaled)

# Comparing a letter with itself must be an exact coin flip.
assert p_star("A", "A") == .5
# "O" has the higher reward, so it should be preferred over "A" more often than the reverse.
assert p_star("O", "A") > p_star("A", "O")

In [112]:
import pandas as pd
import numpy as np

# Draw 100 pairs for comparison. random.choices samples with replacement,
# so a pair may contain the same letter twice.
random.seed(0)
D = [random.choices(letters, k=2) for _ in range(100)]
assert len(D) == 100

# Attach the ground-truth probability p* that y1 is preferred over y2.
D = [(y1, y2, p_star(y1, y2)) for y1, y2 in D]

df = pd.DataFrame(D, columns=["y1", "y2", "p*"])

# Simulate the annotator: y1 wins whenever a uniform draw falls below p*.
np.random.seed(0)
df["random"] = np.random.random(size=len(df))
y1_wins = df["random"] < df["p*"]  # evaluated once, reused for both columns
df["yw"] = np.where(y1_wins, df["y1"], df["y2"])
df["yl"] = np.where(y1_wins, df["y2"], df["y1"])
df.head()

Unnamed: 0,y1,y2,p*,random,yw,yl
0,V,T,0.504525,0.548814,T,V
1,K,G,0.481259,0.715189,G,K
2,N,K,0.479179,0.602763,K,N
3,U,H,0.489287,0.544883,H,U
4,M,P,0.497115,0.423655,M,P


# Define a Model for $r_{\theta}(y)$
* The training data is only pairwise comparisons with winner and loser $(y_w, y_l)$
* Our model must output the prediction $r_{\theta}(y)$, and has parameters $\theta$.
* $\theta$ has as many dimensions as there are letters, and each parameter encodes the predicted value of $r$ for each letter.

In [116]:
# Training data keeps only the winner and loser columns; p* is not exposed.
df_train = df.loc[:, ["yw", "yl"]]
df_train.head()

Unnamed: 0,yw,yl
0,T,V
1,G,K
2,K,N
3,H,U
4,M,P


In [123]:
import torch
from torch import nn

class Ranker(nn.Module):
    """Reward model holding one learnable scalar reward per item.

    ``params[i]`` is the predicted reward of item ``i``; all rewards start at
    zero.

    Args:
        n_items: Number of distinct items. When omitted, ``len(letters)`` of
            the module-level ``letters`` list is used, so ``Ranker()`` keeps
            working as before.
    """

    def __init__(self, n_items=None):
        super().__init__()
        if n_items is None:
            n_items = len(letters)
        self.params = nn.Parameter(torch.zeros(n_items))

    def forward(self, letters):  # B x n (expected to be one-hot rows)
        """Return one predicted reward per row of ``letters``."""
        # (B x n) @ (n,) -> (B,): the result is 1-D, not B x 1. For a one-hot
        # row the product simply picks that item's parameter.
        rs = letters @ self.params
        return rs

# Untrained instance, used below to try out the forward pass.
r = Ranker()

In [126]:
# One-hot encoding: row i of the identity matrix stands for the i-th letter.
vecs = torch.eye(len(letters))

letter2vec = {letter: vecs[position] for position, letter in enumerate(letters)}

letter2vec["Z"]

def encode(ls: list[str]) -> torch.Tensor:
    """Look up each letter's vector in ``letter2vec`` and stack them row-wise.

    The result has one row per entry of ``ls``, in the same order.
    """
    rows = [letter2vec[symbol] for symbol in ls]
    return torch.stack(rows)

encode(["A", "B", "C"])

tensor([[1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0., 0., 0., 0., 0., 0., 0.]])

In [132]:
# Try the untrained model: its parameters are all zero, so every predicted reward is 0.
r(encode(["A", "B", "C"]))
r(encode(df_train["yw"]))

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.], grad_fn=<MvBackward0>)

# Define a Loss

In [133]:
def loss(r_w_pred, r_l_pred) -> torch.Tensor:
    """Return the mean negative log-likelihood of the observed preferences.

    Each (winner, loser) pair contributes ``-log(sigmoid(r_w - r_l))``.

    Args:
        r_w_pred: Predicted rewards of the winning responses.
        r_l_pred: Predicted rewards of the losing responses, aligned with
            ``r_w_pred``.

    Returns:
        A scalar tensor holding the mean loss over all pairs.
    """
    # logsigmoid computes log(sigmoid(x)) directly; sigmoid(x).log() returns
    # -inf once sigmoid underflows to 0 for strongly negative x.
    return -torch.nn.functional.logsigmoid(r_w_pred - r_l_pred).mean()

# Train the Model

In [140]:
model = Ranker()
opt = torch.optim.Adam(model.parameters(), lr=0.1)

# The encoded winners and losers do not change between steps, so build them
# once instead of re-encoding the whole dataset on every iteration.
winners = encode(df_train["yw"])
losers = encode(df_train["yl"])

# 40 full-batch gradient steps on the pairwise preference loss.
for _ in range(40):
    opt.zero_grad()
    r_w_pred = model(winners)
    r_l_pred = model(losers)
    step_loss = loss(r_w_pred, r_l_pred)
    print("loss:", step_loss.item())
    step_loss.backward()
    opt.step()

loss: 0.6931471228599548
loss: 0.6622939109802246
loss: 0.6357548236846924
loss: 0.6134393811225891
loss: 0.5950097441673279
loss: 0.5799616575241089
loss: 0.5677114725112915
loss: 0.5577043294906616
loss: 0.5494194030761719
loss: 0.5424333810806274
loss: 0.5364360213279724
loss: 0.5312089323997498
loss: 0.526618480682373
loss: 0.5225934386253357
loss: 0.5190905928611755
loss: 0.5160790085792542
loss: 0.5135298371315002
loss: 0.511406421661377
loss: 0.5096655488014221
loss: 0.5082651376724243
loss: 0.5071642398834229
loss: 0.5063221454620361
loss: 0.5057000517845154
loss: 0.5052586197853088
loss: 0.5049564242362976
loss: 0.5047531723976135
loss: 0.5046135187149048
loss: 0.504507303237915
loss: 0.504411518573761
loss: 0.5043113231658936
loss: 0.5041975975036621
loss: 0.5040654540061951
loss: 0.5039156079292297
loss: 0.503751814365387
loss: 0.5035796761512756
loss: 0.5034064054489136
loss: 0.5032384395599365
loss: 0.5030795931816101
loss: 0.5029317736625671
loss: 0.502795398235321


In [149]:
model.params

Parameter containing:
tensor([-0.1067, -0.4513,  1.4239, -0.3265, -2.3796, -0.7214,  1.9594,  2.1371,
        -0.5861, -1.1844,  0.7986,  0.7001,  0.7843, -2.7167,  2.1669,  0.3370,
         0.4379, -0.3226, -1.5580, -0.0289,  0.7775,  1.3212, -1.3243, -2.1043,
        -1.4263, -2.1402], requires_grad=True)