# naive vs IPS Estimator

In [1]:
import numpy as np

In [2]:
# I = {i_1, i_2}: there are two kinds of items (item IDs).
item = ["1", "2"]

# There is a single user (user ID).
u = ["1"]

In [3]:
# Preference (relevance) r(u, i) of the user for each item, in `item` order.
rui = [1.0, 0.5]
# How likely each position is to be seen, theta(y(i)).
THETA = [1.0, 0.1]
# Rankings induced by the two scoring functions f1 and f2
# (entry i is used below as the position given to item i, stored as a string).
f1 = ["1", "2"]
f2 = ["2", "1"]
# Note: the logged data D(u, y_f2, c) was collected while f2 was running.

# ID string -> zero-based list index.
item_id2index = {item_id: idx for idx, item_id in enumerate(item)}

# f1, f2の真のランキング性能

In [4]:
# True ranking metric of f1: each item's relevance weighted by its position under f1.
f1_terms = [rel * int(pos) for rel, pos in zip(rui, f1)]
J_f1 = np.sum(f1_terms)
print(f"J(f1) = {J_f1}")

J(f1) = 2.0


In [5]:
# True ranking metric of f2: each item's relevance weighted by its position under f2.
f2_terms = [rel * int(pos) for rel, pos in zip(rui, f2)]
J_f2 = np.sum(f2_terms)
# The label must say f2: the value printed here is J_f2, not J_f1.
print(f"J(f2) = {J_f2}")

J(f2) = 2.5


In [6]:
# Every possible click pattern: one row per pattern, one column per item
# (1 = clicked, 0 = not clicked).
click = np.array([[first, second] for first in (1, 0) for second in (1, 0)])
print(click)

[[1 1]
 [1 0]
 [0 1]
 [0 0]]


In [7]:
# Probability of each of the four click patterns.
# Per-item click probability while f2 is serving:
#   C(u, i, y_f2(i)) = THETA(y_f2(i)) * r(u, i)
# A pattern's probability is the product over items of the click probability
# (if clicked) or its complement (if not clicked).

# The per-item click probability does not depend on the pattern, so compute it once.
item_click_p = [THETA[item_id2index[pos]] * rel for pos, rel in zip(f2, rui)]

click_p = []
for pattern in click:
    per_item = [p if clicked else 1 - p for clicked, p in zip(pattern, item_click_p)]
    # float(): keep plain Python floats so the list prints as [0.05, ...]
    # regardless of how the installed NumPy version renders its scalars.
    click_p.append(float(np.prod(per_item)))

In [8]:
# Show the probability of each click pattern (same order as the rows of `click`).
print(f"click proba is {click_p}")

click proba is [0.05, 0.05, 0.45, 0.45]


# f1, f2のnaive推定量

In [9]:
# Naive estimates of the metric for f1 and f2, one value per click pattern:
# the sum over items of click * position, using the raw clicks as they are.
pos_f1 = [int(pos) for pos in f1]
pos_f2 = [int(pos) for pos in f2]

naive_f1 = [float(np.dot(pattern, pos_f1)) for pattern in click]
naive_f2 = [float(np.dot(pattern, pos_f2)) for pattern in click]

In [10]:
# Show the naive estimate of each scoring function for every click pattern.
print(f"J^naive(f1) = {naive_f1}, J^naive(f2) = {naive_f2}")

J^naive(f1) = [3.0, 1.0, 2.0, 0.0], J^naive(f2) = [3.0, 2.0, 1.0, 0.0]


# f1, f2のIPS推定量

In [11]:
# IPS estimates of the metric for f1 and f2, one value per click pattern.
# Each click is divided by THETA at the position the item had under f2
# (the ranking that produced the log) before being weighted by the position
# under the ranking being evaluated.

# These lookups do not depend on the click pattern, so do them once.
exposure = [THETA[item_id2index[pos]] for pos in f2]
weight_f1 = [int(pos) for pos in f1]
weight_f2 = [int(pos) for pos in f2]

ips_f1 = []
ips_f2 = []
for pattern in click:
    reweighted = [clicked / theta for clicked, theta in zip(pattern, exposure)]
    # float(): store plain Python floats, like the naive estimates, so the
    # lists print as [12.0, ...] regardless of the installed NumPy version.
    ips_f1.append(float(np.dot(reweighted, weight_f1)))
    ips_f2.append(float(np.dot(reweighted, weight_f2)))

In [12]:
# Show the IPS estimate of each scoring function for every click pattern.
print(f"J^IPS(f1) = {ips_f1}, J^IPS(f2) = {ips_f2}")

J^IPS(f1) = [12.0, 10.0, 2.0, 0.0], J^IPS(f2) = [21.0, 20.0, 1.0, 0.0]


# naive, IPS推定量の期待値

In [13]:
# Expected value of each estimator: the click-pattern probabilities dotted
# with the per-pattern estimates.
Ef1_naive, Ef2_naive = (np.dot(click_p, estimates) for estimates in (naive_f1, naive_f2))
Ef1_ips, Ef2_ips = (np.dot(click_p, estimates) for estimates in (ips_f1, ips_f2))

In [14]:
import pandas as pd

# One row per scoring function: true metric next to the expectation of each estimator.
summary_rows = (
    ("f1", J_f1, Ef1_naive, Ef1_ips),
    ("f2", J_f2, Ef2_naive, Ef2_ips),
)
data = [
    {"スコアリング関数": name, "J": true_j, "E[J^naive]": e_naive, "E[J^IPS]": e_ips}
    for name, true_j, e_naive, e_ips in summary_rows
]

pd.DataFrame(data)

Unnamed: 0,スコアリング関数,J,E[J^naive],E[J^IPS]
0,f1,2.0,1.1,2.0
1,f2,2.5,0.7,2.5


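### 以上より、naive推定量の期待値は真の性能Jと一致せず（f1: 1.1 ≠ 2.0、f2: 0.7 ≠ 2.5）、バイアスを持つ。一方、IPS推定量の期待値は真の値と一致する（f1: 2.0、f2: 2.5）。naive推定量には人気商品ばかり推薦するバイアスが生じており、新しくこのプラットフォームに掲載する企業の商品は推薦されにくいという機会損失がある。正しく推薦できなければ、推薦する意味がない。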