In [1]:
import pandas as pd
import numpy as np
from scipy.special import expit
from functools import reduce
from online_logistic_regression import OnlineLogisticRegression


In [2]:
from environment import ContextualEnvironment
from policies import KLUCBSegmentPolicy, RandomPolicy, ExploreThenCommitSegmentPolicy, EpsilonGreedySegmentPolicy, TSSegmentPolicy, LinearTSPolicy
import argparse
import json
import logging
import numpy as np
import pandas as pd
import time

In [3]:
parser = argparse.ArgumentParser()
parser.add_argument("--users_path", type = str, default = "data/user_features.csv", required = False,
                    help = "Path to user features file")
parser.add_argument("--playlists_path", type = str, default = "data/playlist_features.csv", required = False,
                    help = "Path to playlist features file")
parser.add_argument("--output_path", type = str, default = "results.json", required = False,
                    help = "Path to json file to save regret values")
parser.add_argument("--policies", type = str, default = "random,ts-seg-naive", required = False,
                    help = "Bandit algorithms to evaluate, separated by commas")
parser.add_argument("--n_recos", type = int, default = 12, required = False,
                    help = "Number of slots L in the carousel i.e. number of recommendations to provide")
parser.add_argument("--l_init", type = int, default = 3, required = False,
                    help = "Number of slots L_init initially visible in the carousel")
parser.add_argument("--n_users_per_round", type = int, default = 20000, required = False,
                    help = "Number of users randomly selected (with replacement) per round")
parser.add_argument("--n_rounds", type = int, default = 100, required = False,
                    help = "Number of simulated rounds")
parser.add_argument("--print_every", type = int, default = 10, required = False,
                    help = "Print cumulative regrets every 'print_every' round")

args = parser.parse_args(args = [])

In [4]:
playlists_df = pd.read_csv('data/playlist_features.csv')

users_df = pd.read_csv('data/user_features_small.csv')

n_users = len(users_df)
n_playlists = len(playlists_df)

In [5]:
user_features = np.array(users_df.drop(["segment"], axis = 1))
user_features = np.concatenate([user_features, np.ones((n_users,1))], axis = 1)
playlist_features = np.array(playlists_df)

In [6]:
user_segment = np.array(users_df.segment)

In [7]:
user_segment

array([0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)

In [8]:
def set_policies(policies_name, user_segment, user_features, n_playlists):
    # Please see section 3.3 of RecSys paper for a description of policies
    POLICIES_SETTINGS = {
        'random' : RandomPolicy(n_playlists),
        'etc-seg-explore' : ExploreThenCommitSegmentPolicy(user_segment, n_playlists, min_n = 100, cascade_model = True),
        'etc-seg-exploit' : ExploreThenCommitSegmentPolicy(user_segment, n_playlists, min_n = 20, cascade_model = True),
        'epsilon-greedy-explore' : EpsilonGreedySegmentPolicy(user_segment, n_playlists, epsilon = 0.1, cascade_model = True),
        'epsilon-greedy-exploit' : EpsilonGreedySegmentPolicy(user_segment, n_playlists, epsilon = 0.01, cascade_model = True),
        'kl-ucb-seg' : KLUCBSegmentPolicy(user_segment, n_playlists, cascade_model = True),
        'ts-seg-naive' : TSSegmentPolicy(user_segment, n_playlists, alpha_zero = 1, beta_zero = 1, cascade_model = True),
        'ts-seg-pessimistic' : TSSegmentPolicy(user_segment, n_playlists, alpha_zero = 1, beta_zero = 99, cascade_model = True),
        'ts-lin-naive' : LinearTSPolicy(user_features, n_playlists, bias = 0.0, cascade_model = True),
        'ts-lin-pessimistic' : LinearTSPolicy(user_features, n_playlists, bias = -5.0, cascade_model = True),
        # Versions of epsilon-greedy-explore and ts-seg-pessimistic WITHOUT cascade model
        'epsilon-greedy-explore-no-cascade' : EpsilonGreedySegmentPolicy(user_segment, n_playlists, epsilon = 0.1, cascade_model = False),
        'ts-seg-pessimistic-no-cascade' : TSSegmentPolicy(user_segment, n_playlists, alpha_zero = 1, beta_zero = 99, cascade_model = False)
    }

    return [POLICIES_SETTINGS[name] for name in policies_name]

In [9]:
po = 'random,ts-lin-pessimistic,ts-seg-pessimistic'
policies_name = po.split(",")

In [10]:
policies = set_policies(policies_name, user_segment, user_features, n_playlists)

In [11]:
policies_name

['random', 'ts-lin-pessimistic', 'ts-seg-pessimistic']

In [12]:
n_policies = len(policies) # 3
n_users_per_round = args.n_users_per_round
n_rounds = args.n_rounds
overall_rewards = np.zeros((n_policies, n_rounds))
overall_optimal_reward = np.zeros(n_rounds)

In [13]:
cont_env = ContextualEnvironment(user_features, playlist_features, user_segment, args.n_recos)

In [14]:
cont_env.th_rewards

array([0.51069545, 0.60661954, 0.45462272, 0.35597582, 0.45974146,
       0.33826388, 0.94396647, 0.6756706 , 0.35810638])

In [15]:
user_ids = np.random.choice(range(n_users), n_users_per_round) # range * 수

In [16]:
np.take(cont_env.th_rewards, user_ids).sum()

10521.409422646326

In [17]:
n_users = 9
u = 0
step = 100000

In [18]:
probas = np.take(user_features, range(0,9), axis = 0).dot(playlist_features.T)

In [19]:
so = np.argsort(-probas)[:, :12]

In [20]:
so1 = np.take(playlist_features, so, axis = 0)

In [21]:
batch_user_ids = range(0,9)
batch_recos = so
batch_user_features = np.take(user_features, batch_user_ids, axis = 0)              # batch 안에 있는 user에 대한 user_feature만 추출
batch_playlist_features = np.take(playlist_features, batch_recos, axis = 0)    # 추천리스트에 있는 item에 대한 feature 추출
n_users = len(batch_user_ids)                                                       # batch 내에 있는 유저 수
th_reward = np.zeros(n_users)                                                       # zero vector 생성 (user 수)
for i in range(n_users):                                                            # user 수 만큼 for 문 수행
    probas = expit(batch_user_features[i].dot(batch_playlist_features[i].T)) 
    th_reward[i] = 1 - reduce(lambda x,y : x * y, 1 - probas)

In [22]:
batch_user_features[i].dot(batch_playlist_features[i].T)

array([-2.66080412, -2.93343552, -2.95593961, -3.30119431, -3.36031116,
       -3.42852429, -3.46619029, -3.47532706, -3.49337995, -3.56729809,
       -3.62311179, -3.65233738])

In [23]:
expit(batch_user_features[i].dot(batch_playlist_features[i].T)) 

array([0.06532622, 0.05052526, 0.04945654, 0.03553024, 0.03355913,
       0.03141581, 0.03028968, 0.03002246, 0.02950118, 0.02745687,
       0.02600514, 0.02527506])

In [24]:
for i in range(9):                                                            # user 수 만큼 for 문 수행
    probas = expit(np.take(user_features, range(0,9), axis = 0)[i].dot(so1[i].T))        # sigmoid 
    th_reward[i] = 1 - reduce(lambda x,y : x * y, 1 - probas)

In [25]:
th_rewards = np.zeros(user_features.shape[0]) 

In [26]:
th_rewards[0:9] = th_reward

In [27]:
user_segment = np.array(users_df.segment)

In [28]:
user_segment

array([0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)

In [29]:
n_segments = len(np.unique(user_segment))                              # unique 한 segment 수
segment_recos = np.zeros((n_segments, 12), dtype = np.int64)                 # (segment 수, 추천리스트 수) > segment 별로 동일한 추천리스트 생성
for i in range(n_segments):
    mean_probas = np.mean(expit(np.take(user_features, np.where(user_segment == i)[0], axis = 0).dot(playlist_features.T)), axis = 0)
    reward = 1 - reduce(lambda x,y : x * y, 1 + np.sort(-mean_probas)[:12])
    segment_recos[i] = np.argsort(-mean_probas)[:12]

In [30]:
segment_recos

array([[195, 371,  51, 408, 128, 413, 251, 718, 396, 172, 178, 252]],
      dtype=int64)

In [31]:
users_ids = range(u, min(n_users, u + step))

In [32]:
user_segment = np.take(user_segment, users_ids)                    # batch 안에 있는 user의 segment 추출
opt_recos = np.take(segment_recos, user_segment, axis = 0)

In [33]:
th_rewards

array([0.51069545, 0.60661954, 0.45462272, 0.35597582, 0.45974146,
       0.33826388, 0.94396647, 0.6756706 , 0.35810638])

In [34]:
user_ids = np.random.choice(range(n_users), n_users_per_round)                                  # 전체 유저에서 batch 크기 만큼 샘플링 / 중복 유저도 가능한데...?
overall_optimal_reward = np.take(cont_env.th_rewards, user_ids).sum()

In [35]:
for j in range(n_policies):
    # Compute n_recos recommendations
    recos = policies[j].recommend_to_users_batch(user_ids, args.n_recos, args.l_init)
    # Compute rewards
    rewards = cont_env.simulate_batch_users_reward(batch_user_ids= user_ids, batch_recos=recos)
    # Update policy based on rewards
    policies[j].update_policy(user_ids, recos, rewards, args.l_init)
    overall_rewards[j,i] = rewards.sum()
# Print info
if i == 0 or (i+1) % print_every == 0 or i+1 == n_rounds:
    logger.info("Round: %d/%d. Elapsed time: %f sec." % (i+1, n_rounds, time.time() - start_time))
    logger.info("Cumulative regrets: \n%s \n" % "\n".join(["	%s : %s" % (policies_name[j], str(np.sum(overall_optimal_reward - overall_rewards[j]))) for j in range(n_policies)]))



KeyboardInterrupt: 

In [35]:
recos = policies[2].recommend_to_users_batch(user_ids, args.n_recos, args.l_init)

In [36]:
recos.shape

(20000, 12)

In [45]:
def __init__(self, user_features, n_playlists, bias=0.0, cascade_model=True):                       # LinearTSPolicy(user_features, n_playlists, bias = -5.0, cascade_model = True)
    self.user_features = user_features
    n_dim = user_features.shape[1]                                                                  # user feature dimension : 97
    self.n_playlists = n_playlists                                                                  # item 수 : 862
    self.models = [OnlineLogisticRegression(1, 1, n_dim, bias, 15) for i in range(n_playlists)]     # item
    self.m = np.zeros((n_playlists, n_dim))                                                         # item feature matrix
    self.m[:, -1] = bias
    self.q = np.ones((n_playlists, n_dim))
    self.n_dim = n_dim
    self.cascade_model = cascade_model

In [37]:
OnlineLogisticRegression(1, 1, 97, 0, 15).w

array([ 0.3253783 ,  0.78779714,  1.89009634, -2.30583962, -0.86405902,
       -0.75174169,  0.12080632, -2.55030541, -0.0748333 ,  0.50496196,
        0.18839995,  1.29313041, -0.92643271, -0.65008824,  2.22925202,
       -0.12727471,  0.89694574, -0.20996351, -0.96924606, -0.54598823,
       -0.91500641, -0.11683971, -0.10921915, -0.80916804,  1.13970442,
        2.09341808, -1.0072589 , -1.02885565, -0.55029619, -0.88712835,
        0.09623376,  0.7359596 ,  2.21747589,  0.2503986 , -2.52534861,
        1.09520324, -1.08866139, -0.47551696,  1.27435851,  0.5842363 ,
        0.55656989,  0.00591958,  0.11485104, -0.80918276,  0.63692869,
       -0.35121146, -0.45229774,  1.23987654, -0.82766722, -0.44785091,
        1.43075643, -0.31205146, -1.38099833, -1.112707  , -0.05090673,
       -0.51951277, -0.61800099, -2.36417829, -3.22414369,  0.29890316,
       -1.00395265, -1.51411378, -2.39669506,  0.58235952, -0.62578034,
       -0.3932497 ,  0.68594959,  0.0547951 ,  0.08147499, -1.39

In [41]:
user_features = np.take(self.user_features, batch_users, axis=0)                                # batch 내 user의 feature 추출
n_users = len(batch_users)                                                                      # batch 내 유저 수
recos = np.zeros((n_users, n_recos), dtype=np.int64)                                            # 추천리스트 init 생성
step = 1
u = 0
while u < n_users:
    u_next = min(n_users, u+step)
    p_features_sampled =(np.random.normal(self.m, 1/np.sqrt(self.q), size= (u_next-u, self.n_playlists, self.n_dim)))   ###### 시작
    step_p = p_features_sampled.dot(user_features[u:u_next].T)
    for i in range(u_next - u):
        recos[u+i] = np.argsort((-step_p[i,:,i]))[:n_recos]
    u += step
# Shuffle l_init first slots
np.random.shuffle(recos[0:l_init])
return recos

NameError: name 'self' is not defined

In [38]:
m = np.zeros((782, 97))                                                         # item feature matrix
q = np.ones((782, 97))

In [40]:
p_features_sampled = np.random.normal(m,q,size=(1,782,97))

In [41]:
p_features_sampled.shape

(1, 782, 97)

In [42]:
step_p = p_features_sampled.dot(user_features[u:1].T)

In [44]:
np.argsort((-step_p[i,:,i]))[:12]

array([703, 260, 411, 378, 690, 266,  95, 679, 395, 441, 280, 121],
      dtype=int64)

In [45]:
recos = np.zeros((20000, 12), dtype=np.int64)

In [128]:
batch_user_features = np.take(user_features, user_ids, axis = 0)             # 샘플된 유저의 features  (s_user, 97)
batch_playlist_features = np.take(playlist_features, recos, axis = 0)        # 추천리스트의 item feature
n_users = len(user_ids)
n = len(batch_recos[0])
probas = np.zeros((n_users, n))
for i in range(n_users):
    probas[i] = expit(batch_user_features[i].dot(batch_playlist_features[i].T)) # probability to stream each reco
rewards = np.zeros((n_users, n))
i = 0
rewards_uncascaded = np.random.binomial(1, probas) # drawing rewards from probabilities
positive_rewards = set()

In [129]:
nz = rewards_uncascaded.nonzero()

In [130]:
positive_rewards = set()

In [140]:
rewards[1]

array([0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0.])

In [152]:
nz       # 1681

(array([    1,     1,    28, ..., 19983, 19984, 19989], dtype=int64),
 array([ 5, 11,  6, ...,  1,  4, 10], dtype=int64))

In [131]:
for i in range(len(nz[0])):
    if nz[0][i] not in positive_rewards:
        rewards[nz[0][i]][nz[1][i]] = 1
        positive_rewards.add(nz[0][i])

In [None]:
policies[j].update_policy(user_ids, recos, rewards, args.l_init) # 20,000

In [160]:
rewards.shape

(20000, 12)

In [None]:
def update_policy(user_ids, recos , rewards, l_init=3):
    rewards = 2*rewards - 1
    batch_size = len(user_ids)
    modified_playlists ={}
    for i in range(batch_size):
        total_stream = len(rewards[i].nonzero())
        nb_display = 0
        for p, r in zip(recos[i], rewards[i]):
            nb_display +=1
            if p not in modified_playlists:
                modified_playlists[p] = {"X" : [], "Y" : []}
            modified_playlists[p]["X"].append(user_features[user_ids[i]])
            modified_playlists[p]["Y"].append(r)
            if cascade_model and ((total_stream == 0 and nb_display == l_init) or (r == 1)):
                break
    for p,v in modified_playlists.items():
        X = np.array(v["X"])
        Y = np.array(v["Y"])
        models[p].fit(X,Y)
        m[p] = models[p].m
        q[p] = models[p].q
    return

In [161]:
rewards = 2*rewards - 1

In [191]:
user_features[user_ids[0]].shape

(97,)

In [187]:
modified_playlists ={}
total_stream = len(rewards[i].nonzero())
nb_display = 0
for p, r in zip(recos[1], rewards[1]):
    print(p)
    print(r)
    nb_display +=1
    print(modified_playlists)
    if p not in modified_playlists:
        modified_playlists[p] = {"X" : [], "Y" : []}
    modified_playlists[p]["X"].append(user_features[user_ids[i]])   # i번째 유저의 feature
    modified_playlists[p]["Y"].append(r)
    if ((total_stream == 0 and nb_display == l_init) or (r == 1)):
        break

0
-1.0
{}
0
-1.0
{0: {'X': [array([ 3.55553091e-01,  1.31532991e+00, -5.43776512e-01,  2.49324751e+00,
        3.04174900e+00, -3.02621651e+00, -7.34425604e-01,  2.78896272e-01,
        3.52283150e-01,  1.44344413e+00, -8.06229949e-01, -5.83925009e-01,
        6.21066093e-01, -1.05118573e+00,  8.35603297e-01,  4.00180995e-01,
       -1.28851902e+00,  8.46201658e-01, -5.28531551e-01,  8.30040216e-01,
       -1.13057756e+00, -1.23820865e+00, -1.34782064e+00,  8.41158509e-01,
       -1.28062272e+00,  2.08898336e-01, -1.95007038e+00, -6.70083106e-01,
        6.48224473e-01, -9.37087119e-01,  2.10556746e+00, -3.70274067e-01,
        2.56791902e+00, -1.24837029e+00,  1.22563410e+00, -2.18630528e+00,
        4.15721655e-01,  3.67577672e-01,  7.64593601e-01, -1.37893558e+00,
        1.06899589e-01, -2.12456480e-01,  2.43348718e-01,  1.81733295e-01,
       -9.80804563e-01,  3.89862448e-01, -3.43725729e+00,  1.52576901e-03,
        1.15752971e+00,  7.42830217e-01, -2.22440168e-01,  6.64128125e-0

In [213]:
for p,v in modified_playlists.items():
    print(p)
    print(v)
    X = np.array(v["X"])
    Y = np.array(v["Y"])
    #self.models[p].fit(X,Y)
    #self.m[p] = self.models[p].m
    #self.q[p] = self.models[p].q

0
{'X': [array([ 3.55553091e-01,  1.31532991e+00, -5.43776512e-01,  2.49324751e+00,
        3.04174900e+00, -3.02621651e+00, -7.34425604e-01,  2.78896272e-01,
        3.52283150e-01,  1.44344413e+00, -8.06229949e-01, -5.83925009e-01,
        6.21066093e-01, -1.05118573e+00,  8.35603297e-01,  4.00180995e-01,
       -1.28851902e+00,  8.46201658e-01, -5.28531551e-01,  8.30040216e-01,
       -1.13057756e+00, -1.23820865e+00, -1.34782064e+00,  8.41158509e-01,
       -1.28062272e+00,  2.08898336e-01, -1.95007038e+00, -6.70083106e-01,
        6.48224473e-01, -9.37087119e-01,  2.10556746e+00, -3.70274067e-01,
        2.56791902e+00, -1.24837029e+00,  1.22563410e+00, -2.18630528e+00,
        4.15721655e-01,  3.67577672e-01,  7.64593601e-01, -1.37893558e+00,
        1.06899589e-01, -2.12456480e-01,  2.43348718e-01,  1.81733295e-01,
       -9.80804563e-01,  3.89862448e-01, -3.43725729e+00,  1.52576901e-03,
        1.15752971e+00,  7.42830217e-01, -2.22440168e-01,  6.64128125e-01,
        1.535051

In [215]:
X.shape

(6, 97)

In [197]:
models = [OnlineLogisticRegression(1, 1, user_features.shape[1], -5, 15) for i in range(n_playlists)]

In [201]:
len(models)

862

In [211]:
OnlineLogisticRegression(1, 1, user_features.shape[1], -5, 15).w

array([ 1.27869833, -1.48647507, -0.62202638, -0.75782075,  0.39607624,
       -0.54487322, -0.88793485,  1.579089  ,  1.60794286,  1.37056297,
       -0.46391536, -1.79913816, -1.78860467,  0.19042472,  0.82060434,
        0.29772032, -1.60539102, -0.34645159,  0.98695482, -0.33224903,
       -0.50553315, -1.35338951, -0.26477693,  1.26506342,  0.61343787,
       -0.61221625,  0.22308549, -0.19776524, -2.13924161,  0.44671884,
        1.51436437,  0.72517748, -0.04192129,  2.75094731, -0.55387553,
        0.51102139, -1.09876652,  0.42883176, -2.0505891 ,  2.14068369,
        0.53210255, -0.71054818, -1.9768875 ,  0.31741762,  0.75528563,
       -0.95323037, -1.79168409,  0.69582478, -0.95996757, -0.78318353,
        0.89542914, -2.25591422, -0.04669314, -1.67167934,  0.37346505,
       -0.75564856,  1.04252998, -0.53527609,  0.22679408,  0.0123912 ,
        0.28131679, -2.23992095, -0.41094878,  0.79243092, -1.76195578,
        0.83335932, -0.85220451,  1.28711104,  0.35586347, -0.74

In [212]:
np.random.normal(np.zeros(97), 1 * (np.ones(97))**(-1.0), size = 97).shape

(97,)