In [127]:
import pandas as pd
import numpy as np
from scipy.special import expit

In [3]:
from environment import ContextualEnvironment
from policies import KLUCBSegmentPolicy, RandomPolicy, ExploreThenCommitSegmentPolicy, EpsilonGreedySegmentPolicy, TSSegmentPolicy, LinearTSPolicy
import argparse
import json
import logging
import numpy as np
import pandas as pd
import time

In [4]:
# Simulation settings declared through argparse. They are parsed at the end of
# this cell with an empty argument list, so every option resolves to its default.
parser = argparse.ArgumentParser()

# Input / output locations.
parser.add_argument(
    "--users_path",
    type=str,
    default="data/user_features.csv",
    required=False,
    help="Path to user features file",
)
parser.add_argument(
    "--playlists_path",
    type=str,
    default="data/playlist_features.csv",
    required=False,
    help="Path to playlist features file",
)
parser.add_argument(
    "--output_path",
    type=str,
    default="results.json",
    required=False,
    help="Path to json file to save regret values",
)

# Which bandit algorithms to run.
parser.add_argument(
    "--policies",
    type=str,
    default="random,ts-seg-naive",
    required=False,
    help="Bandit algorithms to evaluate, separated by commas",
)

# Carousel layout.
parser.add_argument(
    "--n_recos",
    type=int,
    default=12,
    required=False,
    help="Number of slots L in the carousel i.e. number of recommendations to provide",
)
parser.add_argument(
    "--l_init",
    type=int,
    default=3,
    required=False,
    help="Number of slots L_init initially visible in the carousel",
)

# Size of the simulation and reporting frequency.
parser.add_argument(
    "--n_users_per_round",
    type=int,
    default=20000,
    required=False,
    help="Number of users randomly selected (with replacement) per round",
)
parser.add_argument(
    "--n_rounds",
    type=int,
    default=100,
    required=False,
    help="Number of simulated rounds",
)
parser.add_argument(
    "--print_every",
    type=int,
    default=10,
    required=False,
    help="Print cumulative regrets every 'print_every' round",
)

# An explicit empty list keeps argparse from reading sys.argv.
args = parser.parse_args(args=[])

In [5]:
# Read the user and playlist feature tables from their CSV files.
users_df = pd.read_csv('data/user_features_small.csv')
playlists_df = pd.read_csv('data/playlist_features.csv')

# Number of rows in each table.
n_playlists = playlists_df.shape[0]
n_users = users_df.shape[0]

In [6]:
# Display the user table loaded above.
users_df

Unnamed: 0,segment,dim_0,dim_1,dim_2,dim_3,dim_4,dim_5,dim_6,dim_7,dim_8,...,dim_86,dim_87,dim_88,dim_89,dim_90,dim_91,dim_92,dim_93,dim_94,dim_95
0,0,0.238895,1.559068,-1.492596,1.041009,2.040157,-2.914569,-0.743588,1.996449,-0.917277,...,-0.652371,-0.539649,0.189309,1.241029,0.767918,0.619496,4.004763,1.312571,-0.441051,0.898858
1,0,-0.268669,0.617194,-0.742123,1.677183,1.369093,-1.76186,-1.237178,1.276799,-2.924475,...,-0.060747,-1.31246,1.352925,1.251065,-1.624764,1.314707,2.027448,0.988371,-0.31462,1.583408
2,0,0.730793,1.615805,-0.540211,1.519919,0.474167,-2.616138,-1.168033,0.935113,-1.69511,...,-0.041504,0.456923,0.24924,0.878546,-0.787824,0.495024,1.752296,0.536689,-0.106732,0.516656
3,0,1.739003,1.546201,0.756946,2.240684,0.636115,-2.327271,-0.68407,1.323303,-3.234894,...,-1.372924,-1.172656,0.859095,1.026131,-0.410318,0.017065,-0.019434,0.003572,-0.638508,0.527119
4,0,0.0609,1.313942,-0.725372,0.953238,1.338075,-2.561447,-0.384696,1.040172,-1.201964,...,-0.92038,-0.417005,0.705332,0.741709,-1.598617,0.882672,2.02324,0.971578,-0.329573,0.629994
5,0,-0.33771,0.994489,-1.16442,0.701745,1.000777,-2.53782,-1.059542,1.862303,-2.521206,...,-0.596163,0.675055,-0.310828,-0.541382,-1.274944,1.066462,1.960495,0.638103,0.012242,1.127664
6,0,0.355553,1.31533,-0.543777,2.493248,3.041749,-3.026217,-0.734426,0.278896,0.352283,...,0.767704,-1.034823,1.684467,0.58124,-1.790003,1.020681,3.9429,1.417614,0.160657,-0.708674
7,0,-1.163979,-0.476468,-0.853902,0.571501,1.061989,-2.107837,-0.919948,2.836736,-0.982072,...,-0.187124,-0.173995,1.641843,1.497386,-2.70865,1.65354,0.799381,0.54167,0.080587,0.388664
8,0,0.047584,0.757017,0.299388,1.218914,0.671031,-3.283733,-1.343751,1.252397,-1.550047,...,-0.063909,-0.567261,-0.053998,0.229956,-1.894081,0.711187,2.428878,0.563999,0.439974,0.276861


In [7]:
# User feature matrix: every column of the user table except "segment",
# followed by one extra column filled with ones.
user_features = np.hstack(
    [np.array(users_df.drop(columns=["segment"])), np.ones((n_users, 1))]
)

# Playlist feature matrix: the playlist table converted to an array as-is.
playlist_features = np.array(playlists_df)

In [8]:
# Segment of each user, copied out of the "segment" column as an array.
user_segment = np.array(users_df["segment"])

In [9]:
# Display the per-user segment array.
user_segment

array([0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)

In [10]:
def set_policies(policies_name, user_segment, user_features, n_playlists):
    """Instantiate the bandit policies requested by name.

    Only the requested policies are constructed: each table entry is a
    zero-argument factory that is called on demand, so asking for a couple of
    policies does not pay for building all of them.

    Args:
        policies_name: Iterable of policy identifiers; each must be a key of
            the factory table below.
        user_segment: Forwarded to the segment-based policy constructors.
        user_features: Forwarded to the LinearTSPolicy constructors.
        n_playlists: Forwarded to every policy constructor.

    Returns:
        list: One policy object per entry of ``policies_name``, in the same
        order. A name listed several times maps to the same object each time.

    Raises:
        KeyError: If a requested name is not a key of the factory table.
    """
    # Please see section 3.3 of RecSys paper for a description of policies
    policy_factories = {
        'random' : lambda: RandomPolicy(n_playlists),
        'etc-seg-explore' : lambda: ExploreThenCommitSegmentPolicy(user_segment, n_playlists, min_n = 100, cascade_model = True),
        'etc-seg-exploit' : lambda: ExploreThenCommitSegmentPolicy(user_segment, n_playlists, min_n = 20, cascade_model = True),
        'epsilon-greedy-explore' : lambda: EpsilonGreedySegmentPolicy(user_segment, n_playlists, epsilon = 0.1, cascade_model = True),
        'epsilon-greedy-exploit' : lambda: EpsilonGreedySegmentPolicy(user_segment, n_playlists, epsilon = 0.01, cascade_model = True),
        'kl-ucb-seg' : lambda: KLUCBSegmentPolicy(user_segment, n_playlists, cascade_model = True),
        'ts-seg-naive' : lambda: TSSegmentPolicy(user_segment, n_playlists, alpha_zero = 1, beta_zero = 1, cascade_model = True),
        'ts-seg-pessimistic' : lambda: TSSegmentPolicy(user_segment, n_playlists, alpha_zero = 1, beta_zero = 99, cascade_model = True),
        'ts-lin-naive' : lambda: LinearTSPolicy(user_features, n_playlists, bias = 0.0, cascade_model = True),
        'ts-lin-pessimistic' : lambda: LinearTSPolicy(user_features, n_playlists, bias = -5.0, cascade_model = True),
        # Versions of epsilon-greedy-explore and ts-seg-pessimistic WITHOUT cascade model
        'epsilon-greedy-explore-no-cascade' : lambda: EpsilonGreedySegmentPolicy(user_segment, n_playlists, epsilon = 0.1, cascade_model = False),
        'ts-seg-pessimistic-no-cascade' : lambda: TSSegmentPolicy(user_segment, n_playlists, alpha_zero = 1, beta_zero = 99, cascade_model = False)
    }

    # Materialise once so a one-shot iterable can be both validated and used.
    requested = list(policies_name)

    # Fail before constructing anything, and say which names are valid.
    unknown = [name for name in requested if name not in policy_factories]
    if unknown:
        raise KeyError("Unknown policy name(s): %s. Available policies: %s"
                       % (", ".join(map(str, unknown)), ", ".join(policy_factories)))

    # Build each distinct policy exactly once; repeated names share the instance.
    built = {}
    for name in requested:
        if name not in built:
            built[name] = policy_factories[name]()

    return [built[name] for name in requested]

In [11]:
# Comma-separated selection of policies to compare, split into a list of names.
po = "random,etc-seg-explore,ts-seg-pessimistic"
policies_name = po.split(sep=",")

In [12]:
# Build one policy object per selected name.
policies = set_policies(
    policies_name=policies_name,
    user_segment=user_segment,
    user_features=user_features,
    n_playlists=n_playlists,
)

In [13]:
# Simulation dimensions, taken from the parsed options and the policy list.
n_rounds = args.n_rounds
n_users_per_round = args.n_users_per_round
n_policies = len(policies)  # 3 for the selection made above

# Reward accumulators written by the simulation loop:
# one row per policy and one column per round, plus the optimal reward per round.
overall_optimal_reward = np.zeros(n_rounds)
overall_rewards = np.zeros((n_policies, n_rounds))

In [40]:
# Build the simulation environment from the user features, playlist features,
# user segments and the number of recommendations per carousel.
cont_env = ContextualEnvironment(user_features, playlist_features, user_segment, args.n_recos)

In [46]:
# Display the environment's `th_rewards` array.
cont_env.th_rewards

array([0.51069545, 0.60661954, 0.45462272, 0.35597582, 0.45974146,
       0.33826388, 0.94396647, 0.6756706 , 0.35810638])

In [45]:
# Sum of `th_rewards` over a sampled batch of users (the same expression the
# simulation loop stores in overall_optimal_reward).
# NOTE(review): `user_ids` is only assigned inside the simulation loop cell further
# down, so this cell raises NameError on a fresh kernel — run it after that loop.
np.take(cont_env.th_rewards, user_ids).sum()

10524.104014608263

In [None]:
# 1. Select the user_ids of each round, then let every policy recommend to
#    them, observe the simulated rewards and update itself.

# State used by the progress report at the end of each round. These names were
# referenced by the loop but never defined anywhere in the notebook.
logging.basicConfig(level = logging.INFO)
logger = logging.getLogger(__name__)
print_every = args.print_every
start_time = time.time()

for i in range(n_rounds):
    # Select batch of n_users_per_round users.
    # np.random.choice samples with replacement by default, so the same user_id
    # can appear several times in a batch (always the case when
    # n_users_per_round exceeds n_users).
    user_ids = np.random.choice(range(n_users), n_users_per_round)
    overall_optimal_reward[i] = np.take(cont_env.th_rewards, user_ids).sum()
    # Iterate over all policies
    for j in range(n_policies):
        # Compute n_recos recommendations
        recos = policies[j].recommend_to_users_batch(user_ids, args.n_recos, args.l_init)
        # Compute rewards
        rewards = cont_env.simulate_batch_users_reward(batch_user_ids= user_ids, batch_recos=recos)
        # Update policy based on rewards
        policies[j].update_policy(user_ids, recos, rewards, args.l_init)
        overall_rewards[j,i] = rewards.sum()
    # Print info on the first round, every print_every rounds, and on the last round
    if i == 0 or (i+1) % print_every == 0 or i+1 == n_rounds:
        logger.info("Round: %d/%d. Elapsed time: %f sec." % (i+1, n_rounds, time.time() - start_time))
        # Cumulative regret of a policy = sum over rounds of (optimal reward - obtained reward);
        # rounds not simulated yet contribute 0 because both arrays start at zero.
        logger.info("Cumulative regrets: \n%s \n" % "\n".join(["\t%s : %s" % (policies_name[j], str(np.sum(overall_optimal_reward - overall_rewards[j]))) for j in range(n_policies)]))


In [51]:
# Scratch values for stepping through one batch by hand:
# user count (overwrites the earlier n_users), batch start offset, batch size.
n_users, u, step = 9, 0, 100000

In [None]:
# One batch of the optimal-reward computation, run against the notebook's
# environment object. The snippet originally referenced `self`, which is not
# defined at notebook scope and made the cell fail with NameError.
# NOTE(review): assumes ContextualEnvironment exposes compute_optimal_recos and
# compute_theoretical_rewards, as the original `self.*` calls imply — confirm.
batch_end = min(n_users, u + step)  # take the smaller value so the last batch stops at n_users
users_ids = range(u, batch_end)
# args.n_recos is the value handed to ContextualEnvironment when cont_env was built.
opt_recos = cont_env.compute_optimal_recos(users_ids, args.n_recos)
opt_rewards = cont_env.compute_theoretical_rewards(users_ids, opt_recos)
cont_env.th_rewards[u:batch_end] = opt_rewards
u += step

In [91]:
# Dot product of each of the first 9 users' feature rows with every playlist's
# feature row. Despite the name these are raw scores: no sigmoid is applied here.
probas = user_features[np.arange(0, 9)] @ playlist_features.T

In [102]:
# Per row, the column indices of the 12 largest scores, highest score first.
so = (-probas).argsort()[:, :12]

In [135]:
# Feature rows of the selected playlists, gathered with the index array `so`
# (the result has the shape of `so` plus a trailing feature axis).
so1 = playlist_features[so]

In [143]:
# Shape of the gathered playlist features for the first user.
so1[0].shape

(12, 97)

In [128]:
# Sigmoid of the user/playlist scores for each user's selected playlists.
# The per-user result gets its own name so that the global `probas` consumed by
# the argsort cell is not overwritten, and the loop-invariant row selection is
# done once instead of on every iteration.
# NOTE(review): the ValueError recorded under this cell mentions shapes (97,) and
# (12,), which does not match so1[i] having shape (12, 97); it presumably came from
# running this cell before `so1` was assigned its current value — re-run to confirm.
selected_user_features = np.take(user_features, range(0,9), axis = 0)
for i in range(9):                                                            # one iteration per user
    user_probas = expit(selected_user_features[i].dot(so1[i].T))              # sigmoid
    print(user_probas)

ValueError: shapes (97,) and (12,) not aligned: 97 (dim 0) != 12 (dim 0)

In [None]:
    def compute_theoretical_rewards(self, batch_user_ids, batch_recos):
        """Compute, for each user of a batch, 1 - prod(1 - p) over its recommendations.

        ``p`` is the sigmoid of the dot product between the user's feature row
        and a recommended playlist's feature row. Treating the per-playlist
        probabilities as independent, the result is the probability that at
        least one recommended playlist succeeds.

        Args:
            batch_user_ids: Row indices into ``self.user_features``, one per user.
            batch_recos: 2-D array of row indices into ``self.playlist_features``,
                one row of recommendations per user of the batch.

        Returns:
            numpy.ndarray: 1-D array with one value per user of the batch. A user
            with no recommendation gets 0.
        """
        # Keep only the feature rows of the users in the batch.
        batch_user_features = np.take(self.user_features, batch_user_ids, axis = 0)
        # Feature rows of every recommended item, indexed as [user, slot, feature].
        batch_playlist_features = np.take(self.playlist_features, batch_recos, axis = 0)
        # Per (user, slot): sigmoid of the user/playlist dot product.
        probas = expit(np.einsum("ud,usd->us", batch_user_features, batch_playlist_features))
        # np.prod replaces functools.reduce (never imported in this file) and
        # yields 1 for an empty row, so a user with no recommendation scores 0
        # instead of raising.
        return 1 - np.prod(1 - probas, axis = 1)