In [7]:
import argparse
import json
import logging
import os
import time

import numpy as np
import pandas as pd

from environment import ContextualEnvironment
from policies import KLUCBSegmentPolicy, RandomPolicy, ExploreThenCommitSegmentPolicy, EpsilonGreedySegmentPolicy, TSSegmentPolicy, LinearTSPolicy
from main import *

In [2]:
# Load the user and playlist feature tables from their CSV files.
users_path, playlists_path = "data/user_features.csv", "data/playlist_features.csv"
users_df = pd.read_csv(filepath_or_buffer=users_path)
playlists_df = pd.read_csv(filepath_or_buffer=playlists_path)


In [3]:
# Problem sizes are the row counts of the two loaded tables.
n_users, n_playlists = users_df.shape[0], playlists_df.shape[0]
# Recommendation count handed to the environment constructor.
n_recos = 12
# Round interval referenced by the (commented-out) progress report in the simulation loop.
print_every = 10

In [4]:
# User feature matrix: every column except "segment", followed by a trailing
# column of ones (presumably an intercept term for the policies — TODO confirm).
user_features = np.concatenate(
    (np.array(users_df.drop(columns=["segment"])), np.ones((n_users, 1))),
    axis=1,
)
# Playlist feature matrix: the playlist table as a plain array.
playlist_features = np.array(playlists_df)

In [5]:
# Segment label of every user, taken from the "segment" column as a NumPy array.
user_segment = np.array(users_df["segment"])

In [6]:
# Build the simulation environment from the feature matrices and user segments.
# The simulation loop further down constructs its own environment for each run.
cont_env = ContextualEnvironment(
    user_features,
    playlist_features,
    user_segment,
    n_recos,
)

In [13]:
# Simulation schedule: number of rounds and users sampled in each round.
n_rounds = 100
n_users_per_round = 20000

# Policies under evaluation; set_policies turns the names into policy objects.
policies_name = ["random"]
policies = set_policies(policies_name, user_segment, user_features, n_playlists)
n_policies = len(policies)

# Per-round accumulators: the optimal reward of each round, and the reward
# collected by each policy (one row per policy, one column per round).
overall_optimal_reward = np.zeros(n_rounds)
overall_rewards = np.zeros((n_policies, n_rounds))

In [14]:
# printf-style template for a result file; "%d" is the placeholder for the run index.
# (The previous "%." placeholder was not a valid conversion and raised ValueError when formatted.)
output_path = 'reproducibility_exp/results_%d.json'

In [10]:
# Value forwarded as the last argument of recommend_to_users_batch / update_policy.
# This cell used to read it (and n_recos) from an `args` namespace that no cell of
# this notebook defines; the notebook-level n_recos is used instead so the policies
# and the environment are guaranteed to agree on the number of recommendations.
# NOTE(review): 3 is assumed to match the `--l_init` default in main.py — confirm.
l_init = 3

for k in range(1):
    print("Run: %d"%(k))
    # Each run gets its own environment instance.
    # NOTE(review): `policies` is built once outside this loop, so if more than one
    # run is requested the policies presumably keep what they learned in earlier
    # runs — confirm whether they should be rebuilt per run.
    cont_env = ContextualEnvironment(user_features, playlist_features, user_segment, n_recos)
    #print("STARTING SIMULATIONS")
    #print("for %d rounds, with %d users per round (randomly drawn with replacement)\n \n" % (n_rounds, n_users_per_round))
    start_time = time.time()
    for i in range(n_rounds):
        # Select a batch of n_users_per_round user ids, drawn with replacement.
        # Passing the integer n_users samples from 0..n_users-1 directly, instead of
        # converting range(n_users) to an array on every round.
        user_ids = np.random.choice(n_users, n_users_per_round)
        # Reference reward for this batch, read from the environment's th_rewards.
        overall_optimal_reward[i] = np.take(cont_env.th_rewards, user_ids).sum()
        # Iterate over all policies
        for j in range(n_policies):
            # Compute n_recos recommendations
            recos = policies[j].recommend_to_users_batch(user_ids, n_recos, l_init)
            # Compute rewards
            rewards = cont_env.simulate_batch_users_reward(batch_user_ids= user_ids, batch_recos=recos)
            # Update policy based on rewards
            policies[j].update_policy(user_ids, recos, rewards, l_init)
            overall_rewards[j,i] = rewards.sum()
        # Progress report (disabled); uncomment to print elapsed time and cumulative regrets.
        #if i == 0 or (i+1) % print_every == 0 or i+1 == n_rounds:
        #    print("Round: %d/%d. Elapsed time: %f sec." % (i+1, n_rounds, time.time() - start_time))
        #    print("Cumulative regrets: \n%s \n" % "\n".join(["	%s : %s" % (policies_name[j], str(np.sum(overall_optimal_reward - overall_rewards[j]))) for j in range(n_policies)]))

    output_path = 'reproducibility_exp/New_enviro_results_%d.json'%(k)
    # Make sure the target directory exists so a finished simulation is not lost
    # to a FileNotFoundError at write time.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    print("Saving cumulative regrets in %s" % output_path)
    # Cumulative regret per policy: running sum of (optimal reward - obtained reward) over rounds.
    cumulative_regrets = {policies_name[j] : np.cumsum(overall_optimal_reward - overall_rewards[j]).tolist() for j in range(n_policies)}
    with open(output_path, 'w') as fp:
        json.dump(cumulative_regrets, fp)
