In [1]:
import gym, recogym
import matplotlib.pyplot as plt
from copy import deepcopy
import numpy as np
import pandas as pd
import pickle as pkl
pd.options.mode.chained_assignment = None 
# from inspect import getsource
from recogym.evaluate_agent_sale import verify_agents_sale, plot_verify_agents_sale, plot_CR_CTR
from tqdm import tqdm

# env_1_sale_args is a dictionary of default parameters (i.e. number of products)
from recogym import env_1_sale_args, Configuration
from recogym.envs.utils_sale import (share_states, share_sale, env_infos, count_sales_first_session, 
                                     get_beta_confidence_interval, share_user_with_sale, share_clicks_with_sale)
# Environment overrides. env_1_sale_args is the shared dictionary of default
# parameters; the overrides below are applied before the gym is initialised
# so that the environment is built exactly once with the final settings.
# (Previously the gym was created, initialised and reset with the default
# number of products, and that instance was then thrown away when `env`
# was rebound a few lines later.)
env_1_sale_args['random_seed'] = 42
env_1_sale_args['num_products'] = 10
num_products = env_1_sale_args['num_products']

# Episode flag kept for backward compatibility; nothing in the visible
# notebook reads it.
done = False

# Initialize the gym by calling .make() and .init_gym()
env = gym.make('reco-gym-sale-v1')
env.init_gym(env_1_sale_args)

# NOTE(review): this override is written AFTER init_gym(), so whether the
# already-initialised `env` sees it depends on whether init_gym() copies the
# arguments — TODO confirm. The value does travel with env_1_sale_args into
# the agent configuration and the saved result files further down.
env_1_sale_args['number_of_flips'] = 5

## Build targets

In [2]:
from recogym.agents.sale_agent import ClickRewardProvider, MDPRewardProvider

# Reward ("target") providers used when training the agents.
# One click-based provider plus three MDP-based variants that differ only in
# the `clicks_only` / `organic_only` flags passed to the constructor.
Click_rewards = ClickRewardProvider()
MDP_rewards = MDPRewardProvider()
MDP_rewards_all = MDPRewardProvider(clicks_only=False)
MDP_rewards_pureorganic = MDPRewardProvider(clicks_only=False, organic_only=True)

# Lookup table: short name -> reward provider instance.
rewards = dict(
    click=Click_rewards,
    MDP=MDP_rewards,
    MDP_all=MDP_rewards_all,
    MDP_pureorganic=MDP_rewards_pureorganic,
)

## Build features

In [3]:
from recogym.agents.sale_agent import (
    CountViewsClicksFeatureProvider,
    CountViewsFeatureProvider,
    ShareViewsClicksFeatureProvider,
    ShareViewsFeatureProvider,
)

# Short name -> feature provider class. Every provider is built from the
# configuration of the already-initialised environment.
_provider_classes = {
    'vc': CountViewsClicksFeatureProvider,
    'v': CountViewsFeatureProvider,
    'vc_share': ShareViewsClicksFeatureProvider,
    'v_share': ShareViewsFeatureProvider,
}

# Lookup table: short name -> feature provider instance.
features = {
    short_name: provider_class(env.config)
    for short_name, provider_class in _provider_classes.items()
}

# Individual handles, kept under their historical names.
vc_feature = features['vc']
v_feature = features['v']
vc_share_feature = features['vc_share']
v_share_feature = features['v_share']

## Train baseline agents

In [9]:
# Experiment sizes: number of users used to generate training logs, and
# number of users used in the A/B test.
num_users = 1000
num_users_AB = 5000

# Mirror both sizes into the shared argument dictionary, which is saved
# alongside the results later on.
env_1_sale_args.update(num_users=num_users, num_users_AB=num_users_AB)

# Feature provider used by every agent trained below.
feature_name = 'v_share'
feature = features[feature_name]

In [20]:
from recogym.agents import RandomAgent, random_args
from recogym.agents import OrganicUserEventCounterAgent, organic_user_count_args
from recogym.agents.sale_agent import build_train_data
from recogym.agents.sale_agent import SaleLikelihoodAgent, SaleProductLikelihoodAgent

# Errors that mean "there is no usable cache file": missing file/directory,
# truncated or corrupt pickle, or a pickle that references classes which can
# no longer be resolved. Anything else (e.g. KeyboardInterrupt) propagates
# instead of silently triggering a regeneration of the logs.
_CACHE_MISS_ERRORS = (OSError, EOFError, pkl.UnpicklingError, AttributeError, ImportError)


def _load_or_generate_logs(name_agent, generate):
    """Return the logs cached for `name_agent`, generating them on a cache miss.

    The cache file is 'data/data<num_users><name_agent>.pkl' (`num_users` is
    read from the notebook namespace). A forward slash is used, like every
    other path in this notebook; the previous 'data\\data' spelling relied on
    the invalid escape sequence '\\d' and only denoted a directory on Windows.

    Args:
        name_agent: short name of the logging policy; part of the file name.
        generate: zero-argument callable that produces the logs. It is only
            called when the cache cannot be read; its result is pickled to
            the cache file before being returned.

    Returns:
        The cached or freshly generated logs.

    Raises:
        OSError: if the freshly generated logs cannot be written (for
            example when the 'data' directory does not exist).
    """
    cache_path = 'data/data' + str(num_users) + name_agent + '.pkl'
    try:
        # Only load pickles you produced yourself: unpickling runs arbitrary code.
        with open(cache_path, 'rb') as cache_file:
            return pkl.load(cache_file)
    except _CACHE_MISS_ERRORS:
        pass  # fall through and regenerate

    generated_logs = generate()
    with open(cache_path, 'wb') as cache_file:
        pkl.dump(generated_logs, cache_file)
    return generated_logs


agents = {}
logs = {}

############## Random agent
random_agent = RandomAgent(Configuration(random_args))
agents['rand'] = random_agent
# No agent is passed to generate_logs here, exactly as before: the
# environment's own default logging behaviour is used.
logs['rand'] = _load_or_generate_logs(
    'rand',
    lambda: deepcopy(env).generate_logs(num_users),
)

############## Organic agent
organic_counter_agent = OrganicUserEventCounterAgent(Configuration({**organic_user_count_args,
                                                                    **env_1_sale_args,
                                                                    'select_randomly': True}))
agents['organic'] = organic_counter_agent
logs['organic'] = _load_or_generate_logs(
    'organic',
    lambda: deepcopy(env).generate_logs(num_users, agent=organic_counter_agent),
)


############## Likelihood click
def _generate_likclick_logs():
    """Train the click-likelihood agent, register it, and log with it.

    The agent is trained on the organic logs. This was previously implicit:
    the code reused the `data` variable left over from the organic section.
    """
    likelihood_logreg_click = SaleLikelihoodAgent(feature, Click_rewards)
    likelihood_logreg_click.train(logs['organic'])
    agents['likclick'] = likelihood_logreg_click
    return deepcopy(env).generate_logs(num_users, agent=likelihood_logreg_click)


# NOTE(review): as before, agents['likclick'] is only populated when the logs
# have to be regenerated; on a cache hit the agent is never trained.
logs['likclick'] = _load_or_generate_logs('likclick', _generate_likclick_logs)

## Random logging policy

In [26]:
def train_agents(name_logging,logs,feature_name,features):
    """Train the click agent and the sale/click product agents on one set of logs.

    Args:
        name_logging: key into `logs` selecting the logging policy whose
            logs are used as training data; also part of the output file name.
        logs: mapping from logging-policy name to logged data.
        feature_name: key into `features`; also part of the output file name.
        features: mapping from feature name to feature provider.

    Returns:
        A pair `(info, save_agents)`:
        `info` maps each agent's `info["Name"]` to its `info` object, and
        `save_agents` maps a fixed descriptive key to the trained agent.

    Side effects:
        Pickles `[info, save_agents]` to
        'data/agents<num_users><name_logging><feature_name>.pkl'.
        `num_users` and the reward providers (`Click_rewards`, `MDP_rewards`,
        `MDP_rewards_all`, `MDP_rewards_pureorganic`) are read from the
        notebook namespace.
    """
    data = logs[name_logging]
    feature = features[feature_name]

    # Click agent.
    click_agent = SaleLikelihoodAgent(feature, Click_rewards)
    click_agent.train(data)
    info = {click_agent.info["Name"]: click_agent.info}
    save_agents = {"likelihood_logreg_click": click_agent}

    # (key in save_agents, reward providers, discounts) for every product
    # agent, in training order. One feature provider is supplied per reward
    # provider.
    #
    # NOTE(review): the two "per product discount" entries are configured
    # exactly like the two "non-specific discount" entries above them, so as
    # written they are built from identical arguments. Presumably the
    # per-product variants were meant to differ (perhaps via
    # `discounts_with_action`) — TODO confirm against
    # SaleProductLikelihoodAgent before changing; behaviour is kept as-is.
    product_agent_specs = [
        # No discount
        ("likelihood_saleclickprod",
         [Click_rewards, MDP_rewards_all], [0, 0]),
        # Non-specific discount, all observations
        ("likelihood_saleclickprod_discount_all",
         [Click_rewards, MDP_rewards_all, MDP_rewards_pureorganic], [0, 0, -1]),
        # Non-specific discount, clicked observations
        ("likelihood_saleclickprod_discount",
         [Click_rewards, MDP_rewards, MDP_rewards_pureorganic], [0, 0, -1]),
        # Per product discount, all observations
        ("likelihood_saleclickprod_discount_spe_all",
         [Click_rewards, MDP_rewards_all, MDP_rewards_pureorganic], [0, 0, -1]),
        # Per product discount, clicked observations
        ("likelihood_saleclickprod_discount_spe",
         [Click_rewards, MDP_rewards, MDP_rewards_pureorganic], [0, 0, -1]),
    ]

    for agent_key, reward_providers, discounts in product_agent_specs:
        product_agent = SaleProductLikelihoodAgent(
            feature_provider_list=[feature] * len(reward_providers),
            reward_provider_list=reward_providers,
            discounts=discounts,
            discounts_with_action=False)
        product_agent.train(data)
        info[product_agent.info["Name"]] = product_agent.info
        save_agents[agent_key] = product_agent

    # Persist the trained agents; the context manager makes sure the file is
    # flushed and closed even if pickling fails part-way.
    agents_path = 'data/agents'+str(num_users)+name_logging+feature_name+'.pkl'
    with open(agents_path, 'wb') as agents_file:
        pkl.dump([info, save_agents], agents_file)
    return info, save_agents

In [27]:
# Collects the A/B-test result of every logging policy evaluated below,
# keyed by "<logging><num_users>_<num_users_AB>_<feature_name>".
res_dict = dict()

In [None]:
name_logging = 'rand'
# Identifier of this run; used for the result file name and the res_dict key.
run_key = name_logging+str(num_users)+"_"+str(num_users_AB)+"_"+feature_name

# Reuse previously trained agents when a readable cache exists, otherwise
# train them now. Only cache-related failures trigger retraining; anything
# else (e.g. KeyboardInterrupt) propagates.
# Only load pickles you produced yourself: unpickling runs arbitrary code.
try:
    with open('data/agents'+str(num_users)+name_logging+feature_name+'.pkl', 'rb') as agents_file:
        info, save_agents = pkl.load(agents_file)
except (OSError, EOFError, pkl.UnpicklingError, AttributeError, ImportError):
    info, save_agents = train_agents(name_logging,logs,feature_name,features)

# A/B test
env.reset()
res = verify_agents_sale(
    env,
    number_of_users=num_users_AB,
    agents=save_agents
)

# save result
with open("data/res_"+run_key+".pkl", "wb") as res_file:
    pkl.dump([res, env_1_sale_args, info, save_agents], res_file)
res_dict[run_key] = res

# plot result
plot_verify_agents_sale(res["sale rate"], res["CTR"], res["Tot sales"],
                        res['Share user with sale'], res['Share sale after click'],
                        res["User embeddings"])
None  # keep the cell from echoing a return value

Organic Users: 0it [00:00, ?it/s]
Users:  18%|████████████▎                                                         | 883/5000 [26:05<4:42:07,  4.11s/it]

In [None]:
name_logging = 'organic'
# Identifier of this run; used for the result file name and the res_dict key.
run_key = name_logging+str(num_users)+"_"+str(num_users_AB)+"_"+feature_name

# Reuse previously trained agents when a readable cache exists, otherwise
# train them now. Only cache-related failures trigger retraining; anything
# else (e.g. KeyboardInterrupt) propagates.
# Only load pickles you produced yourself: unpickling runs arbitrary code.
try:
    with open('data/agents'+str(num_users)+name_logging+feature_name+'.pkl', 'rb') as agents_file:
        info, save_agents = pkl.load(agents_file)
except (OSError, EOFError, pkl.UnpicklingError, AttributeError, ImportError):
    info, save_agents = train_agents(name_logging,logs,feature_name,features)

# A/B test
env.reset()
res = verify_agents_sale(
    env,
    number_of_users=num_users_AB,
    agents=save_agents
)

# save result
with open("data/res_"+run_key+".pkl", "wb") as res_file:
    pkl.dump([res, env_1_sale_args, info, save_agents], res_file)
res_dict[run_key] = res

# plot result
plot_verify_agents_sale(res["sale rate"], res["CTR"], res["Tot sales"],
                        res['Share user with sale'], res['Share sale after click'],
                        res["User embeddings"])
None  # keep the cell from echoing a return value

In [None]:
name_logging = 'likclick'
# Identifier of this run; used for the result file name and the res_dict key.
run_key = name_logging+str(num_users)+"_"+str(num_users_AB)+"_"+feature_name

# Reuse previously trained agents when a readable cache exists, otherwise
# train them now. Only cache-related failures trigger retraining; anything
# else (e.g. KeyboardInterrupt) propagates.
# Only load pickles you produced yourself: unpickling runs arbitrary code.
try:
    with open('data/agents'+str(num_users)+name_logging+feature_name+'.pkl', 'rb') as agents_file:
        info, save_agents = pkl.load(agents_file)
except (OSError, EOFError, pkl.UnpicklingError, AttributeError, ImportError):
    info, save_agents = train_agents(name_logging,logs,feature_name,features)

# A/B test
env.reset()
res = verify_agents_sale(
    env,
    number_of_users=num_users_AB,
    agents=save_agents
)

# save result
with open("data/res_"+run_key+".pkl", "wb") as res_file:
    pkl.dump([res, env_1_sale_args, info, save_agents], res_file)
res_dict[run_key] = res

# plot result
plot_verify_agents_sale(res["sale rate"], res["CTR"], res["Tot sales"],
                        res['Share user with sale'], res['Share sale after click'],
                        res["User embeddings"])
None  # keep the cell from echoing a return value

In [None]:
# Save every A/B-test result collected so far in one file. The file name
# ends with the number of results it contains.
# (The closing parenthesis of str(len(res_dict)) used to be misplaced, which
# left a bracket unclosed and tried to add an int to ".pkl".)
res_dict_path = ("data/res_dict"+str(num_users)+"_"+str(num_users_AB)+"_"
                 +feature_name+"_"+str(len(res_dict))+".pkl")
with open(res_dict_path, "wb") as res_dict_file:
    pkl.dump(res_dict, res_dict_file)