# Training and A/B test for Conversion agents - <font color='blue'> Proportion</font> of sale

In [15]:
import gym, recogym
import matplotlib.pyplot as plt
from copy import deepcopy
import numpy as np
import pandas as pd
import pickle as pkl
pd.options.mode.chained_assignment = None 
# from inspect import getsource
from recogym.evaluate_agent_sale import verify_agents_sale, display_metrics, verify_agents_sale_extended
from recogym.run_agent_sale import run_AB_test
from tqdm import tqdm

# env_1_sale_args is a dictionary of default parameters (i.e. number of products)
from recogym import env_1_sale_args, Configuration
from recogym.agents.sale_agent import train_agents, train_timeagents
from recogym.envs.utils_sale import format_avg_result, avg_result, format_avg_result_extended, avg_result_extended 

from joblib import Parallel, delayed

# You can overwrite environment arguments here:
env_1_sale_args['random_seed'] = 0
env_1_sale_args['num_products'] = 10
env_1_sale_args['number_of_flips'] = 10 

num_products = env_1_sale_args['num_products']
print('Number of products =',num_products)
print('Number of flips =',env_1_sale_args['number_of_flips'])
nb_flips = env_1_sale_args['number_of_flips']

# You can overwrite environment arguments here:
env_1_sale_args['random_seed'] = 42
env_1_sale_args['mu_sale'] = False 

# env_1_sale_args['kappa'] = 0.5
print('Value of kappa =',env_1_sale_args['kappa'])

# Initialize the gym 
env = gym.make('reco-gym-sale-v1')
env.init_gym(env_1_sale_args)



Number of products = 10
Number of flips = 10
Value of kappa = 0.2


In [16]:
# Repository to save pickles
data_repo = 'data_conversion/'

## Train baseline agents

#### Settings

In [17]:
## Choose number of users for training and AB test
# Number of users for the training
env_1_sale_args['num_users'] = 6 ##tochange !!
# env_1_sale_args['num_users'] = 5000
num_users = env_1_sale_args['num_users']

# Number of users for the A/B test
env_1_sale_args['num_users_AB'] = 7 ##tochange !!
# env_1_sale_args['num_users_AB'] = 5000 
num_users_AB = env_1_sale_args['num_users_AB']

# Choose user features
from recogym.agents.sale_agent import CountViewsClicksFeatureProvider, CountViewsFeatureProvider, ShareViewsClicksFeatureProvider, ShareViewsFeatureProvider
vc_feature = CountViewsClicksFeatureProvider(env.config)
v_feature = CountViewsFeatureProvider(env.config)
vc_share_feature = ShareViewsClicksFeatureProvider(env.config)
v_share_feature = ShareViewsFeatureProvider(env.config)
features = {'vc':vc_feature,
           'v':v_feature,
           'vc_share':vc_share_feature,
           'v_share':v_share_feature}
feature_name = 'v_share'
feature = features[feature_name]

#### <font color='red'> Number of A/B tests</font>

In [26]:
# # Choose number of A/B tests
num_AB_tests = 4 ##tochange !
# num_AB_tests = 25 ##tochange !

In [27]:
num_cores = 8 ##tochange!!

#### Logs

In [20]:
agents={}
logs={}

############## Random agent
name_agent = 'rand'+str(nb_flips)
from recogym.agents import RandomAgent, random_args
random_agent = RandomAgent(Configuration(random_args))
agents[name_agent] = random_agent


try:
    logs[name_agent] = pkl.load(open(data_repo + 'data' + str(num_users) + name_agent + '.pkl','rb'))
    print('--- Logs loaded---')
except: 
    print("--- Generate logs ---")
    logs[name_agent] = deepcopy(env).generate_logs(num_users)
    print(data_repo + 'data' + str(num_users) + name_agent + '.pkl')
    pkl.dump(logs[name_agent], open(data_repo + 'data' + str(num_users) + name_agent + '.pkl','wb'))

--- Logs loaded---


#### Training for the <font color='blue'> Proportion</font> of sale

##### No weights

In [21]:
def run_func(i): 
    return run_AB_test(i,name_ext=name_extension,env=env,num_users=num_users,num_users_AB=num_users_AB,
                                    agents=agents,save_agents=save_agents,name_logging=name_logging,feature_name=feature_name,
                                    data_repo=data_repo)

In [28]:
name_extension = 'prop'
name_logging = name_agent
info, save_agents = train_timeagents(name_logging,logs,feature_name,features, num_users=num_users, 
                                     kronecker_features=True,linear_reg=True, repo = data_repo)
r_list = Parallel(n_jobs=int(num_cores/2), verbose=10)(delayed(run_func)(i) for i in range(num_AB_tests))
res_dict = {r_list[i]['name']:r_list[i] for i in range(len(r_list))}
res_avg = avg_result_extended(res_dict)
(res_recap, res_recap_latex, 
 res_AB, res_AB_latex, 
 res_true, res_true_latex) = format_avg_result_extended(res_avg) #get dataframe & corresponding latex table
pkl.dump(res_dict, open(data_repo+"clean/res_dict_"+name_logging+str(num_users)+"_"+str(num_users_AB)+"_"+feature_name+name_extension+".pkl",'wb'))
pkl.dump(res_avg, open(data_repo+"clean/res_avg_"+name_logging+str(num_users)+"_"+str(num_users_AB)+"_"+feature_name+name_extension+".pkl",'wb'))
res_recap.to_csv(data_repo+"clean/res_recap_"+name_logging+str(num_users)+"_"+str(num_users_AB)+"_"+feature_name+name_extension+".csv",index = False)
res_true.to_csv(data_repo+"clean/res_true_"+name_logging+str(num_users)+"_"+str(num_users_AB)+"_"+feature_name+name_extension+".csv",index = False)

No discount
Discount


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   1 tasks      | elapsed:  1.6min
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:  1.6min remaining:  1.6min
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:  1.7min remaining:    0.0s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:  1.7min finished


-- A/B test --


Unnamed: 0,Agent,CTR,Att Sales,Att CR,Sales,CR
0,Rand,0.018 (0.000) %,0 (0),0.31 (0.00) %,0 (0),1.06 (0.00) %
1,PCS,0.029 (0.000) %,0 (0),0.87 (0.00) %,0 (0),0.87 (0.00) %
2,DPCS,0.018 (0.000) %,0 (0),0.31 (0.00) %,0 (0),0.87 (0.00) %


-- True metrics --


Unnamed: 0,Agent,True CTR,True PCS,True OS,True NCS,NDPC,DPCSO,DPCSN
0,Rand,0.018 (0.000) %,0.15 (0.00) %,0.11 (0.00) %,0.11 (0.00) %,0.017 (0.000) pm,0.004 (0.000) pm,0.004 (0.000) pm
1,PCS,0.021 (0.000) %,0.13 (0.00) %,0.10 (0.00) %,0.11 (0.00) %,0.018 (0.000) pm,0.004 (0.000) pm,0.003 (0.000) pm
2,DPCS,0.022 (0.000) %,0.13 (0.00) %,0.06 (0.00) %,0.11 (0.00) %,0.019 (0.000) pm,0.011 (0.000) pm,0.004 (0.000) pm


In [23]:
def run_prop_noweight():
    name_extension = 'prop'
    name_logging = name_agent
    info, save_agents = train_timeagents(name_logging,logs,feature_name,features, num_users=num_users, 
                                         kronecker_features=True,linear_reg=True, repo = data_repo)
    r_list = Parallel(n_jobs=int(num_cores/2), verbose=10)(delayed(
                        run_AB_test(i,name_ext=name_extension)) for i in range(num_AB_tests))
    res_dict = {r_list[i]['name']:r_list[i] for i in range(len(r_list))}
    res_avg = avg_result_extended(res_dict)
    (res_recap, res_recap_latex, 
     res_AB, res_AB_latex, 
     res_true, res_true_latex) = format_avg_result_extended(res_avg) #get dataframe & corresponding latex table
    pkl.dump(res_dict, open(data_repo+"clean/res_dict_"+name_logging+str(num_users)+"_"+str(num_users_AB)+"_"+feature_name+name_extension+".pkl",'wb'))
    pkl.dump(res_avg, open(data_repo+"clean/res_avg_"+name_logging+str(num_users)+"_"+str(num_users_AB)+"_"+feature_name+name_extension+".pkl",'wb'))
    res_recap.to_csv(data_repo+"clean/res_recap_"+name_logging+str(num_users)+"_"+str(num_users_AB)+"_"+feature_name+name_extension+".csv",index = False)
    res_true.to_csv(data_repo+"clean/res_true_"+name_logging+str(num_users)+"_"+str(num_users_AB)+"_"+feature_name+name_extension+".csv",index = False)

In [9]:
Parallel(n_jobs=int(num_cores), verbose=10)(delayed(
                    run_prop_noweight) for name_extension in ['prop','propweights'])

[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.
[Parallel(n_jobs=3)]: Done   2 out of   2 | elapsed:    4.5s remaining:    0.0s


TypeError: cannot unpack non-iterable function object