In [1]:
####################################################################
# Implementation 2 of DQN by using sets of 3 items.
#     State: (Click History, User Portraits) (note: purchase timestamp is not available in testset)
#     Action: sets of 3 itemID (each sample is split into 3 steps)
#     Rewards: Total price of purchased items
# 0. split train data into training set and validation set
# 1. prepare data for DQN from training set
# 2. prepare data for DQN from validation set
# 3. train DQN
# 4. make suggestions for validation set
# 5. Calculate Metrics 1 for our suggestions
# 6. Generate suggestions for provided testset for true scoring
#####################################################################

In [9]:
# 0. Split Train into training set and validation set
from DataPrep import *
from tqdm import tqdm
import numpy as np
import pandas as pd
userFeaturesTrain, recItemsTrain, purchaseLabelTrain, userFeaturesVal, recItemsVal, purchaseLabelVal = splitTrainSet()
userFeaturesTrain = pd.concat((userFeaturesTrain, userFeaturesVal), ignore_index=True)
recItemsTrain = np.vstack((recItemsTrain, recItemsVal))
purchaseLabelTrain = np.vstack((purchaseLabelTrain, purchaseLabelVal))

# when training, userFeaturesTrain represent state
N_ITEMS = 381
# load item info
from classes.Items import Items
itemInfo = Items()
# translator from (ID1, ID2, ID3) to setID
from classes.ItemSet import ItemSet3
itemSet3 = ItemSet3()

  userFeaturesTrain, recItemsTrain, purchaseLabelTrain, userFeaturesVal, recItemsVal, purchaseLabelVal = splitTrainSet()
  itemInfo = Items()


In [10]:
# 1. prepare data for training set
import numpy as np
from d3rlpy.dataset import MDPDataset

statesTrain = []
actionsTrain = []
rewardsTrain = []
terminalTrain = []  # terminal flag: 0 = game continue, 1 = game stop

for i in tqdm(range(userFeaturesTrain.shape[0])):
# loop through samples
    state = list(userFeaturesTrain.iloc[i])
    itemList = recItemsTrain[i]
    purchase = purchaseLabelTrain[i]
    for step in range(3):
        # check if game is still running
        if step>0 and purchase[0]*purchase[1]*purchase[2]==0:
            # stop adding to data set if game stopped
            break
        if step>1 and purchase[3]*purchase[4]*purchase[5]==0:
            # stop adding to data set if game stopped
            break
        # after passing check, we can add new record to train set
        # append step to state
        if step==0:
            step_OneHot = [1, 0, 0]
        elif step==1:
            step_OneHot = [0, 1, 0]
        else:
            step_OneHot = [0, 0, 1]
        statesTrain.append(state + step_OneHot)
        # action = itemSetID
        itemIDs = (itemList[step*3], itemList[step*3+1], itemList[step*3+2])
        itemSetID = itemSet3.getSetID(itemIDs)
        actionsTrain.append(itemSetID)
        # calculate reward
        price0 = itemInfo.getItemPrice(itemIDs[0])
        price1 = itemInfo.getItemPrice(itemIDs[1])
        price2 = itemInfo.getItemPrice(itemIDs[2])
        purch0 = purchase[step*3]
        purch1 = purchase[step*3+1]
        purch2 = purchase[step*3+2]
        reward = price0*purch0 + price1*purch1 + price2*purch2
        rewardsTrain.append(reward)
        # terminal flag: determine by looking at previous purchase flags
        if step!=2:
            if purch0*purch1*purch2 == 1: # game continue if all is 1
                terminalTrain.append(0)
            else:
                terminalTrain.append(1) # game stop
        else: # game stop at step 2
            terminalTrain.append(1)

# ### terminal flags: all 1
statesTrain = np.array(statesTrain)
actionsTrain = np.array(actionsTrain)
rewardsTrain = np.array(rewardsTrain)
terminalTrain = np.array(terminalTrain)
datasetTrain = MDPDataset(statesTrain, actionsTrain, rewardsTrain, terminalTrain, discrete_action = True)

100% 260087/260087 [03:11<00:00, 1359.95it/s]


In [11]:
# 1. prepare data for validation set
statesVal = []
actionsVal = []
rewardsVal = []
terminalVal = []  # terminal flag: 0 = game continue, 1 = game stop

for i in tqdm(range(userFeaturesTrain.shape[0])):
# loop through samples
    state = list(userFeaturesTrain.iloc[i])
    itemList = recItemsTrain[i]
    purchase = purchaseLabelTrain[i]
    for step in range(3):
        # check if game is still running
        if step>0 and purchase[0]*purchase[1]*purchase[2]==0:
            # stop adding to data set if game stopped
            break
        if step>1 and purchase[3]*purchase[4]*purchase[5]==0:
            # stop adding to data set if game stopped
            break
        # after passing check, we can add new record to train set
        # append step to state
        if step==0:
            step_OneHot = [1, 0, 0]
        elif step==1:
            step_OneHot = [0, 1, 0]
        else:
            step_OneHot = [0, 0, 1]
        statesVal.append(state + step_OneHot)
        # action = itemSetID
        itemIDs = (itemList[step*3], itemList[step*3+1], itemList[step*3+2])
        itemSetID = itemSet3.getSetID(itemIDs)
        actionsVal.append(itemSetID)
        # calculate reward
        price0 = itemInfo.getItemPrice(itemIDs[0])
        price1 = itemInfo.getItemPrice(itemIDs[1])
        price2 = itemInfo.getItemPrice(itemIDs[2])
        purch0 = purchase[step*3]
        purch1 = purchase[step*3+1]
        purch2 = purchase[step*3+2]
        reward = price0*purch0 + price1*purch1 + price2*purch2
        rewardsVal.append(reward)
        # terminal flag: determine by looking at previous purchase flags
        if step!=2:
            if purch0*purch1*purch2 == 1: # game continue if all is 1
                terminalVal.append(0)
            else:
                terminalVal.append(1) # game stop
        else: # game stop at step 2
            terminalVal.append(1)

# ### terminal flags: all 1
statesVal = np.array(statesVal)
actionsVal = np.array(actionsVal)
rewardsVal = np.array(rewardsVal)
terminalVal = np.array(terminalVal)
datasetVal = MDPDataset(statesVal, actionsVal, rewardsVal, terminalVal, discrete_action = True)

100% 260087/260087 [03:17<00:00, 1319.01it/s]


In [12]:
# save checkpoint
with open('/tf/shared/checkpoints/data-3D.pkl', 'wb') as file:
    pickle.dump((statesTrain, actionsTrain, rewardsTrain, terminalTrain, statesVal, actionsVal, rewardsVal, terminalVal),
                file, protocol=pickle.HIGHEST_PROTOCOL)

In [1]:
# load checkpoint
import pickle
with open('/tf/shared/checkpoints/data-3D.pkl', 'rb') as file:
    statesTrain, actionsTrain, rewardsTrain, terminalTrain, statesVal, actionsVal, rewardsVal, terminalVal = pickle.load(file)
from d3rlpy.dataset import MDPDataset
datasetTrain = MDPDataset(statesTrain, actionsTrain, rewardsTrain, terminalTrain, discrete_action = True)
datasetVal = MDPDataset(statesVal, actionsVal, rewardsVal, terminalVal, discrete_action = True)

In [14]:
# train deep learning models 
from classes import d3rlpy_wrapper
from importlib import reload
d3rlpy_wrapper = reload(d3rlpy_wrapper)

wrapper = d3rlpy_wrapper.RLModelWrapper(datasetTrain, datasetVal)
wrapper.trainAllModels(n_epochs = 10)

2021-08-02 00:51.04 [debug    ] RoundIterator is selected.
2021-08-02 00:51.04 [info     ] Directory is created at d3rlpy_logs/DQN_20210802005104
2021-08-02 00:52.58 [info     ] Model parameters are saved to d3rlpy_logs/DQN_20210802005104/model_10367.pt
2021-08-02 00:52.58 [debug    ] RoundIterator is selected.
2021-08-02 00:52.58 [info     ] Directory is created at d3rlpy_logs/DoubleDQN_20210802005258
2021-08-02 00:54.54 [info     ] Model parameters are saved to d3rlpy_logs/DoubleDQN_20210802005258/model_10367.pt
2021-08-02 00:54.54 [debug    ] RoundIterator is selected.
2021-08-02 00:54.54 [info     ] Directory is created at d3rlpy_logs/DiscreteBCQ_20210802005454
2021-08-02 00:58.09 [info     ] Model parameters are saved to d3rlpy_logs/DiscreteBCQ_20210802005454/model_10367.pt
2021-08-02 00:58.09 [debug    ] RoundIterator is selected.
2021-08-02 00:58.09 [info     ] Directory is created at d3rlpy_logs/DiscreteCQL_20210802005809
2021-08-02 01:01.01 [info     ] Model parameters are sav

In [15]:
# save checkpoint
import pickle
with open('/tf/shared/checkpoints/models-3D.pkl', 'wb') as file:
    pickle.dump((wrapper.DQN, wrapper.DoubleDQN, wrapper.DiscreteBCQ, wrapper.DiscreteCQL),
                file, protocol=pickle.HIGHEST_PROTOCOL)


In [None]:
# reload checkpoint
import pickle
from classes import d3rlpy_wrapper
wrapper = d3rlpy_wrapper.RLModelWrapper(datasetTrain, datasetVal)
with open('/tf/shared/checkpoints/models-3D.pkl', 'rb') as file:
    wrapper.DQN, wrapper.DoubleDQN, wrapper.DiscreteBCQ, wrapper.DiscreteCQL = pickle.load(file)


In [24]:
# make predictions ----------------------------------------------
from classes import ModelPrediction, ItemSet
from importlib import reload  
ModelPrediction = reload(ModelPrediction)
ItemSet = reload(ItemSet)

modelDQN = ModelPrediction.Model3D(wrapper.DQN)
modelDoubleDQN = ModelPrediction.Model3D(wrapper.DoubleDQN)
modelDiscreteBCQ = ModelPrediction.Model3D(wrapper.DiscreteBCQ)
modelDiscreteCQL = ModelPrediction.Model3D(wrapper.DiscreteCQL)



In [28]:
# make partial prediction on Val set
import numpy as np
SMALL_SAMPLE_SIZE = 100
NBATCH_PRED = 200
statesVal_batches = np.array_split(statesVal, NBATCH_PRED)

###################### DQN ######################
# first find candidate Item Sets from a small sample
small_sample = statesVal[np.random.choice(statesVal.shape[0], SMALL_SAMPLE_SIZE, replace=False), :]
modelDQN.findCandidateItemSets(small_sample, NCandidates = 100)
DQN_recItems = []
for states in statesVal_batches:
    tmp = modelDQN.predictPartial(states)
    DQN_recItems.extend(tmp)

###################### DoubleDQN ######################
# first find candidate Item Sets from a small sample
small_sample = statesVal[np.random.choice(statesVal.shape[0], SMALL_SAMPLE_SIZE, replace=False), :]
modelDoubleDQN.findCandidateItemSets(small_sample, NCandidates = 100)
DoubleDQN_recItems = []
for states in statesVal_batches:
    tmp = modelDoubleDQN.predictPartial(states)
    DoubleDQN_recItems.extend(tmp)

###################### DiscreteBCQ ######################
# first find candidate Item Sets from a small sample
small_sample = statesVal[np.random.choice(statesVal.shape[0], SMALL_SAMPLE_SIZE, replace=False), :]
modelDiscreteBCQ.findCandidateItemSets(small_sample, NCandidates = 100)
DiscreteBCQ_recItems = []
for states in statesVal_batches:
    tmp = modelDiscreteBCQ.predictPartial(states)
    DiscreteBCQ_recItems.extend(tmp)

    
###################### DiscreteCQL ######################
# first find candidate Item Sets from a small sample
small_sample = statesVal[np.random.choice(statesVal.shape[0], SMALL_SAMPLE_SIZE, replace=False), :]
modelDiscreteCQL.findCandidateItemSets(small_sample, NCandidates = 100)
DiscreteCQL_recItems = []
for states in statesVal_batches:
    tmp = modelDiscreteCQL.predictPartial(states)
    DiscreteCQL_recItems.extend(tmp)


  0% 193/112368 [00:15<2:30:43, 12.40it/s]


KeyboardInterrupt: 

In [43]:
# calculate metrics 1

90
[[83, 128, 129], [71, 80, 130], [158, 164, 219], [83, 98, 125], [160, 187, 200], [149, 186, 237], [158, 164, 219], [158, 164, 219], [171, 194, 201]]


In [6]:
print(sys.path)

['/tf/shared', '/usr/lib/python38.zip', '/usr/lib/python3.8', '/usr/lib/python3.8/lib-dynload', '', '/usr/local/lib/python3.8/dist-packages', '/usr/lib/python3/dist-packages', '/usr/local/lib/python3.8/dist-packages/IPython/extensions', '/root/.ipython', '/tf/shared/']
