In [1]:
####################################################################
# Implementation of DQN.
#     State: (Click History, User Portraits) (note: purchase timestamp is not available in testset)
#     Action: itemID (each sample is split into 9 steps)
#     Rewards: Total price of purchased items
# 0. split train data into training set and validation set
# 1. prepare data for DQN from training set
# 2. prepare data for DQN from validation set
# 3. train DQN
# 4. make suggestions for validation set
# 5. Calculate Metrics 1 for our suggestions
# 6. Generate suggestions for provided testset for true scoring
#####################################################################

In [1]:
# 0. Split Train into training set and validation set
from DataPrep import *
from tqdm import tqdm
# explicit import so `pd` does not depend on what `from DataPrep import *` happens to export
import pandas as pd

# splitTrainSet() comes from DataPrep; it yields user features, recommended item lists
# and purchase labels for the training split followed by the validation split
userFeaturesTrain, recItemsTrain, purchaseLabelTrain, userFeaturesVal, recItemsVal, purchaseLabelVal = splitTrainSet()
# when training, userFeaturesTrain represent state
N_ITEMS = 381
# load item price
# `sep` is passed by keyword: pandas 2.x only accepts the file path positionally
itemInfo = pd.read_csv('/tf/shared/item_info.csv', sep=' ')
# Sort by item_id and renumber the rows 0..n-1. Later cells look prices up with
# itemPrice[itemID-1], which on a Series is a *label* lookup; without the reset the
# labels would still be the pre-sort row numbers.
# (assumes item ids are 1..n without gaps -- TODO confirm against item_info.csv)
itemInfo = itemInfo.sort_values(by = 'item_id').reset_index(drop=True)
itemPrice = itemInfo.price


Number of Multiprocessing threads: 31


In [2]:
# dimension reduction with PCA
# comment this part out to use original user features
# NOTE: this cell overwrites userFeaturesTrain / userFeaturesVal with their projected
# versions, so it is not idempotent: running it a second time would feed the already
# projected frames back into the transformer. Re-run the split cell first if needed.
import pandas as pd

# getPCATransformer50 is defined in DataPrep; presumably it returns a fitted
# 50-component PCA (name-based guess -- confirm in DataPrep)
from DataPrep import getPCATransformer50
PCAtransformer = getPCATransformer50()
# wrap the transformed arrays back into DataFrames because later cells use .iloc / .shape
userFeaturesTrain = pd.DataFrame(PCAtransformer.transform(userFeaturesTrain))
userFeaturesVal = pd.DataFrame(PCAtransformer.transform(userFeaturesVal))


In [3]:
# 1. prepare data for training set
import numpy as np
from d3rlpy.dataset import MDPDataset

# Every user sample is unrolled into up to 9 transitions, one per recommended item.
# The 9 items form 3 rounds of 3: items 3-5 are only played when items 0-2 were all
# purchased, and items 6-8 only when items 3-5 were all purchased as well.
statesTrain = []
actionsTrain = []
rewardsTrain = []
terminalTrain = []  # terminal flag: 0 = game continue, 1 = game stop
for i in tqdm(range(userFeaturesTrain.shape[0])):
    # loop through samples
    state = list(userFeaturesTrain.iloc[i])
    itemList = recItemsTrain[i]
    purchase = purchaseLabelTrain[i]
    # index of the last step that is actually played for this sample
    if purchase[0]*purchase[1]*purchase[2]==0:
        lastStep = 2  # first round not fully purchased: game stops after item 2
    elif purchase[3]*purchase[4]*purchase[5]==0:
        lastStep = 5  # second round not fully purchased: game stops after item 5
    else:
        lastStep = 8  # all rounds played
    for step in range(lastStep+1):
        # state = user features with the step index appended as last column
        statesTrain.append(state + [step])
        # action = itemID
        itemID = itemList[step]
        actionsTrain.append(itemID)
        # reward = price of the item if it was purchased, otherwise 0
        if purchase[step]==1:
            rewardsTrain.append(itemPrice[itemID-1]) # itemID-1 because itemPrice is a 0-based array
        else:
            rewardsTrain.append(0)
        # The episode ends exactly on the last played step. The previous elif chain
        # tested `purchase[3]*purchase[4]*purchase[5]` without `==0` at step 5, which
        # flagged continuing games as finished and left stopped games unterminated.
        terminalTrain.append(1 if step==lastStep else 0)


statesTrain = np.array(statesTrain)
actionsTrain = np.array(actionsTrain)
rewardsTrain = np.array(rewardsTrain)
terminalTrain = np.array(terminalTrain)
datasetTrain = MDPDataset(statesTrain, actionsTrain, rewardsTrain, terminalTrain, discrete_action = True)

100%|██████████| 208069/208069 [00:28<00:00, 7239.52it/s]


In [None]:
# 2. prepare data for validation set
# Every user sample is unrolled into up to 9 transitions, one per recommended item.
# The 9 items form 3 rounds of 3: items 3-5 are only played when items 0-2 were all
# purchased, and items 6-8 only when items 3-5 were all purchased as well.
statesVal = []
actionsVal = []
rewardsVal = []
terminalVal = []  # terminal flag: 0 = game continue, 1 = game stop
for i in tqdm(range(userFeaturesVal.shape[0])):
    # loop through samples
    state = list(userFeaturesVal.iloc[i])
    itemList = recItemsVal[i]
    purchase = purchaseLabelVal[i]
    # index of the last step that is actually played for this sample
    if purchase[0]*purchase[1]*purchase[2]==0:
        lastStep = 2  # first round not fully purchased: game stops after item 2
    elif purchase[3]*purchase[4]*purchase[5]==0:
        lastStep = 5  # second round not fully purchased: game stops after item 5
    else:
        lastStep = 8  # all rounds played
    for step in range(lastStep+1):
        # state = user features with the step index appended as last column
        statesVal.append(state + [step])
        # action = itemID
        itemID = itemList[step]
        actionsVal.append(itemID)
        # reward = price of the item if it was purchased, otherwise 0
        if purchase[step]==1:
            rewardsVal.append(itemPrice[itemID-1]) # itemID-1 because itemPrice is a 0-based array
        else:
            rewardsVal.append(0)
        # The episode ends exactly on the last played step. The previous elif chain
        # tested `purchase[3]*purchase[4]*purchase[5]` without `==0` at step 5, which
        # flagged continuing games as finished and left stopped games unterminated.
        terminalVal.append(1 if step==lastStep else 0)


statesVal = np.array(statesVal)
actionsVal = np.array(actionsVal)
rewardsVal = np.array(rewardsVal)
terminalVal = np.array(terminalVal)
datasetVal = MDPDataset(statesVal, actionsVal, rewardsVal, terminalVal, discrete_action = True)

 88%|████████▊ | 45594/52018 [00:05<00:00, 7663.70it/s]

In [None]:
# save data to checkpoint
# Persists the eight numpy arrays built by the two data-preparation cells as a single
# tuple; the reload cell unpacks them in exactly this order.
import pickle
with open('/tf/shared/checkpoints/data-3D-50DFeatures.pkl', 'wb') as file:
    pickle.dump((statesTrain, actionsTrain, rewardsTrain, terminalTrain, statesVal, actionsVal, rewardsVal, terminalVal),
                file, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# reload checkpoint
# Restores the arrays written by the "save data to checkpoint" cell so the slow
# preparation loops can be skipped after a kernel restart.
# NOTE: pickle.load executes arbitrary code from the file; only load checkpoints
# produced by this notebook.
import pickle
with open('/tf/shared/checkpoints/data-3D-50DFeatures.pkl', 'rb') as file:
    statesTrain, actionsTrain, rewardsTrain, terminalTrain, statesVal, actionsVal, rewardsVal, terminalVal = pickle.load(file)

# rebuild the d3rlpy datasets from the raw arrays (the datasets themselves are not pickled)
from d3rlpy.dataset import MDPDataset
datasetTrain = MDPDataset(statesTrain, actionsTrain, rewardsTrain, terminalTrain, discrete_action = True)
datasetVal = MDPDataset(statesVal, actionsVal, rewardsVal, terminalVal, discrete_action = True)

In [None]:
# 3. train the models on the training dataset
from classes import d3rlpy_wrapper
from importlib import reload
import os
# pin the process to the GPU with index 1, with devices enumerated in PCI bus order
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="1"
# re-import the module so edits to classes/d3rlpy_wrapper.py are picked up without a kernel restart
d3rlpy_wrapper = reload(d3rlpy_wrapper)

# NOTE(review): the wrapper is built from datasetTrain only here, while the model
# reload cell further down also passes datasetVal -- confirm which form is intended.
wrapper = d3rlpy_wrapper.RLModelWrapper(datasetTrain)
wrapper.trainAllModels(n_epochs = 25)

In [None]:
# save models to checkpoints
# The four model attributes of the wrapper are stored as one tuple; the reload cell
# assigns them back in the same order.
import pickle
with open('/tf/shared/checkpoints/models-3D-50DFeatures.pkl', 'wb') as file:
    pickle.dump((wrapper.DQN, wrapper.DoubleDQN, wrapper.DiscreteBCQ, wrapper.DiscreteCQL),
                file, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# reload checkpoint
# Builds a fresh wrapper and replaces its four models with the pickled, trained ones.
# Requires datasetTrain and datasetVal to exist (data cells or data checkpoint reload).
# NOTE: pickle.load executes arbitrary code from the file; only load checkpoints
# produced by this notebook.
import pickle
from classes import d3rlpy_wrapper
wrapper = d3rlpy_wrapper.RLModelWrapper(datasetTrain, datasetVal)
with open('/tf/shared/checkpoints/models-3D-50DFeatures.pkl', 'rb') as file:
    wrapper.DQN, wrapper.DoubleDQN, wrapper.DiscreteBCQ, wrapper.DiscreteCQL = pickle.load(file)


In [None]:
###### Now we prepare test set to make prediction
userIDs, userFeaturesTest = getUserFeaturesTestSet()
# NOTE(review): the train/val features are projected with PCAtransformer when the PCA
# cell is enabled; no such projection is applied here, so getUserFeaturesTestSet() must
# already return features in the same space -- the width check below fails loudly if not.

# statesTest = userFeaturesTest with every row repeated once per step and the step
# index (0 to 8) appended as last column, i.e. the same layout as statesTrain.
N_STEPS = 9
featuresTest = np.asarray(userFeaturesTest)
stepColumn = np.tile(np.arange(N_STEPS), featuresTest.shape[0]).reshape(-1, 1)
statesTest = np.hstack([np.repeat(featuresTest, N_STEPS, axis=0), stepColumn])

print(len(statesTest))
# the models were fitted on statesTrain, so the test states must have the same width
if statesTest.shape[1] != statesTrain.shape[1]:
    raise ValueError(
        'test states have %d columns but the models were trained on %d columns; '
        'apply the same feature transformation (e.g. PCA) to the test features'
        % (statesTest.shape[1], statesTrain.shape[1]))

In [None]:
# generate prediction for this expanded test sets, number of rows = nrows(test set) * 9
# one result per model, each indexed by row of statesTest (consumed row-wise by finalizeItemSetsTestSet)
itemSetDQN, itemSetDoubleDQN, itemSetDiscreteBCQ, itemSetDiscreteCQL = wrapper.predict9ItemsAllModels(statesTest)

In [None]:
# function to concatenate multiple rows of itemSet into a single set for each user sample
# for each sample:
#     finalSet = []
#     for each step in sample:
#          iterate thru recommended items and add to finalSet if that item is not already in finalSet
def finalizeItemSetsTestSet(statesInput, itemSet):
    """Merge the per-step predictions of each user sample into one list of 9 items.

    statesInput: 2-D array with one row per (sample, step); the last column is the
                 step index, and each sample's rows run from step 0 to step 8.
    itemSet:     per-row candidate items, indexed like statesInput and ordered by
                 preference.

    For every row the first candidate not yet chosen for the current sample is
    added, so each sample ends up with 9 distinct items.

    Returns a list with one 9-item list per sample.
    Raises ValueError if the rows do not start with a step-0 row.
    """
    output = []
    finalItemSet = None  # no sample has been opened yet
    for i in tqdm(range(statesInput.shape[0])):
        # loop through expanded samples; only the step column of the row is needed
        step = statesInput[i][-1]
        if step==0: # init new finalItemSet
            finalItemSet = []
        elif finalItemSet is None:
            raise ValueError('statesInput must start with a step-0 row, got step %r in row %d' % (step, i))
        # try to add new item to finalItemSet, based on their highest value
        for item in itemSet[i]:
            if item not in finalItemSet:
                finalItemSet.append(item)
                break
        # export finalItemSet once reaching step 8
        if step==8:
            assert len(finalItemSet)==9, 'sample ending at row %d collected %d items instead of 9' % (i, len(finalItemSet))
            output.append(finalItemSet)
    return output


In [None]:
# Build the final 9-item recommendation per test user.
# Only DiscreteBCQ is active; uncomment the other lines to finalize the remaining models.
# finalItemSetDQN = finalizeItemSetsTestSet(statesTest, itemSetDQN)
# finalItemSetDoubleDQN = finalizeItemSetsTestSet(statesTest, itemSetDoubleDQN)
finalItemSetDiscreteBCQ = finalizeItemSetsTestSet(statesTest, itemSetDiscreteBCQ)
# finalItemSetDiscreteCQL = finalizeItemSetsTestSet(statesTest, itemSetDiscreteCQL)


In [None]:
# function to save results to output
# format data according to submission format and write to file
def writeOutput(finalItemSet, outFileName, userIDs_ = userIDs, outDir = '/tf/shared/outputs/'):
    """Write the recommendations to a submission file.

    finalItemSet: per-user item lists, indexed like userIDs_.
    outFileName:  file name, appended to outDir as-is.
    userIDs_:     user ids; the default is the global `userIDs` as it was when this
                  function was defined (default arguments are bound at definition time).
    outDir:       output directory prefix, expected to end with a path separator.

    The file starts with the header `id,itemids`, followed by one line per user of the
    form `<userID>,<item> <item> ...`. There is no trailing newline.
    """
    outFile = outDir + outFileName
    # build the whole content first so a bad row cannot leave a truncated file behind
    lines = ['id,itemids']
    for i in range(len(userIDs_)):
        itemString = ' '.join([str(j) for j in finalItemSet[i]])
        lines.append(str(userIDs_[i]) + ',' + itemString)
    # the context manager guarantees the handle is flushed and closed
    with open(outFile, "w") as f:
        f.write('\n'.join(lines))

In [None]:
# Write the submission file for the active model.
# NOTE: the commented-out calls still carry the '20DFeatures' suffix while the active
# one uses '50DFeatures'; adjust the file names before re-enabling them.
# writeOutput(finalItemSetDQN, 'DQN_20DFeatures.csv')
# writeOutput(finalItemSetDoubleDQN, 'DoubleDQN_20DFeatures.csv')
writeOutput(finalItemSetDiscreteBCQ, 'DiscreteBCQ_50DFeatures.csv')
# writeOutput(finalItemSetDiscreteCQL, 'DiscreteCQL_20DFeatures.csv')

In [None]:
####################### Now we calculate our Metrics1 on the 4 models ##################################
from classes.Metrics import Metrics
# the metric object is built from the validation recommendations, purchase labels and item prices
metrics = Metrics(recItemsVal, purchaseLabelVal, itemPrice)

################### generate prediction for expanded val set
# statesVal holds one row per played step of each validation sample
itemSetDQN_val, itemSetDoubleDQN_val, itemSetDiscreteBCQ_val, itemSetDiscreteCQL_val = wrapper.predict9ItemsAllModels(statesVal)

# function to concatenate multiple rows of val itemSet into a single set for each user sample
# because of the way we organize data, each sample is divided into multiple rows
def finalizeItemSets(statesInput, itemSet):
    """Keep one predicted item set per sample: the one produced for its step-0 row.

    statesInput: the rows the models were fed to produce itemSet; the last column of
                 each row is the step of the game (0 to 8).
    itemSet:     predictions, indexed like statesInput.

    Returns the entries of itemSet whose row has step 0, in row order.
    """
    nRows = statesInput.shape[0]
    # a sample is spread over several rows; only its first row (step 0) is used
    return [itemSet[row] for row in tqdm(range(nRows)) if statesInput[row][-1] == 0]

# Reduce the per-row validation predictions to one item set per sample and check
# that every validation sample received exactly one set.
# Only DiscreteBCQ is active; uncomment the matching pairs to evaluate the other models.
# finalItemSetDQN_val = finalizeItemSets(statesVal, itemSetDQN_val)
# finalItemSetDoubleDQN_val = finalizeItemSets(statesVal, itemSetDoubleDQN_val)
finalItemSetDiscreteBCQ_val = finalizeItemSets(statesVal, itemSetDiscreteBCQ_val)
# finalItemSetDiscreteCQL_val = finalizeItemSets(statesVal, itemSetDiscreteCQL_val)
# assert len(finalItemSetDQN_val) == userFeaturesVal.shape[0]
# assert len(finalItemSetDoubleDQN_val) == userFeaturesVal.shape[0]
assert len(finalItemSetDiscreteBCQ_val) == userFeaturesVal.shape[0]
# assert len(finalItemSetDiscreteCQL_val) == userFeaturesVal.shape[0]

In [None]:
# 5. score the validation suggestions with Metrics 1 (active model only)
# metrics1_DQN = metrics.calculate_metrics1(finalItemSetDQN_val)
# metrics1_DoubleDQN = metrics.calculate_metrics1(finalItemSetDoubleDQN_val)
metrics1_DiscreteBCQ = metrics.calculate_metrics1(finalItemSetDiscreteBCQ_val)
# metrics1_DiscreteCQL = metrics.calculate_metrics1(finalItemSetDiscreteCQL_val)

In [None]:
# show the Metrics 1 result of the active model
# print(metrics1_DQN)
# print(metrics1_DoubleDQN)
print(metrics1_DiscreteBCQ)
# print(metrics1_DiscreteCQL)