In [1]:
###############################################################################
# BCQ to predict first 3D item set
# Q Learning to predict the other 2 item sets 
#     Q Table: state = itemSetID
#              action = itemSetID
#
# 0. split train data into training set and validation set
# ----- Train BCQ -----------------
# 1.1 prepare data for training set:
#       step: 0, 1, 2, ..., 9
#       state: userFeatures20D + [step]
#       action: itemID
# 1.2 Train BCQ
#
# ----- Train Q Learning ----------
# 2.1 prepare data for training set:
#     state: itemSetID
#     action: itemSetID
# 2.2 Train Q Learning
# 
# ----- Prediction ----------------
# 3.1 transform userFeaturesTest to 20D by using PCA
# 3.2 Make prediction of the first itemSet by using BCQModel, name it itemSet1
# 3.3 use itemSet1 as state for QLModel to predict best itemSet2
# 3.4 use itemSet2 as state for QLModel to predict best itemSet3
###############################################################################

In [1]:
# 1. Split the training data into a training set and a validation set.
# NOTE(review): the star import hides where names come from; splitTrainSet is
# presumably provided by DataPrep -- confirm, and prefer explicit imports.
from DataPrep import *
from tqdm import tqdm
# Each split is returned as (user features, recommended items, purchase labels),
# going by the variable names -- TODO confirm against splitTrainSet.
userFeaturesTrain, recItemsTrain, purchaseLabelTrain, userFeaturesVal, recItemsVal, purchaseLabelVal = splitTrainSet()
# when training, userFeaturesTrain represents the state
# presumably the number of distinct items in the catalogue -- TODO confirm
N_ITEMS = 381
# load item info (used by later cells to look up item prices via getItemPrice)
from classes.Items import Items
itemInfo = Items()


Number of Multiprocessing threads: 31


In [19]:
# ----------------------------- Train Q Learning --------------------------------------------


In [5]:
# 2.1 Build the (state, action, reward, nextState) transitions used to train
#     the Q-learning model.
import numpy as np
from tqdm import tqdm
from classes.QLearning2 import *

# NOTE(review): NCLUSTERS is not used by any visible cell; kept in case code
# outside this notebook section relies on it.
NCLUSTERS = 200

# load itemset info
from classes.ItemSet import ItemSet3
itemSets = ItemSet3()
N_Sets = itemSets.getNSets()
print('Number of Item Sets 3: ' + str(N_Sets), flush = True)

# Every sample holds 9 recommended items shown as 3 consecutive sets of 3.
# The game only moves on to the next set when all 3 items of the current set
# were purchased.
#### state: step index 0, 1, or 2 (kept as an int, same type as nextState)
#### action: ID of the item set recommended at that step
#### reward: sum over the set of (item is purchased) * price
#### nextState: 1 or 2, or -1 if there is no next state
SET_SIZE = 3
N_STEPS = 3
NO_NEXT_STATE = -1

trainSetQL = []
for i in tqdm(range(len(recItemsTrain))):
    # loop thru samples
    recItems = recItemsTrain[i]
    purLabel = purchaseLabelTrain[i]
    for state in range(N_STEPS):
        # integer step index; `j / 3` would silently produce floats in Python 3
        j = state * SET_SIZE
        itemSet = [recItems[j], recItems[j+1], recItems[j+2]]
        labels = [purLabel[j], purLabel[j+1], purLabel[j+2]]

        # the game stops as soon as one item of the current set is not purchased
        gameStopped = labels[0]*labels[1]*labels[2] == 0
        isLastStep = state == N_STEPS - 1
        nextState = NO_NEXT_STATE if (gameStopped or isLastStep) else state + 1

        # action: the ID of the recommended item set
        action = itemSets.getSetID(itemSet)

        # reward: total price of the purchased items in this set
        reward = sum(itemInfo.getItemPrice(item) * label for item, label in zip(itemSet, labels))

        trainSetQL.append((state, action, reward, nextState))

        if gameStopped:
            # later sets were never played, so they carry no training signal
            break


  2%|▏         | 3941/208069 [00:00<00:10, 19834.12it/s]

Number of Item Sets 3: 112368


100%|██████████| 208069/208069 [00:10<00:00, 19818.37it/s]


In [9]:
# 2.2 Train QL model
# Reload the module so that edits to classes/QLearning2.py are picked up
# without restarting the kernel.
from classes import QLearning2
from importlib import reload  
QLearning2 = reload(QLearning2)

# States are the 3 steps of the game (0, 1, 2); actions are the item set IDs.
N_STATES_QL = 3
# Labels now match the constructor arguments (they were swapped before).
print('N_STATES: ' + str(N_STATES_QL) + ' N_ACTIONS: ' + str(N_Sets))
QLModel = QLearning2.QLearning(n_states = N_STATES_QL, n_actions = N_Sets)
# train in parallel
QLModel.trainParallel(trainSetQL)
# serial alternative:
# QLModel.train(trainSetQL)


N_ACTIONS: 3 N_STATES: 112368
parallel training ... 


In [18]:
# Candidate item sets for the third step: ask the Q-learning model for its
# best 100 set IDs in state 2 and resolve each ID to its items.
QLModel.initPredCache()
candidateSetIDs = QLModel.predictBestK(2, 100)
candidateItemSets = [itemSets.getItemSet(candidateID) for candidateID in candidateSetIDs]


[(158, 171, 214), (171, 172, 188), (171, 200, 234), (160, 199, 200), (172, 192, 234), (164, 171, 214), (164, 172, 233), (160, 172, 239), (160, 172, 199), (172, 192, 237), (172, 214, 235), (160, 235, 239), (171, 172, 237), (164, 172, 188), (172, 190, 199), (160, 172, 235), (164, 171, 237), (172, 188, 199), (171, 172, 214), (160, 238, 240), (171, 234, 237), (160, 164, 213), (171, 194, 234), (160, 234, 238), (160, 171, 204), (172, 220, 233), (164, 172, 219), (171, 234, 235), (164, 172, 183), (164, 172, 215), (172, 214, 239), (172, 199, 235), (172, 204, 237), (172, 199, 214), (172, 191, 234), (172, 217, 235), (148, 172, 234), (172, 194, 238), (160, 189, 235), (172, 190, 234), (160, 215, 233), (154, 164, 171), (150, 158, 233), (148, 171, 219), (172, 205, 234), (172, 213, 235), (164, 172, 217), (164, 172, 212), (172, 220, 234), (172, 235, 237), (172, 191, 235), (172, 199, 238), (164, 171, 215), (164, 172, 221), (172, 213, 237), (171, 200, 237), (172, 199, 213), (172, 199, 237), (164, 172, 23

In [28]:
# Load the recommendations previously written by the BCQ model. Each row's
# `itemids` field is a space-separated string of item IDs, parsed here into a
# list of ints (one list per row).
import pandas as pd
BCQoutput = pd.read_csv('./outputs/DiscreteBCQ_20DFeatures.csv')
BCQItems = [
    list(map(int, BCQoutput.itemids[rowIdx].split(' ')))
    for rowIdx in range(BCQoutput.shape[0])
]


[[220, 221, 240, 196, 238, 214, 218, 239, 233], [220, 240, 221, 196, 238, 214, 239, 218, 233], [220, 221, 240, 196, 238, 214, 218, 239, 233], [220, 221, 240, 196, 238, 214, 239, 218, 233], [220, 240, 221, 196, 238, 239, 214, 218, 233], [220, 240, 221, 196, 238, 214, 239, 218, 233], [220, 221, 240, 196, 238, 214, 218, 239, 233], [220, 221, 240, 196, 238, 214, 239, 218, 233], [220, 240, 221, 196, 238, 239, 214, 218, 233], [220, 240, 221, 238, 196, 239, 214, 218, 233]]


In [10]:
######################## PREDICTION ##########################################
userIDs, userFeaturesTest = getUserFeaturesTestSet()

# For every BCQ row, pair its first 6 items with each candidate set that shares
# no item with them.
# NOTE(review): there is no `break`, so one row can contribute many entries and
# items_out is not aligned with userIDs; no visible cell reads items_out --
# confirm whether this cell is still needed.
items_out = []
for rowIdx in range(BCQoutput.shape[0]):
    bcqFirst6 = BCQItems[rowIdx][:6]
    items_out.extend(
        bcqFirst6 + list(candidate)
        for candidate in candidateItemSets
        if not (candidate[0] in bcqFirst6 or candidate[1] in bcqFirst6 or candidate[2] in bcqFirst6)
    )
            


In [11]:
# 3.1 Project the test user features with the fitted PCA transformer and wrap
#     the result in a DataFrame.
# NOTE(review): PCAtransformer is not defined in any visible cell (possibly it
# comes from the DataPrep star import) -- confirm.
# NOTE(review): this overwrites userFeaturesTest, so running the cell twice
# feeds already-transformed data back into the transformer.
pcaFeaturesTest = PCAtransformer.transform(userFeaturesTest)
userFeaturesTest = pd.DataFrame(pcaFeaturesTest)

In [None]:
# Predict the first 6 items with BCQ.
from classes.d3rlpy_wrapper import predictBestK, finalizeItemSetsTestSet

# One state row per (test user, step): the user's feature vector with the step
# index appended. range(6) produces steps 0 to 5.
nTestUsers = userFeaturesTest.shape[0]
statesTest = []
for userIdx in tqdm(range(nTestUsers)):
    userState = list(userFeaturesTest.iloc[userIdx])
    statesTest.extend(userState + [stepIdx] for stepIdx in range(6))
statesTest = np.array(statesTest)
bestItems_6xSamples = predictBestK(BCQModel, statesTest, 6)
# NOTE(review): a later cell calls finalizeItemSetsTestSet with three arguments
# (states, predictions, 6); only one of the two call forms can match the
# current signature -- confirm and drop the other.
bestFirst6tems = finalizeItemSetsTestSet(bestItems_6xSamples)


# QLModel.initPredCache()
# recItems_test = []
# for i in tqdm(range(userFeaturesTest.shape[0])):
# # loop thru samples
#     recItems = []  # recommended list for this sample
#     # 3.2 Make prediction of the first itemSet by using BCQModel, name it itemSet1
#     state = userFeaturesTest.iloc[i]  # first step of the game
#     itemSetID1 = BCQModel.predict([np.array(state)])[0]
#     recItems.extend(list(itemSet3.getItemSet(itemSetID1)))
#     # 3.3 use itemSet1 as state for QLModel to predict best itemSet2
#     # now stateID = itemSetID1
#     candidateSetIDs = QLModel.predictBestK(itemSetID1, 20)
#     for setID in candidateSetIDs:
#         items = itemSet3.getItemSet(setID)
#         if (items[0] not in recItems) and (items[1] not in recItems) and (items[2] not in recItems):
#             # we have found a suitable solution for step 2
#             itemSetID2 = setID
#             recItems.extend(list(items))
#             break
#     # 3.4 use itemSet2 as state for QLModel to predict best itemSet3
#     # now stateID = itemSetID2
#     candidateSetIDs = QLModel.predictBestK(itemSetID2, 20)
#     for setID in candidateSetIDs:
#         items = itemSet3.getItemSet(setID)
#         if (items[0] not in recItems) and (items[1] not in recItems) and (items[2] not in recItems):
#             # we have found a suitable solution for step 2
#             itemSetID3 = setID
#             recItems.extend(list(items))
#             break
#     recItems_test.append(recItems)


100% 206096/206096 [00:22<00:00, 9229.85it/s] 
100% 381/381 [57:54<00:00,  9.12s/it]
100% 1236576/1236576 [05:27<00:00, 3775.36it/s]


In [13]:
# Turn the per-(user, step) BCQ predictions into one list of first-6 items per
# test user (presumably -- see finalizeItemSetsTestSet for the exact contract).
# bestItems_6xSamples is the predictBestK result computed from statesTest; the
# name bestItems_3xSamples used here before is not defined by any cell, so it
# only worked with leftover kernel state.
bestFirst6tems = finalizeItemSetsTestSet(statesTest, bestItems_6xSamples, 6)
assert len(bestFirst6tems)==len(userIDs), 'expected one first-6 item list per test user'


100% 1236576/1236576 [00:11<00:00, 104933.51it/s]


In [20]:
# predict last 3 items with QL
QLModel.initPredCache()
bestItemSetIDs = QLModel.predictBestK(2, 100)
finalItems = []
for i in tqdm(range(len(bestFirst6tems))):
    first6 = bestFirst6tems[i]
    for setID in bestItemSetIDs:
        items = itemSet3.getItemSet(setID)
        if (items[0] not in first6) and (items[1] not in first6) and (items[2] not in first6):
            finalItems.append(first6 + list(items))
            break



100% 206096/206096 [00:02<00:00, 99717.87it/s] 


In [21]:
# Sanity check: show the recommendations of the first 50 test users.
finalItemsPreview = finalItems[:50]
print(finalItemsPreview)

[[220, 196, 221, 95, 97, 48, 171, 172, 188], [220, 196, 221, 95, 97, 42, 171, 172, 188], [220, 196, 221, 95, 97, 48, 171, 172, 188], [220, 196, 221, 98, 97, 29, 171, 172, 188], [220, 196, 218, 240, 238, 29, 171, 172, 188], [220, 196, 221, 218, 97, 29, 171, 172, 188], [220, 196, 218, 221, 98, 29, 171, 172, 188], [220, 196, 221, 95, 97, 48, 171, 172, 188], [220, 196, 221, 95, 97, 42, 171, 172, 188], [220, 196, 221, 218, 97, 29, 171, 172, 188], [220, 196, 218, 221, 98, 29, 171, 172, 188], [220, 196, 218, 240, 238, 29, 171, 172, 188], [220, 196, 221, 95, 97, 42, 171, 172, 188], [220, 196, 221, 95, 97, 48, 171, 172, 188], [220, 196, 221, 95, 97, 42, 171, 172, 188], [220, 196, 221, 95, 97, 42, 171, 172, 188], [220, 196, 221, 95, 97, 42, 171, 172, 188], [220, 196, 218, 240, 98, 29, 171, 172, 188], [220, 196, 221, 95, 97, 48, 171, 172, 188], [220, 196, 221, 218, 97, 29, 171, 172, 188], [220, 196, 218, 240, 238, 29, 171, 172, 188], [220, 196, 218, 240, 238, 29, 171, 172, 188], [220, 196, 218, 2

In [23]:
# write recommended items to output csv file
from classes.output import writeOutput
writeOutput(finalItems, 'BCQ-QLearning_v2.csv', userIDs)