In [1]:
##########################################################
# Q-learning implementation with 3D item sets (sets of 3 items); the game has 3 states: 0, 1, 2
# 1. Split the train data into a training set and a validation set
# 2. Train Q-tables on the training set
# 3. Make suggestions for the validation set
# 4. Calculate Metrics 1 for our suggestions
# 5. Make predictions for the competition's test set
#########################################################

In [1]:
# 1. Split Train
# numpy / pandas are imported explicitly: this cell uses `np` and `pd` directly
# and previously only had them in scope through the star import below.
import numpy as np
import pandas as pd
from DataPrep import *

# When True (the original behaviour) the validation split is merged back into
# the training data, so the Q tables are trained on every labelled sample.
# WARNING: with this setting the validation score computed later in the
# notebook is measured on samples the models were trained on, so it is
# optimistic. Set to False to keep the validation split held out.
TRAIN_ON_VALIDATION = True

userFeaturesTrain, recItemsTrain, purchaseLabelTrain, userFeaturesVal, recItemsVal, purchaseLabelVal = splitTrainSet()
if TRAIN_ON_VALIDATION:
    # The *Val variables are left untouched; only the *Train variables grow.
    userFeaturesTrain = pd.concat((userFeaturesTrain, userFeaturesVal), ignore_index=True)
    recItemsTrain = np.vstack((recItemsTrain, recItemsVal))
    purchaseLabelTrain = np.vstack((purchaseLabelTrain, purchaseLabelVal))

Number of Multiprocessing threads: 31


In [2]:
# Dimension reduction with PCA, followed by clustering of the reduced user features.
# NOTE: later cells index QLModels / trainData by the cluster labels computed
# here, so the notebook as written needs this cell; using the original user
# features instead would require another source of cluster labels.
# NOTE(review): the original comment mentioned "60D features" while the
# imported helper name mentions "200_20D" -- confirm the actual dimensions.

# getPCA_Clustering_Models is the function actually called below; it used to be
# reachable only through `from DataPrep import *` in the first cell, so it is
# imported explicitly. getClusterModel200_20D is kept from the original import
# line but is not used in this cell.
from DataPrep import getClusterModel200_20D, getPCA_Clustering_Models
PCAModel, clusterModel = getPCA_Clustering_Models()
NCLUSTERS = 100

userFeaturesTrain_transformed = PCAModel.transform(userFeaturesTrain)
clusterLabelsTrain = clusterModel.predict(userFeaturesTrain_transformed)

userFeaturesVal_transformed = PCAModel.transform(userFeaturesVal)
clusterLabelsVal = clusterModel.predict(userFeaturesVal_transformed)

# Cluster labels are used as list indices into NCLUSTERS-long lists later on.
# A label >= NCLUSTERS would raise IndexError deep inside a long loop, and a
# negative label would silently index from the end, so fail early instead.
for _labels in (clusterLabelsTrain, clusterLabelsVal):
    assert 0 <= min(_labels) and max(_labels) < NCLUSTERS, \
        'cluster labels must lie in [0, NCLUSTERS)'

# load item info
from classes.Items import Items
itemInfo = Items()
# load itemset info
from classes.ItemSet import ItemSet3
itemSets = ItemSet3()
N_Sets = itemSets.getNSets()
print('Number of Item Sets 3: ' + str(N_Sets))


Number of Item Sets 3: 112368


In [3]:
# 2. Build the Q-learning training transitions, grouped by user cluster
import numpy as np
from tqdm import tqdm
from classes.QLearning2 import *

# One Q table and one list of transitions per user cluster.
QLModels = [QLearning(n_states = 3, n_actions = N_Sets) for _ in range(NCLUSTERS)]
trainData = [[] for _ in range(NCLUSTERS)]

# Every transition collected below is a tuple (state, action, reward, nextState):
#### state: 0, 1, or 2 -- which of the three Set3 slots of the sample this is
#### action: ID of the itemSet that was recommended in that slot
#### reward: sum over the slot's 3 items of (purchase label) * price
#### nextState: 1 or 2; -1 if there is no next state
for sampleIdx in tqdm(range(len(recItemsTrain))):
    shownItems = recItemsTrain[sampleIdx]
    bought = purchaseLabelTrain[sampleIdx]
    # transitions of this sample go to the list of the sample's user cluster
    clusterTransitions = trainData[clusterLabelsTrain[sampleIdx]]

    for state in range(3):
        first = 3 * state  # index of the slot's first item (0, 3 or 6)
        itemSet = [shownItems[first], shownItems[first+1], shownItems[first+2]]
        labels = [bought[first], bought[first+1], bought[first+2]]

        # The game goes on to the next slot only if the product of the three
        # purchase labels is non-zero (i.e. none of them is 0).
        gameContinues = not (labels[0]*labels[1]*labels[2]==0)

        action = itemSets.getSetID(itemSet)

        # original note: itemPrice is an array, itemID from raw data is 1-based index
        prices = [itemInfo.getItemPrice(item) for item in itemSet]
        reward = sum(price*label for price, label in zip(prices, labels))

        # the last slot (state 2) never has a successor
        nextState = state + 1 if (gameContinues and state < 2) else -1

        clusterTransitions.append((state, action, reward, nextState))

        if not gameContinues:
            # game stopped: later slots of this sample are not used for training
            break


100%|██████████| 260087/260087 [00:15<00:00, 16327.25it/s]


In [4]:
# Train each cluster's Q table on the transitions collected for that cluster.
for qTable, transitions in zip(tqdm(QLModels), trainData):
    qTable.trainParallel(transitions)

100%|██████████| 100/100 [05:11<00:00,  3.11s/it]


In [5]:
# 3. make suggestion for Val set
# Call initPredCache() on every cluster model before predicting
# (presumably it (re)initialises a per-model prediction cache -- see QLearning2).
for i in range(NCLUSTERS):
    QLModels[i].initPredCache()

# make suggestion for each Val sample
# NOTE(review): this cell APPENDS each accepted item set (first accepted set
# ends up in positions 0-2), whereas the competition-test cell PREPENDS
# (`list(items) + items4Sample`), which puts the first accepted set in
# positions 6-8. The validation score therefore measures a different item
# order than the one written to the submission -- confirm which is intended.
items_out_val = []
for cluster in tqdm(clusterLabelsVal):
# clusterLabelsVal is array of labels of users in Val set
    model = QLModels[cluster]
    # arguments kept from the original call; presumably (state, K) -- confirm
    bestSetIDs = model.predictBestK(2, 1000)
    items4Sample = []
    for setID in bestSetIDs:
        items = itemSets.getItemSet(setID)
        # accept a set only if none of its 3 items was already picked
        if items[0] not in items4Sample and items[1] not in items4Sample and items[2] not in items4Sample:
            items4Sample.extend(list(items))
        if len(items4Sample)==9:
            break
    # Same guard as in the test-set cell: fail loudly if fewer than three
    # disjoint sets were found, rather than scoring a short recommendation.
    assert len(items4Sample)==9
    items_out_val.append(items4Sample)
assert len(items_out_val)==len(clusterLabelsVal)

100%|██████████| 52018/52018 [00:03<00:00, 13660.08it/s]


In [10]:
# Calculate Metrics 1 for our suggestions

In [7]:
# 4. calculate metrics
from classes.Metrics import Metrics

# weight passed as `w3` to both metric calls below
W3 = 1000

metrics = Metrics(recItemsVal, purchaseLabelVal)

# score of our own suggestions on the Val set
score = metrics.calculate_metrics2(items_out_val, w3 = W3)
print(score)

# score of the Val set's own recommended items (recItemsVal); the original
# author treats this as the maximum score possible by ground truth
score_max = metrics.calculate_metrics2(recItemsVal, w3 = W3)
print(score_max) # max score possible

ratio = score/score_max
print('percentage of max score: ' + str(ratio))

103103735000
159061267871
percentage of max score: 0.6482013904454602


In [8]:
# 5. make suggestion for competition's test set
# predict user cluster label for users in the test set, using the same
# PCA + clustering models that produced the train / Val labels
userIDs, userFeaturesTest = getUserFeaturesTestSet()
userFeaturesTest_transformed = PCAModel.transform(userFeaturesTest)
clusterLabelsTest = clusterModel.predict(userFeaturesTest_transformed)

# One recommendation list is produced per cluster label and later written out
# together with userIDs (presumably row-aligned), so the two must have the
# same length.
assert len(clusterLabelsTest) == len(userIDs), \
    'expected exactly one cluster label per test user ID'
# Labels are used as indices into QLModels; a negative label would silently
# pick a model from the end of the list, so check the range up front.
assert 0 <= min(clusterLabelsTest) and max(clusterLabelsTest) < NCLUSTERS, \
    'cluster labels must lie in [0, NCLUSTERS)'

In [9]:
# make suggestion for each test sample
for qModel in QLModels:
    qModel.initPredCache()

# NOTE(review): accepted item sets are PREPENDED here, while the Val-set cell
# appends them, so the submitted item order differs from the validated one --
# confirm which order is intended.
items_out_test = []
for clusterID in tqdm(clusterLabelsTest):
    # clusterLabelsTest holds the cluster label of each user in the test set
    rankedSetIDs = QLModels[clusterID].predictBestK(2, 1000)
    items4Sample = []
    for setID in rankedSetIDs:
        candidate = itemSets.getItemSet(setID)
        # skip a set as soon as one of its 3 items was already picked
        alreadyUsed = (candidate[0] in items4Sample
                       or candidate[1] in items4Sample
                       or candidate[2] in items4Sample)
        if not alreadyUsed:
            items4Sample = list(candidate) + items4Sample
        if len(items4Sample)==9:
            break
    # every sample must end up with exactly 9 items (3 disjoint sets)
    assert len(items4Sample)==9
    items_out_test.append(items4Sample)
assert len(items_out_test)==len(clusterLabelsTest)

100%|██████████| 206096/206096 [00:07<00:00, 27528.33it/s]


In [10]:
# Write the recommended items for the test users to the output csv file.
from classes.output import writeOutput

SUBMISSION_CSV = 'QLearning-3D-PCA-Cluster-v2.csv'
writeOutput(items_out_test, SUBMISSION_CSV, userIDs)

In [11]:
# Sanity check: print the first recommendation lists (9 item IDs each).
N_PREVIEW = 50
print(items_out_test[:N_PREVIEW])


[[171, 200, 220, 172, 219, 234, 160, 191, 214], [172, 199, 235, 164, 200, 233, 171, 234, 237], [171, 200, 220, 172, 219, 234, 160, 191, 214], [172, 199, 221, 171, 200, 220, 158, 159, 213], [172, 199, 221, 164, 200, 234, 171, 192, 235], [172, 199, 220, 164, 200, 234, 157, 171, 222], [172, 199, 234, 160, 196, 200, 164, 233, 235], [171, 234, 235, 172, 199, 213, 158, 164, 214], [172, 199, 235, 164, 200, 234, 160, 196, 237], [160, 199, 200, 172, 234, 235, 164, 233, 237], [160, 196, 199, 164, 200, 233, 172, 234, 237], [172, 199, 221, 164, 200, 234, 160, 188, 235], [172, 199, 233, 164, 200, 239, 160, 216, 234], [172, 199, 234, 164, 200, 233, 160, 188, 196], [172, 199, 235, 164, 200, 233, 171, 234, 237], [172, 199, 234, 164, 194, 200, 171, 233, 237], [172, 199, 221, 171, 200, 220, 158, 159, 213], [172, 199, 221, 164, 200, 235, 171, 233, 239], [172, 233, 235, 164, 199, 200, 160, 234, 248], [172, 199, 234, 164, 194, 200, 171, 233, 237], [172, 234, 235, 164, 200, 233, 158, 168, 199], [171, 200, 2

dtype('float32')