In [None]:
import numpy as np
import pandas as pd
import Algorithms.baselines as base
import Algorithms.context_knn as cknn
import Algorithms.SeqContextKNN as scknn
import Algorithms.gru4rec as gru4rec
import Algorithms.svmknn as svmknn
import time
import pickle
import argparse
import os
import itertools

In [None]:
def trainBPR(lambda_session,lambda_item):
    """Fit a ``base.BPR`` model on the global ``train`` data and pickle it.

    Args:
        lambda_session: forwarded to ``base.BPR`` as ``lambda_session`` and
            embedded in the output file name.
        lambda_item: forwarded to ``base.BPR`` as ``lambda_item`` and
            embedded in the output file name.

    Side effects:
        Prints progress messages and writes the fitted object to
        ``models/valid/cb12_BPR_ls<lambda_session>li<lambda_item>.model``.
        The directory is created when missing; an existing file is
        overwritten.

    Returns:
        None.
    """
    model = base.BPR(lambda_session=lambda_session, lambda_item=lambda_item)

    banner = "Training algorithm: BPR with lambda_session {} and lambda_item {}"
    print(banner.format(lambda_session, lambda_item))

    # Relies on the module-level `train` DataFrame having been loaded already.
    model.fit(train)

    # Persist the fitted model; the hyper-parameters are encoded in the name.
    model_path = "".join(
        ["models/valid/cb12_BPR_ls", str(lambda_session), 'li', str(lambda_item), ".model"]
    )
    target_dir = os.path.dirname(model_path)
    os.makedirs(target_dir, exist_ok=True)
    print("Finished training. Storing model to: " + model_path)
    with open(model_path, 'wb') as sink:
        pickle.dump(model, sink)

In [None]:
def trainIKNN(lmbd,alpha):
    """Fit a ``base.ItemKNN`` model on the global ``train`` data and pickle it.

    Args:
        lmbd: forwarded to ``base.ItemKNN`` as ``lmbd`` and embedded in the
            output file name.
        alpha: forwarded to ``base.ItemKNN`` as ``alpha`` and embedded in the
            output file name.

    Side effects:
        Prints progress messages and writes the fitted object to
        ``models/valid/cb12_IKNN_lmbd<lmbd>alpha<alpha>.model``. The
        directory is created when missing; an existing file is overwritten.

    Returns:
        None.
    """
    model = base.ItemKNN(lmbd=lmbd, alpha=alpha)

    banner = "Training algorithm: ItemKNN with lambda {} and alpha {}"
    print(banner.format(lmbd, alpha))

    # Relies on the module-level `train` DataFrame having been loaded already.
    model.fit(train)

    # Persist the fitted model; the hyper-parameters are encoded in the name.
    model_path = "".join(
        ["models/valid/cb12_IKNN_lmbd", str(lmbd), 'alpha', str(alpha), ".model"]
    )
    target_dir = os.path.dirname(model_path)
    os.makedirs(target_dir, exist_ok=True)
    print("Finished training. Storing model to: " + model_path)
    with open(model_path, 'wb') as sink:
        pickle.dump(model, sink)

In [None]:
def trainSKNN(k, sampling, similarity, pop_boost):
    """Fit a ``cknn.ContextKNN`` model (SKNN, a.k.a. context KNN / cknn) and pickle it.

    All four arguments are forwarded to ``cknn.ContextKNN`` under the same
    keyword names and are embedded in the output file name.

    NOTE(review): earlier notes on this function listed ``k = 500`` and
    ``sampling = 1000``; presumably reference values -- confirm against
    ``ContextKNN`` before relying on them.

    Side effects:
        Prints progress messages and writes the fitted object to
        ``models/valid/cb12_SKNN_k<k>Smpl<sampling>Sim<similarity>Pop_boost<pop_boost>.model``.
        The directory is created when missing; an existing file is
        overwritten.

    Returns:
        None.
    """
    model = cknn.ContextKNN(k=k, sampling=sampling, similarity=similarity, pop_boost=pop_boost)

    banner = "Training algorithm: SKNN with k {}, sampling {}, similarity {} and pop_boost {}"
    print(banner.format(k, sampling, similarity, pop_boost))

    # Relies on the module-level `train` DataFrame having been loaded already.
    model.fit(train)

    # Persist the fitted model; the hyper-parameters are encoded in the name.
    name_parts = [
        "models/valid/cb12_SKNN_k", str(k),
        'Smpl', str(sampling),
        'Sim', str(similarity),
        'Pop_boost', str(pop_boost),
        ".model",
    ]
    model_path = "".join(name_parts)
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    print("Finished training. Storing model to: " + model_path)
    with open(model_path, 'wb') as sink:
        pickle.dump(model, sink)

In [None]:
def trainSsKNN(k,sampling,similarity,pop_boost):
    """Fit a ``scknn.SeqContextKNN`` model (S-sKNN, a.k.a. sequential context KNN) and pickle it.

    All four arguments are forwarded to ``scknn.SeqContextKNN`` under the
    same keyword names and are embedded in the output file name.

    NOTE(review): earlier notes on this function listed ``k = 500`` and
    ``sampling = 1000``; presumably reference values -- confirm against
    ``SeqContextKNN`` before relying on them.

    Side effects:
        Prints progress messages and writes the fitted object to
        ``models/valid/cb12_S-sKNN_k<k>Smpl<sampling>Sim<similarity>Pop_boost<pop_boost>.model``.
        The directory is created when missing; an existing file is
        overwritten.

    Returns:
        None.
    """
    model = scknn.SeqContextKNN(k=k, sampling=sampling, similarity=similarity, pop_boost=pop_boost)

    banner = "Training algorithm: S-sKNN with k {}, sampling {}, similarity {} and pop_boost {}"
    print(banner.format(k, sampling, similarity, pop_boost))

    # Relies on the module-level `train` DataFrame having been loaded already.
    model.fit(train)

    # Persist the fitted model; the hyper-parameters are encoded in the name.
    name_parts = [
        "models/valid/cb12_S-sKNN_k", str(k),
        'Smpl', str(sampling),
        'Sim', str(similarity),
        'Pop_boost', str(pop_boost),
        ".model",
    ]
    model_path = "".join(name_parts)
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    print("Finished training. Storing model to: " + model_path)
    with open(model_path, 'wb') as sink:
        pickle.dump(model, sink)

In [None]:
def trainSVMKNN(k,sampling,similarity,pop_boost,weighting):
    """Fit a ``svmknn.VMContextKNN`` model on the global ``train`` data and pickle it.

    All five arguments are forwarded to ``svmknn.VMContextKNN`` under the
    same keyword names and are embedded in the output file name.

    NOTE(review): earlier notes on this function listed ``k = 500`` and
    ``sampling = 1000``; presumably reference values -- confirm against
    ``VMContextKNN`` before relying on them.

    Side effects:
        Prints progress messages and writes the fitted object to
        ``models/valid/cb12_SVMKNN_k<k>Smpl<sampling>Sim<similarity>Pop_boost<pop_boost>WeiFun<weighting>.model``.
        The directory is created when missing; an existing file is
        overwritten.

    Returns:
        None.
    """

    algo = svmknn.VMContextKNN(k=k, sampling=sampling, similarity = similarity, pop_boost = pop_boost, weighting = weighting)

    print("Training algorithm: SVMKNN with k {}, sampling {}, similarity {}, pop_boost {} and weighting {}".format(k, sampling, similarity,
                                                                                                pop_boost, weighting))

    # Relies on the module-level `train` DataFrame having been loaded already.
    algo.fit(train)

    # save the model to disk
    # `weighting` goes through str() like every other component, so a
    # non-string value cannot raise TypeError after the (expensive) fit.
    filename = "models/valid/cb12_SVMKNN_k" + str(k) + 'Smpl' + str(sampling) + 'Sim' + str(similarity) + 'Pop_boost' + str(pop_boost) \
        + 'WeiFun' + str(weighting) + ".model"
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    print("Finished training. Storing model to: " + filename)
    with open(filename,'wb') as f:
        pickle.dump(algo,f)

In [None]:
def trainGRU4Rec(loss, layers, dropout_p_hidden, batch_size):
    """Fit a ``gru4rec.GRU4Rec`` model on the global ``train`` data and pickle it.

    Training is skipped when the target model file already exists.

    Args:
        loss: loss name; forwarded to ``GRU4Rec`` and concatenated into the
            file name (so it must be a string).
        layers: iterable of layer sizes; forwarded to ``GRU4Rec`` and joined
            with ``_`` for the file name.
        dropout_p_hidden: forwarded to ``GRU4Rec`` and embedded in the file name.
        batch_size: forwarded to ``GRU4Rec`` and embedded in the file name.

    Side effects:
        Prints progress messages and writes the fitted object to
        ``models/valid/cb12_GRU4Rec_loss<loss>_layers<l1_l2...>_drop<p>_batch<n>.model``,
        creating the directory when missing.

    Returns:
        None.
    """
    # Derive the output path first so an existing model short-circuits the run.
    layers_str = '_'.join(map(str, layers))
    params = "".join(
        ['_loss', loss, '_layers', layers_str, '_drop', str(dropout_p_hidden), '_batch', str(batch_size)]
    )
    model_path = "models/valid/cb12_GRU4Rec" + params + ".model"
    if os.path.isfile(model_path):
        print("cb12_GRU4Rec" + params + ".model already exists. Skipping training.")
        return

    # Settings that are held constant across the whole grid.
    fixed_settings = dict(
        final_act='linear',
        hidden_act='tanh',
        learning_rate=0.2,
        momentum=0.0,
        n_sample=2048,
        sample_alpha=0,
        time_sort=True,
    )
    model = gru4rec.GRU4Rec(
        loss=loss,
        layers=layers,
        batch_size=batch_size,
        dropout_p_hidden=dropout_p_hidden,
        **fixed_settings
    )

    banner = "Training algorithm: GRU4Rec with loss {}, layers {}, dropout {} and batch size {}"
    print(banner.format(loss, layers_str, dropout_p_hidden, batch_size))

    # Relies on the module-level `train` DataFrame having been loaded already.
    model.fit(train)

    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    print("Finished training. Storing model to: " + model_path)
    with open(model_path, 'wb') as sink:
        pickle.dump(model, sink)

In [None]:
# Load the validation training split (tab-separated) into the module-level
# `train` frame that every train* function fits on.
train_path = '../../data/cb12/processed/valid_train_14d.csv'
source_columns = ['session_id', 'item_id', 'created_at']
# Parse only the three needed columns instead of the whole file. `usecols`
# ignores the order of its list, so re-select afterwards to pin the column
# order before the positional rename below.
train = pd.read_csv(train_path, sep='\t', usecols=source_columns)[source_columns]
train.columns = ['SessionId', 'ItemId', 'Time']

In [None]:
# Grid search over GRU4Rec hyper-parameters: one model per combination of
# loss, layer layout, hidden dropout and batch size.
loss = ["top1-max", "bpr-max-0.5"]
layers = [[100], [100,100], [1000], [1000,1000]]
dropout_p_hidden = [0.0, 0.2, 0.5]
batch_size = [32, 128, 512]
gru_params = list(itertools.product(loss, layers, dropout_p_hidden, batch_size))
for combo in gru_params:
    # combo is (loss, layers, dropout_p_hidden, batch_size)
    trainGRU4Rec(*combo)

In [None]:
# Grid search over BPR hyper-parameters: one model per
# (lambda_session, lambda_item) pair.
lambda_session = [0, 0.25, 0.5]
lambda_item = [0, 0.25, 0.5]
bpr_params = list(itertools.product(lambda_session, lambda_item))
for combo in bpr_params:
    # combo is (lambda_session, lambda_item)
    trainBPR(*combo)

In [None]:
# Grid search over ItemKNN hyper-parameters: one model per (lambda, alpha) pair.
lambd = [20, 50, 80]
alpha = [0.25, 0.5, 0.75]
iknn_params = list(itertools.product(lambd, alpha))
for combo in iknn_params:
    # combo is (lmbd, alpha)
    trainIKNN(*combo)

In [None]:
# Grid search over sKNN hyper-parameters. `sknn_params` is kept as a list
# because the S-sKNN and SVMKNN cells reuse the same grid.

k = [100, 200, 500, 1000]
sampling = ["recent", "random"]
similarity = ["jaccard", "cosine"]
pop_boost = [0, 1]
sknn_params = list(itertools.product(k, sampling, similarity, pop_boost))

for combo in sknn_params:
    # combo is (k, sampling, similarity, pop_boost)
    trainSKNN(*combo)

In [None]:
# Train S-sKNN on the same hyper-parameter grid as sKNN (`sknn_params`).

for combo in sknn_params:
    # combo is (k, sampling, similarity, pop_boost)
    trainSsKNN(*combo)

In [None]:
# Train SVMKNN on the sKNN grid (`sknn_params`), once per weighting function.

weighting_fun = ["div", "log", "quadratic"]

# product() varies the weighting function fastest, matching a nested
# "for params: for fun:" loop.
for combo, fun in itertools.product(sknn_params, weighting_fun):
    # combo is (k, sampling, similarity, pop_boost)
    trainSVMKNN(*combo, fun)