In [5]:
import pandas as pd
from src.recsys_baseline import VSKNN_STAN
from src.evaluation import evaluate_sessions, MRR, HitRate
from src.utils import evaluation_results_to_csv

# Item kNN session-based recommendation
This is the baseline model against which the RL model will be compared.

# Initialise the model (`VSKNN_STAN`)
Model parameters are based on the research of [S. Latifi, N. Mauro and D. Jannach. 2021. Session-aware recommendation: a surprising quest for the state-of-the-art. Information Sciences](https://doi.org/10.1016/j.ins.2021.05.048) 

* Optimised parameters for Diginetica dataset: [config](https://github.com/rn5l/session-rec/blob/5dcd583cbd8d44703a5248b9a308945f24b91390/conf/save/diginetica/window/window_multiple_digi_vstan.yml)  
    k: 100  
    sample_size: 1000  
    similarity: 'vec'  
    stan:  
    lambda_spw: 4.9  
    lambda_snh: 80  
    lambda_inh: 9.8  
    vsknn:  
    lambda_ipw: 4.9  
    lambda_idf: 5  


* Optimised parameters for Retail Rocket dataset: [config](https://github.com/rn5l/session-rec/blob/5dcd583cbd8d44703a5248b9a308945f24b91390/conf/save/retailrocket/session_based/window/window_retailr_vstan.yml)  
    k: 500  
    sample_size: 1000  
    similarity: 'cosine'  
    stan:  
    lambda_spw: 7.24  
    lambda_snh: 100  
    lambda_inh: 3.62  
    vsknn:  
    lambda_ipw: 3.62  
    lambda_idf: 1  

## Diginetica

In [6]:
# Load the Diginetica training interactions (tab-separated file).
# Forward slashes keep this relative path valid on Windows, Linux and macOS;
# a backslash-separated path only resolves on Windows.
train_data = pd.read_csv(
    'data/processed datasets/diginetica/interactions_train_tr.txt',
    sep='\t',
)
# Preview the first rows and the (rows, columns) shape as a sanity check.
display(train_data.head())
display(train_data.shape)

Unnamed: 0,Type,SessionId,ItemId,Time,Date,Datestamp,TimeO,ItemSupport
0,view,1,9654,1462752000.0,2016-05-09,1462752000.0,2016-05-09 00:01:15.848000+00:00,74
1,view,1,33043,1462752000.0,2016-05-09,1462752000.0,2016-05-09 00:02:53.912000+00:00,41
2,view,1,32118,1462752000.0,2016-05-09,1462752000.0,2016-05-09 00:04:03.569000+00:00,19
3,view,1,12352,1462752000.0,2016-05-09,1462752000.0,2016-05-09 00:05:29.870000+00:00,79
4,view,1,35077,1462752000.0,2016-05-09,1462752000.0,2016-05-09 00:06:30.072000+00:00,47


(102272, 8)

In [7]:
# Load the Diginetica test interactions (tab-separated file).
# Forward slashes keep this relative path valid on Windows, Linux and macOS;
# a backslash-separated path only resolves on Windows.
test_data = pd.read_csv(
    'data/processed datasets/diginetica/interactions_test.txt',
    sep='\t',
)

# Preview the first rows and the (rows, columns) shape as a sanity check.
display(test_data.head())
display(test_data.shape)

Unnamed: 0,Type,SessionId,ItemId,Time,Date,Datestamp,TimeO,ItemSupport
0,view,289,125013,1464221000.0,2016-05-26,1464221000.0,2016-05-26 00:00:18.301000+00:00,7
1,view,289,64068,1464222000.0,2016-05-26,1464221000.0,2016-05-26 00:14:07.735000+00:00,6
2,view,289,133346,1464222000.0,2016-05-26,1464221000.0,2016-05-26 00:14:38.934000+00:00,6
3,view,289,198930,1464222000.0,2016-05-26,1464221000.0,2016-05-26 00:18:48.607000+00:00,7
4,view,302,36202,1464221000.0,2016-05-26,1464221000.0,2016-05-26 00:00:45.583000+00:00,23


(64761, 8)

In [8]:
# Hyperparameters listed for the Diginetica dataset in the markdown above.
digi_params = {
    'k': 100,
    'sample_size': 1000,
    'similarity': 'vec',
    # stan:
    'lambda_spw': 4.9,
    'lambda_snh': 80,
    'lambda_inh': 9.8,
    # vsknn:
    'lambda_ipw': 4.9,
    'lambda_idf': 5,
}
model_Digi = VSKNN_STAN(**digi_params)

In [9]:
# Fit the Diginetica baseline. `fit` receives both splits; how it uses the
# test split is defined in src.recsys_baseline, not in this notebook.
model_Digi.fit(
    train=train_data,
    test=test_data,
)

In [10]:
# Metric objects in the same order as before: MRR@k first, then HitRate@k,
# each in ascending cut-off order (MRR starts at 2, HitRate at 1).
metrics = [
    *(MRR(k) for k in (2, 3, 4, 5, 10, 15, 20)),
    *(HitRate(k) for k in (1, 2, 3, 4, 5, 10, 15, 20)),
]

# Evaluate the fitted Diginetica model on the test interactions.
output = evaluate_sessions(
    pr=model_Digi,
    metrics=metrics,
    test_data=test_data,
    train_data=train_data,
    cut_off=20,
)

# Kept as the cell's last expression so its return value is still rendered.
evaluation_results_to_csv(
    output=output,
    dataset_name='Diginetica',
    algo_name='VSTAN (Baseline)',
    reward_func='no reward func',
)

START evaluation of  64761  actions in  14163  sessions
    eval process:  0  of  64761  actions:  0.0  %
    eval process:  1000  of  64761  actions:  1.5441392195920385  %
    eval process:  2000  of  64761  actions:  3.088278439184077  %
    eval process:  3000  of  64761  actions:  4.632417658776116  %
    eval process:  4000  of  64761  actions:  6.176556878368154  %
    eval process:  5000  of  64761  actions:  7.720696097960192  %
    eval process:  6000  of  64761  actions:  9.264835317552231  %
    eval process:  7000  of  64761  actions:  10.808974537144268  %
    eval process:  8000  of  64761  actions:  12.353113756736308  %
    eval process:  9000  of  64761  actions:  13.897252976328344  %
    eval process:  10000  of  64761  actions:  15.441392195920384  %
    eval process:  11000  of  64761  actions:  16.985531415512423  %
    eval process:  12000  of  64761  actions:  18.529670635104463  %
    eval process:  13000  of  64761  actions:  20.0738098546965  %
    eval proc

Unnamed: 0,Dataset,Algorithm,Reward Function,Metric,Value
0,Diginetica,VSTAN (Baseline),no reward func,MRR@2:,0.144176
1,Diginetica,VSTAN (Baseline),no reward func,MRR@3:,0.159565
2,Diginetica,VSTAN (Baseline),no reward func,MRR@4:,0.168548
3,Diginetica,VSTAN (Baseline),no reward func,MRR@5:,0.17454
4,Diginetica,VSTAN (Baseline),no reward func,MRR@10:,0.187495
5,Diginetica,VSTAN (Baseline),no reward func,MRR@15:,0.192192
6,Diginetica,VSTAN (Baseline),no reward func,MRR@20:,0.194401
7,Diginetica,VSTAN (Baseline),no reward func,HitRate@1:,0.109431
8,Diginetica,VSTAN (Baseline),no reward func,HitRate@2:,0.17892
9,Diginetica,VSTAN (Baseline),no reward func,HitRate@3:,0.225088


## Retail Rocket

In [None]:
# Load the Retail Rocket training events (tab-separated despite the .csv name).
# Forward slashes keep this relative path valid on Windows, Linux and macOS;
# a backslash-separated path only resolves on Windows.
train_data = pd.read_csv(
    'data/processed datasets/retailrocket/events_train - Filtered items with min 30 actions.csv',
    sep='\t',
)
# Preview the first rows and the (rows, columns) shape as a sanity check.
display(train_data.head())
display(train_data.shape)

In [None]:
# Load the Retail Rocket test events (tab-separated despite the .csv name).
# Forward slashes keep this relative path valid on Windows, Linux and macOS;
# a backslash-separated path only resolves on Windows.
test_data = pd.read_csv(
    'data/processed datasets/retailrocket/events_test - Filtered items with min 30 actions.csv',
    sep='\t',
)

# Preview the first rows and the (rows, columns) shape as a sanity check.
display(test_data.head())
display(test_data.shape)

In [None]:
# Hyperparameters listed for the Retail Rocket dataset in the markdown above.
retailrocket_params = {
    'k': 500,
    'sample_size': 1000,
    'similarity': 'cosine',
    # stan:
    'lambda_spw': 7.24,
    'lambda_snh': 100,
    'lambda_inh': 3.62,
    # vsknn:
    'lambda_ipw': 3.62,
    'lambda_idf': 1,
}
model_Ret = VSKNN_STAN(**retailrocket_params)

In [None]:
# Fit the Retail Rocket baseline. `fit` receives both splits; how it uses the
# test split is defined in src.recsys_baseline, not in this notebook.
model_Ret.fit(
    train=train_data,
    test=test_data,
)

In [None]:
# Metric objects in the same order as before: MRR@k first, then HitRate@k,
# each in ascending cut-off order (MRR starts at 2, HitRate at 1).
metrics = [
    *(MRR(k) for k in (2, 3, 4, 5, 10, 15, 20)),
    *(HitRate(k) for k in (1, 2, 3, 4, 5, 10, 15, 20)),
]

# Evaluate the fitted Retail Rocket model on the test events.
output = evaluate_sessions(
    pr=model_Ret,
    metrics=metrics,
    test_data=test_data,
    train_data=train_data,
    cut_off=20,
)

# Kept as the cell's last expression so its return value is still rendered.
evaluation_results_to_csv(
    output=output,
    dataset_name='Retailrocket',
    algo_name='VSTAN (Baseline)',
    reward_func='no reward func',
)