In [9]:
import pandas as pd
from src.recsys_baseline import VSKNN_STAN
from src.evaluation import evaluate_sessions, MRR, HitRate
from src.utils import evaluation_results_to_csv

# Item knn sesion based recommendation
This is the base model that will be used to compare the RL model with.

# Initialise the model (`VSKNN_STAN`)
Model parameters are based on research of [S. Latifi, N. Mauro and D. Jannach. 2021. Session-aware recommendation: a surprising quest for the state-of-the-art. Information Sciences](https://doi.org/10.1016/j.ins.2021.05.048) 

* Optimised parameters for Retail Rocket dataset: [config](https://github.com/rn5l/session-rec/blob/5dcd583cbd8d44703a5248b9a308945f24b91390/conf/save/retailrocket/session_based/window/window_retailr_vstan.yml)  
    k: 500  
    sample_size: 1000  
    similarity: 'cosine'  
    stan:  
    lambda_spw: 7.24  
    lambda_snh: 100  
    lambda_inh: 3.62  
    vsknn:  
    lambda_ipw: 3.62  
    lambda_idf: 1  

* Optimised parameters for Diginetica dataset: [config](https://github.com/rn5l/session-rec/blob/5dcd583cbd8d44703a5248b9a308945f24b91390/conf/save/diginetica/window/window_multiple_digi_vstan.yml)  
    k: 100  
    sample_size: 1000  
    similarity: 'vec'  
    stan:  
    lambda_spw: 4.9  
    lambda_snh: 80  
    lambda_inh: 9.8  
    vsknn:  
    lambda_ipw: 4.9  
    lambda_idf: 5  



## Retail Rocket

In [10]:
train_data = pd.read_csv(r'data\processed datasets\retailrocket\events_train.csv',
                    sep='\t',)
display(train_data.head())
display(train_data.shape)

Unnamed: 0,Time,UserId,Type,ItemId,SessionId
0,1438969904,2,view,325215,3
1,1438970013,2,view,325215,3
2,1438970212,2,view,259884,3
3,1438970468,2,view,216305,3
4,1438970905,2,view,342816,3


(1079830, 5)

In [11]:
test_data = pd.read_csv(r'data\processed datasets\retailrocket\events_test.csv',
                    sep='\t',)

display(test_data.head())
display(test_data.shape)


Unnamed: 0,Time,UserId,Type,ItemId,SessionId
0,1442004589,0,view,285930,1
1,1442004759,0,view,357564,1
2,1442004917,0,view,67045,1
3,1442338531,54,view,388096,66
4,1442338665,54,view,283115,66


(40771, 5)

In [12]:
model_Ret = VSKNN_STAN(k=500,
                       sample_size=1000,
                       similarity='cosine',  
                       #stan:  
                       lambda_spw=7.24,  
                       lambda_snh=100,  
                       lambda_inh=3.62,  
                       #vsknn:  
                       lambda_ipw=3.62, 
                       lambda_idf=1
                       )

In [13]:
model_Ret.fit(train=train_data, test=test_data)

In [15]:
metrics = [
    MRR(2),
    MRR(3),
    MRR(4),
    MRR(5),
    MRR(10),
    MRR(15),
    MRR(20),
    HitRate(1),
    HitRate(2),
    HitRate(3),
    HitRate(4),
    HitRate(5)]

output = evaluate_sessions(pr=model_Ret,
                  metrics= metrics,
                  test_data=test_data,
                  train_data=train_data,
                  cut_off=20)
evaluation_results_to_csv(output=output, dataset_name='Retailrocket', algo_name='VSTAN (Baseline)', reward_func='no reward func')

START evaluation of  40771  actions in  13074  sessions
    eval process:  0  of  40771  actions:  0.0  %
    eval process:  1000  of  40771  actions:  2.452723749724069  %
    eval process:  2000  of  40771  actions:  4.905447499448138  %
    eval process:  3000  of  40771  actions:  7.358171249172206  %
    eval process:  4000  of  40771  actions:  9.810894998896275  %
    eval process:  5000  of  40771  actions:  12.263618748620344  %
    eval process:  6000  of  40771  actions:  14.716342498344412  %
    eval process:  7000  of  40771  actions:  17.16906624806848  %
    eval process:  8000  of  40771  actions:  19.62178999779255  %
    eval process:  9000  of  40771  actions:  22.074513747516615  %
    eval process:  10000  of  40771  actions:  24.527237497240687  %
    eval process:  11000  of  40771  actions:  26.979961246964756  %
    eval process:  12000  of  40771  actions:  29.432684996688824  %
    eval process:  13000  of  40771  actions:  31.885408746412892  %
    eval pro

Unnamed: 0,Dataset,Algorithm,Reward Function,Metric,Value
0,Retailrocket,VSTAN (Baseline),no reward func,MRR@2:,0.371791
1,Retailrocket,VSTAN (Baseline),no reward func,MRR@3:,0.390518
2,Retailrocket,VSTAN (Baseline),no reward func,MRR@4:,0.399454
3,Retailrocket,VSTAN (Baseline),no reward func,MRR@5:,0.404631
4,Retailrocket,VSTAN (Baseline),no reward func,MRR@10:,0.413686
5,Retailrocket,VSTAN (Baseline),no reward func,MRR@15:,0.416188
6,Retailrocket,VSTAN (Baseline),no reward func,MRR@20:,0.417399
7,Retailrocket,VSTAN (Baseline),no reward func,HitRate@1:,0.31758
8,Retailrocket,VSTAN (Baseline),no reward func,HitRate@2:,0.426003
9,Retailrocket,VSTAN (Baseline),no reward func,HitRate@3:,0.482182


## Diginetica

In [15]:
train_data = pd.read_csv(r'data\processed datasets\diginetica\interactions_train_tr.txt',
                    sep='\t',)
display(train_data.head())
display(train_data.shape)

Unnamed: 0,Type,SessionId,ItemId,Time,Date,Datestamp,TimeO,ItemSupport
0,view,1,9654,1462752000.0,2016-05-09,1462752000.0,2016-05-09 00:01:15.848000+00:00,74
1,view,1,33043,1462752000.0,2016-05-09,1462752000.0,2016-05-09 00:02:53.912000+00:00,41
2,view,1,32118,1462752000.0,2016-05-09,1462752000.0,2016-05-09 00:04:03.569000+00:00,19
3,view,1,12352,1462752000.0,2016-05-09,1462752000.0,2016-05-09 00:05:29.870000+00:00,79
4,view,1,35077,1462752000.0,2016-05-09,1462752000.0,2016-05-09 00:06:30.072000+00:00,47


(102272, 8)

In [25]:
print(f"Type of interactions in diginetica: {train_data['Type'].unique()}")

Type of interactions in diginetica: ['view' 'purchase']


In [19]:
test_data = pd.read_csv(r'data\processed datasets\diginetica\interactions_test.txt',
                    sep='\t',)
display(test_data.head())
display(test_data.shape)

Unnamed: 0,Type,SessionId,ItemId,Time,Date,Datestamp,TimeO,ItemSupport
0,view,289,125013,1464221000.0,2016-05-26,1464221000.0,2016-05-26 00:00:18.301000+00:00,7
1,view,289,64068,1464222000.0,2016-05-26,1464221000.0,2016-05-26 00:14:07.735000+00:00,6
2,view,289,133346,1464222000.0,2016-05-26,1464221000.0,2016-05-26 00:14:38.934000+00:00,6
3,view,289,198930,1464222000.0,2016-05-26,1464221000.0,2016-05-26 00:18:48.607000+00:00,7
4,view,302,36202,1464221000.0,2016-05-26,1464221000.0,2016-05-26 00:00:45.583000+00:00,23


(64761, 8)

In [20]:
# initialise parameters for Diginetica dataset as mentioned before
model_Dig = VSKNN_STAN(k=100,
                       sample_size=1000,
                       similarity='vec',
                    #    stan:  
                       lambda_spw=4.9,  
                       lambda_snh=80,  
                       lambda_inh=9.8,  
                    #    vsknn:  
                       lambda_ipw=4.9, 
                       lambda_idf=5
                       )

In [21]:
# model_Dig.fit(train=train_data[train_data['Type']=='view'], test=test_data[test_data['Type']=='view'])
model_Dig.fit(train=train_data, test=test_data)

In [22]:
metrics = [
    MRR(2),
    MRR(3),
    MRR(4),
    MRR(5),
    MRR(10),
    MRR(15),
    MRR(20),
    HitRate(1),
    HitRate(2),
    HitRate(3),
    HitRate(4),
    HitRate(5)]

evaluate_sessions(pr=model_Dig,
                  metrics= metrics,
                  test_data=test_data,
                  train_data=train_data,
                  cut_off=20)

START evaluation of  64761  actions in  14163  sessions
    eval process:  0  of  64761  actions:  0.0  %
    eval process:  1000  of  64761  actions:  1.5441392195920385  %
    eval process:  2000  of  64761  actions:  3.088278439184077  %
    eval process:  3000  of  64761  actions:  4.632417658776116  %
    eval process:  4000  of  64761  actions:  6.176556878368154  %
    eval process:  5000  of  64761  actions:  7.720696097960192  %
    eval process:  6000  of  64761  actions:  9.264835317552231  %
    eval process:  7000  of  64761  actions:  10.808974537144268  %
    eval process:  8000  of  64761  actions:  12.353113756736308  %
    eval process:  9000  of  64761  actions:  13.897252976328344  %
    eval process:  10000  of  64761  actions:  15.441392195920384  %
    eval process:  11000  of  64761  actions:  16.985531415512423  %
    eval process:  12000  of  64761  actions:  18.529670635104463  %
    eval process:  13000  of  64761  actions:  20.0738098546965  %
    eval proc

[('MRR@2: ',
  0.14417565911696115,
  'Bin: ;\nPrecision@2: ;',
  'Pos: ;0;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16;17;18;19;20;21;22;23;24;25;26;27;28;29;30;31;32;33;34;35;36;37;38;39;40;41;\nPrecision@2: ;0.14933276848125396;0.17197356563532593;0.14597243926694134;0.1351457840819543;0.13118279569892474;0.11618798955613577;0.12313803376365443;0.11486486486486487;0.11322463768115942;0.1380778588807786;0.10822147651006711;0.13058035714285715;0.09883720930232558;0.13670411985018727;0.10101010101010101;0.14583333333333334;0.1261682242990654;0.10240963855421686;0.06521739130434782;0.11320754716981132;0.1111111111111111;0.11290322580645161;0.11363636363636363;0.05555555555555555;0.11764705882352941;0.1;0.07142857142857142;0.3888888888888889;0.0;0.0;0.0;0.0;0.5;0.5;0.3333333333333333;0.75;0.5;0.0;0.0;0.0;0.0;0.0;'),
 ('MRR@3: ',
  0.15956493669050956,
  'Bin: ;\nPrecision@3: ;',
  'Pos: ;0;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16;17;18;19;20;21;22;23;24;25;26;27;28;29;30;31;32;33;34;35;36;37;38;