In [1]:
import pandas as pd
from src.recsys_baseline import VSKNN_STAN
from src.evaluation import evaluate_sessions, MRR, HitRate
from src.utils import evaluation_results_to_csv

# Item kNN session-based recommendation
This is the baseline model against which the RL model will be compared.

# Initialise the model (`VSKNN_STAN`)
Model parameters are taken from the research by [S. Latifi, N. Mauro and D. Jannach. 2021. Session-aware recommendation: a surprising quest for the state-of-the-art. Information Sciences](https://doi.org/10.1016/j.ins.2021.05.048)

* Optimised parameters for Retail Rocket dataset: [config](https://github.com/rn5l/session-rec/blob/5dcd583cbd8d44703a5248b9a308945f24b91390/conf/save/retailrocket/session_based/window/window_retailr_vstan.yml)  
    k: 500  
    sample_size: 1000  
    similarity: 'cosine'  
    stan:  
    lambda_spw: 7.24  
    lambda_snh: 100  
    lambda_inh: 3.62  
    vsknn:  
    lambda_ipw: 3.62  
    lambda_idf: 1  

* Optimised parameters for Diginetica dataset: [config](https://github.com/rn5l/session-rec/blob/5dcd583cbd8d44703a5248b9a308945f24b91390/conf/save/diginetica/window/window_multiple_digi_vstan.yml)  
    k: 100  
    sample_size: 1000  
    similarity: 'vec'  
    stan:  
    lambda_spw: 4.9  
    lambda_snh: 80  
    lambda_inh: 9.8  
    vsknn:  
    lambda_ipw: 4.9  
    lambda_idf: 5  



## Retail Rocket

In [2]:
# Load the Retail Rocket training events (tab-separated file).
# Forward slashes keep this relative path valid on Windows, Linux and macOS;
# the previous backslash-separated path only resolved on Windows.
train_data = pd.read_csv(
    'data/processed datasets/retailrocket/events_train.csv',
    sep='\t',
)
# Quick sanity check: first rows and (rows, columns).
display(train_data.head())
display(train_data.shape)

Unnamed: 0,Time,UserId,Type,ItemId,SessionId
0,1438969904,2,view,325215,3
1,1438970013,2,view,325215,3
2,1438970212,2,view,259884,3
3,1438970468,2,view,216305,3
4,1438970905,2,view,342816,3


(1079830, 5)

In [3]:
# Load the Retail Rocket test events (tab-separated file).
# Forward slashes keep this relative path valid on Windows, Linux and macOS;
# the previous backslash-separated path only resolved on Windows.
test_data = pd.read_csv(
    'data/processed datasets/retailrocket/events_test.csv',
    sep='\t',
)

# Quick sanity check: first rows and (rows, columns).
display(test_data.head())
display(test_data.shape)

Unnamed: 0,Time,UserId,Type,ItemId,SessionId
0,1442004589,0,view,285930,1
1,1442004759,0,view,357564,1
2,1442004917,0,view,67045,1
3,1442338531,54,view,388096,66
4,1442338665,54,view,283115,66


(40771, 5)

In [4]:
# VSTAN baseline configured with the Retail Rocket values listed in the
# "Optimised parameters for Retail Rocket dataset" notes earlier in this notebook.
model_Ret = VSKNN_STAN(
    k=500,
    sample_size=1000,
    similarity='cosine',
    # "stan" parameter group
    lambda_spw=7.24,
    lambda_snh=100,
    lambda_inh=3.62,
    # "vsknn" parameter group
    lambda_ipw=3.62,
    lambda_idf=1,
)

In [5]:
# Fit the Retail Rocket baseline; both the train and the test frame are handed to fit().
# NOTE(review): `test` is passed into fit() — confirm VSKNN_STAN.fit does not learn
# from the held-out sessions, otherwise the evaluation below would be optimistic.
model_Ret.fit(train=train_data, test=test_data)

In [6]:
# Cut-offs reported for each metric family. MRR starts at 2 while HitRate
# starts at 1, exactly as in the hand-written list this replaces.
mrr_cutoffs = (2, 3, 4, 5, 10, 15, 20)
hit_rate_cutoffs = (1, 2, 3, 4, 5, 10, 15, 20)

# All MRR metrics first, then all HitRate metrics (ordering preserved).
metrics = (
    [MRR(cutoff) for cutoff in mrr_cutoffs]
    + [HitRate(cutoff) for cutoff in hit_rate_cutoffs]
)

# Evaluate the fitted Retail Rocket model against the test frame.
output = evaluate_sessions(
    pr=model_Ret,
    metrics=metrics,
    test_data=test_data,
    train_data=train_data,
    cut_off=20,
)

# Kept as the cell's last expression so its return value is rendered as output.
evaluation_results_to_csv(
    output=output,
    dataset_name='Retailrocket',
    algo_name='VSTAN (Baseline)',
    reward_func='no reward func',
)

START evaluation of  40771  actions in  13074  sessions
    eval process:  0  of  40771  actions:  0.0  %
    eval process:  1000  of  40771  actions:  2.452723749724069  %
    eval process:  2000  of  40771  actions:  4.905447499448138  %
    eval process:  3000  of  40771  actions:  7.358171249172206  %
    eval process:  4000  of  40771  actions:  9.810894998896275  %
    eval process:  5000  of  40771  actions:  12.263618748620344  %
    eval process:  6000  of  40771  actions:  14.716342498344412  %
    eval process:  7000  of  40771  actions:  17.16906624806848  %
    eval process:  8000  of  40771  actions:  19.62178999779255  %
    eval process:  9000  of  40771  actions:  22.074513747516615  %
    eval process:  10000  of  40771  actions:  24.527237497240687  %
    eval process:  11000  of  40771  actions:  26.979961246964756  %
    eval process:  12000  of  40771  actions:  29.432684996688824  %
    eval process:  13000  of  40771  actions:  31.885408746412892  %
    eval pro

Unnamed: 0,Dataset,Algorithm,Reward Function,Metric,Value
0,Retailrocket,VSTAN (Baseline),no reward func,MRR@2:,0.371791
1,Retailrocket,VSTAN (Baseline),no reward func,MRR@3:,0.390518
2,Retailrocket,VSTAN (Baseline),no reward func,MRR@4:,0.399454
3,Retailrocket,VSTAN (Baseline),no reward func,MRR@5:,0.404631
4,Retailrocket,VSTAN (Baseline),no reward func,MRR@10:,0.413686
5,Retailrocket,VSTAN (Baseline),no reward func,MRR@15:,0.416188
6,Retailrocket,VSTAN (Baseline),no reward func,MRR@20:,0.417399
7,Retailrocket,VSTAN (Baseline),no reward func,HitRate@1:,0.31758
8,Retailrocket,VSTAN (Baseline),no reward func,HitRate@2:,0.426003
9,Retailrocket,VSTAN (Baseline),no reward func,HitRate@3:,0.482182
