In [7]:
import pandas as pd
from src.recsys_baseline import VSKNN_STAN
from src.evaluation import evaluate_sessions, MRR, HitRate
from src.utils import evaluation_results_to_csv

# Item-kNN session-based recommendation
This is the baseline model against which the RL model will be compared.

# Initialise the model (`VSKNN_STAN`)
Model parameters are based on the research of [S. Latifi, N. Mauro and D. Jannach. 2021. Session-aware recommendation: a surprising quest for the state-of-the-art. Information Sciences](https://doi.org/10.1016/j.ins.2021.05.048)

* Optimised parameters for Retail Rocket dataset: [config](https://github.com/rn5l/session-rec/blob/5dcd583cbd8d44703a5248b9a308945f24b91390/conf/save/retailrocket/session_based/window/window_retailr_vstan.yml)  
    k: 500  
    sample_size: 1000  
    similarity: 'cosine'  
    stan:  
    lambda_spw: 7.24  
    lambda_snh: 100  
    lambda_inh: 3.62  
    vsknn:  
    lambda_ipw: 3.62  
    lambda_idf: 1  

* Optimised parameters for Diginetica dataset: [config](https://github.com/rn5l/session-rec/blob/5dcd583cbd8d44703a5248b9a308945f24b91390/conf/save/diginetica/window/window_multiple_digi_vstan.yml)  
    k: 100  
    sample_size: 1000  
    similarity: 'vec'  
    stan:  
    lambda_spw: 4.9  
    lambda_snh: 80  
    lambda_inh: 9.8  
    vsknn:  
    lambda_ipw: 4.9  
    lambda_idf: 5  



## Retail Rocket

In [8]:
# Load the Retail Rocket training events (tab-separated file, items filtered
# to those with at least 30 actions, per the file name).
# Forward slashes keep this relative path valid on Windows, Linux and macOS;
# the previous backslash-separated path only resolved on Windows.
train_data = pd.read_csv(
    'data/processed datasets/retailrocket/events_train - Filtered items with min 30 actions.csv',
    sep='\t',
)
# Preview the first rows, then report the (rows, columns) shape.
display(train_data.head())
display(train_data.shape)

Unnamed: 0,Time,UserId,Type,ItemId,SessionId
0,1438969904,2,view,325215,3
1,1438970013,2,view,325215,3
2,1438970212,2,view,259884,3
3,1438970468,2,view,216305,3
4,1438970905,2,view,342816,3


(558181, 5)

In [9]:
# Load the Retail Rocket test events (tab-separated file, items filtered to
# those with at least 30 actions, per the file name).
# Forward slashes keep this relative path valid on Windows, Linux and macOS;
# the previous backslash-separated path only resolved on Windows.
test_data = pd.read_csv(
    'data/processed datasets/retailrocket/events_test - Filtered items with min 30 actions.csv',
    sep='\t',
)

# Preview the first rows, then report the (rows, columns) shape.
display(test_data.head())
display(test_data.shape)

Unnamed: 0,Time,UserId,Type,ItemId,SessionId
0,1442004589,0,view,285930,1
1,1442004759,0,view,357564,1
2,1442004917,0,view,67045,1
3,1442290140,177,view,6073,241
4,1442290183,177,addtocart,6073,241


(20027, 5)

In [10]:
# Baseline model for Retail Rocket, configured with the optimised
# Retail Rocket parameter values listed in the notebook's introduction.
model_Ret = VSKNN_STAN(
    k=500,
    sample_size=1000,
    similarity='cosine',
    # "stan" group of the referenced config
    lambda_spw=7.24,
    lambda_snh=100,
    lambda_inh=3.62,
    # "vsknn" group of the referenced config
    lambda_ipw=3.62,
    lambda_idf=1,
)

In [11]:
# Fit the Retail Rocket baseline. Both frames are passed to fit();
# NOTE(review): confirm how fit() uses `test` (it is not visible from this notebook).
model_Ret.fit(
    train=train_data,
    test=test_data,
)

In [12]:
# Metrics to report, in the same order as before: every MRR cut-off first,
# then every HitRate cut-off. MRR intentionally starts at 2 while HitRate
# also includes 1, matching the original hand-written list.
metrics = (
    [MRR(cutoff) for cutoff in (2, 3, 4, 5, 10, 15, 20)]
    + [HitRate(cutoff) for cutoff in (1, 2, 3, 4, 5, 10, 15, 20)]
)

# Evaluate the fitted Retail Rocket baseline on the test sessions.
output = evaluate_sessions(
    pr=model_Ret,
    metrics=metrics,
    test_data=test_data,
    train_data=train_data,
    cut_off=20,
)

# Hand the results to the CSV helper together with the labels that identify this run.
evaluation_results_to_csv(
    output=output,
    dataset_name='Retailrocket - Filtered items with min 30 actions',
    algo_name='VSTAN (Baseline)',
    reward_func='no reward func',
)

START evaluation of  20027  actions in  7214  sessions
    eval process:  0  of  20027  actions:  0.0  %
    eval process:  1000  of  20027  actions:  4.99325910021471  %
    eval process:  2000  of  20027  actions:  9.98651820042942  %
    eval process:  3000  of  20027  actions:  14.979777300644132  %
    eval process:  4000  of  20027  actions:  19.97303640085884  %
    eval process:  5000  of  20027  actions:  24.966295501073553  %
    eval process:  6000  of  20027  actions:  29.959554601288264  %
    eval process:  7000  of  20027  actions:  34.95281370150297  %
    eval process:  8000  of  20027  actions:  39.94607280171768  %
    eval process:  9000  of  20027  actions:  44.93933190193239  %
    eval process:  10000  of  20027  actions:  49.932591002147106  %
    eval process:  11000  of  20027  actions:  54.92585010236181  %
    eval process:  12000  of  20027  actions:  59.91910920257653  %
    eval process:  13000  of  20027  actions:  64.91236830279124  %
    eval process: 

Unnamed: 0,Dataset,Algorithm,Reward Function,Metric,Value
0,Retailrocket - Filtered items with min 30 actions,VSTAN (Baseline),no reward func,MRR@2:,0.482518
1,Retailrocket - Filtered items with min 30 actions,VSTAN (Baseline),no reward func,MRR@3:,0.504423
2,Retailrocket - Filtered items with min 30 actions,VSTAN (Baseline),no reward func,MRR@4:,0.515115
3,Retailrocket - Filtered items with min 30 actions,VSTAN (Baseline),no reward func,MRR@5:,0.520937
4,Retailrocket - Filtered items with min 30 actions,VSTAN (Baseline),no reward func,MRR@10:,0.529848
5,Retailrocket - Filtered items with min 30 actions,VSTAN (Baseline),no reward func,MRR@15:,0.532243
6,Retailrocket - Filtered items with min 30 actions,VSTAN (Baseline),no reward func,MRR@20:,0.53318
7,Retailrocket - Filtered items with min 30 actions,VSTAN (Baseline),no reward func,HitRate@1:,0.416842
8,Retailrocket - Filtered items with min 30 actions,VSTAN (Baseline),no reward func,HitRate@2:,0.548193
9,Retailrocket - Filtered items with min 30 actions,VSTAN (Baseline),no reward func,HitRate@3:,0.613908
