In [16]:
import pandas as pd
from src.recsys_baseline import VSKNN_STAN
from src.evaluation import evaluate_sessions, MRR, HitRate

# Item kNN session-based recommendation
This is the baseline model against which the RL model will be compared.

In [17]:
# Load the prepared Retail Rocket training data (tab-separated file).
# Forward slashes keep this relative path valid on Windows, Linux and macOS;
# the previous backslash-separated raw string only resolved on Windows.
train_data = pd.read_csv('data/retailrocket/prepared/train-item-views_train_tr.txt',
                         sep='\t')
# Preview the first rows, then the (rows, columns) shape of the frame.
display(train_data.head())
display(train_data.shape)


Unnamed: 0,SessionId,Time,ItemId,Date,Datestamp,TimeO,ItemSupport
0,1,1462752000.0,9654,2016-05-09,1462752000.0,2016-05-09 00:01:15.848000+00:00,399
1,1,1462752000.0,33043,2016-05-09,1462752000.0,2016-05-09 00:02:53.912000+00:00,195
2,1,1462752000.0,32118,2016-05-09,1462752000.0,2016-05-09 00:04:03.569000+00:00,67
3,1,1462752000.0,12352,2016-05-09,1462752000.0,2016-05-09 00:05:29.870000+00:00,327
4,1,1462752000.0,35077,2016-05-09,1462752000.0,2016-05-09 00:06:30.072000+00:00,102


(859459, 7)

In [18]:
# Load the prepared Retail Rocket test data (tab-separated file).
# Forward slashes keep this relative path valid on Windows, Linux and macOS;
# the previous backslash-separated raw string only resolved on Windows.
test_data = pd.read_csv('data/retailrocket/prepared/train-item-views_test.txt',
                        sep='\t')

# Preview the first rows, then the (rows, columns) shape of the frame.
display(test_data.head())
display(test_data.shape)


Unnamed: 0,SessionId,Time,ItemId,Date,Datestamp,TimeO,ItemSupport
0,289,1464221000.0,125013,2016-05-26,1464221000.0,2016-05-26 00:00:18.301000+00:00,10
1,289,1464222000.0,64068,2016-05-26,1464221000.0,2016-05-26 00:14:07.735000+00:00,30
2,289,1464222000.0,133346,2016-05-26,1464221000.0,2016-05-26 00:14:38.934000+00:00,36
3,289,1464222000.0,438457,2016-05-26,1464221000.0,2016-05-26 00:18:34.305000+00:00,6
4,289,1464222000.0,198930,2016-05-26,1464221000.0,2016-05-26 00:18:48.607000+00:00,10


(76895, 7)

# Initialise the model (`VSKNN_STAN`)
Model parameters are based on the research of [S. Latifi, N. Mauro and D. Jannach. 2021. Session-aware recommendation: a surprising quest for the state-of-the-art. Information Sciences](https://doi.org/10.1016/j.ins.2021.05.048).

* Optimised parameters for Retail Rocket dataset: [config](https://github.com/rn5l/session-rec/blob/5dcd583cbd8d44703a5248b9a308945f24b91390/conf/save/retailrocket/session_based/window/window_retailr_vstan.yml)  
    k: 500  
    sample_size: 1000  
    similarity: 'cosine'  
    stan:  
    lambda_spw: 7.24  
    lambda_snh: 100  
    lambda_inh: 3.62  
    vsknn:  
    lambda_ipw: 3.62  
    lambda_idf: 1  

* Optimised parameters for Diginetica dataset: [config](https://github.com/rn5l/session-rec/blob/5dcd583cbd8d44703a5248b9a308945f24b91390/conf/save/diginetica/window/window_multiple_digi_vstan.yml)  
    k: 100  
    sample_size: 1000  
    similarity: 'vec'  
    stan:  
    lambda_spw: 4.9  
    lambda_snh: 80  
    lambda_inh: 9.8  
    vsknn:  
    lambda_ipw: 4.9  
    lambda_idf: 5  



## Retail Rocket

In [19]:
# Baseline model for Retail Rocket, configured with the optimised values
# listed for that dataset in the parameter overview above.
model_Ret = VSKNN_STAN(
    k=500,
    sample_size=1000,
    similarity='cosine',
    # parameters grouped under `stan` in the reference config
    lambda_spw=7.24,
    lambda_snh=100,
    lambda_inh=3.62,
    # parameters grouped under `vsknn` in the reference config
    lambda_ipw=3.62,
    lambda_idf=1,
)

In [20]:
# Fit the Retail Rocket baseline; both the training and the test frame are handed to fit().
# NOTE(review): how fit() uses `test` is not visible from this notebook — confirm it is
# not used for learning, otherwise the evaluation below would leak test information.
model_Ret.fit(train=train_data, test=test_data)

In [21]:
# Sanity check on a single step: ask the fitted model to score every item id that
# occurs in the training data, for session 289 with item 133346 as the input item
# (both taken from the test-data preview above).
predictions = model_Ret.predict_next(session_id=289,
                                     input_item_id=133346,
                                     predict_for_item_ids=train_data['ItemId'].unique())
print(predictions)
# Plain string literal: the message has no placeholders, so the f-prefix was redundant.
print('List with prediction scores, (probabilities)')

9654      0.0
33043     0.0
32118     0.0
12352     0.0
35077     0.0
         ... 
258318    0.0
175457    0.0
416695    0.0
381794    0.0
73017     0.0
Length: 43023, dtype: float64
List with prediction scores, (probabilities)


In [22]:
# Metrics tracked during evaluation, each instantiated with 20
# (the same value that is passed as cut_off below).
metrics = [MRR(20), HitRate(20)]

# Evaluate the fitted Retail Rocket model on the test sessions; the call is the
# last expression of the cell, so its return value is shown as the cell output.
evaluate_sessions(
    pr=model_Ret,
    metrics=metrics,
    test_data=test_data,
    train_data=train_data,
    cut_off=20,
)

START evaluation of  76895  actions in  15969  sessions
    eval process:  0  of  76895  actions:  0.0  %
    eval process:  1000  of  76895  actions:  1.3004746732557384  %
    eval process:  2000  of  76895  actions:  2.600949346511477  %


KeyboardInterrupt: 

In [9]:
# Hard-coded evaluation results for the Retail Rocket model: one tuple per metric,
# holding the metric label, the overall score, and two ';'-separated breakdown strings.
# NOTE(review): presumably pasted from an earlier, completed evaluate_sessions run,
# because the evaluation cell above was interrupted — confirm provenance before citing.
results = [('MRR@20: ',
  0.18372355549278785,
  'Bin: ;\nPrecision@20: ;',
  'Pos: ;0;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16;17;18;19;20;21;22;23;24;25;26;27;28;29;30;31;32;33;34;35;36;37;38;39;\nPrecision@20: ;0.1923429731707262;0.2130989751510292;0.18494087024707742;0.17348302115182757;0.17469568653083595;0.16558751112390638;0.160819786134727;0.15158782965596843;0.1598319946330456;0.14795290669414837;0.16483622190962532;0.15703123369639385;0.1575727141065769;0.15267422600506575;0.15321847675160974;0.15358743696471566;0.13086965926122007;0.14231122064023527;0.1531287431342716;0.10542372375431582;0.130395174042715;0.14180672268907565;0.11122303622303623;0.054672943988201335;0.18073870573870573;0.12518037518037517;0.12446189001916556;0.10142390289449113;0.14041783380018671;0.07324374971433795;0.017361111111111112;0.0755952380952381;0.027551020408163263;0.36666666666666664;0.265625;0.25;0.5;0.3333333333333333;0.16666666666666666;0.05;'),
 ('HitRate@20: ',
  0.5135246036175032,
  'Bin: ;\nHitRate@20: ;',
  'Pos: ;0;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16;17;18;19;20;21;22;23;24;25;26;27;28;29;30;31;32;33;34;35;36;37;38;39;\nHitRate@20: ;0.48994927672365207;0.5400803353126091;0.5325826545280307;0.5162237078102071;0.5213065109236427;0.5196392202502182;0.5205585725368502;0.49898477157360405;0.5109926715522984;0.48596491228070177;0.5034883720930232;0.49922958397534667;0.4909456740442656;0.4961439588688946;0.5096153846153846;0.5020746887966805;0.43103448275862066;0.4057971014492754;0.4857142857142857;0.4625;0.4426229508196721;0.30434782608695654;0.41025641025641024;0.34375;0.38461538461538464;0.3181818181818182;0.47368421052631576;0.5;0.5;0.5454545454545454;0.2;0.375;0.2857142857142857;0.6;0.5;0.25;0.6666666666666666;0.3333333333333333;0.5;0.5;')]

# Dump the full list as plain text (long output).
print(results)

[('MRR@20: ', 0.18372355549278785, 'Bin: ;\nPrecision@20: ;', 'Pos: ;0;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16;17;18;19;20;21;22;23;24;25;26;27;28;29;30;31;32;33;34;35;36;37;38;39;\nPrecision@20: ;0.1923429731707262;0.2130989751510292;0.18494087024707742;0.17348302115182757;0.17469568653083595;0.16558751112390638;0.160819786134727;0.15158782965596843;0.1598319946330456;0.14795290669414837;0.16483622190962532;0.15703123369639385;0.1575727141065769;0.15267422600506575;0.15321847675160974;0.15358743696471566;0.13086965926122007;0.14231122064023527;0.1531287431342716;0.10542372375431582;0.130395174042715;0.14180672268907565;0.11122303622303623;0.054672943988201335;0.18073870573870573;0.12518037518037517;0.12446189001916556;0.10142390289449113;0.14041783380018671;0.07324374971433795;0.017361111111111112;0.0755952380952381;0.027551020408163263;0.36666666666666664;0.265625;0.25;0.5;0.3333333333333333;0.16666666666666666;0.05;'), ('HitRate@20: ', 0.5135246036175032, 'Bin: ;\nHitRate@20: ;', 'Po

In [12]:
# Display only the first stored entry, i.e. the 'MRR@20: ' tuple.
results[0]

('MRR@20: ',
 0.18372355549278785,
 'Bin: ;\nPrecision@20: ;',
 'Pos: ;0;1;2;3;4;5;6;7;8;9;10;11;12;13;14;15;16;17;18;19;20;21;22;23;24;25;26;27;28;29;30;31;32;33;34;35;36;37;38;39;\nPrecision@20: ;0.1923429731707262;0.2130989751510292;0.18494087024707742;0.17348302115182757;0.17469568653083595;0.16558751112390638;0.160819786134727;0.15158782965596843;0.1598319946330456;0.14795290669414837;0.16483622190962532;0.15703123369639385;0.1575727141065769;0.15267422600506575;0.15321847675160974;0.15358743696471566;0.13086965926122007;0.14231122064023527;0.1531287431342716;0.10542372375431582;0.130395174042715;0.14180672268907565;0.11122303622303623;0.054672943988201335;0.18073870573870573;0.12518037518037517;0.12446189001916556;0.10142390289449113;0.14041783380018671;0.07324374971433795;0.017361111111111112;0.0755952380952381;0.027551020408163263;0.36666666666666664;0.265625;0.25;0.5;0.3333333333333333;0.16666666666666666;0.05;')

## Diginetica

In [7]:
# Baseline model for Diginetica, configured with the optimised values
# listed for that dataset in the parameter overview above.
model_Dig = VSKNN_STAN(
    k=100,
    sample_size=1000,
    similarity='vec',
    # parameters grouped under `stan` in the reference config
    lambda_spw=4.9,
    lambda_snh=80,
    lambda_inh=9.8,
    # parameters grouped under `vsknn` in the reference config
    lambda_ipw=4.9,
    lambda_idf=5,
)