In [1]:
import numpy as np
import pandas as pd  # Optional dependency
from wsknn import fit
from wsknn.utils import load_gzipped_pickle
from wsknn.evaluate import score_model

In [2]:
# Load data
ITEMS = 'demo-data/recsys-2015/parsed_items.pkl.gz'
SESSIONS = 'demo-data/recsys-2015/parsed_sessions.pkl.gz'

items = load_gzipped_pickle(ITEMS)
sessions = load_gzipped_pickle(SESSIONS)

imap = items['map']
smap = sessions['map']

In [3]:
def get_sample_sessions(set_of_sessions, n_sessions=1000):
    sessions_keys = list(set_of_sessions.keys())
    key_sample = np.random.choice(sessions_keys, n_sessions)
    sampled = [set_of_sessions[dk] for dk in key_sample]
    return sampled

test_sessions = get_sample_sessions(smap)

In [4]:
session_selection = ['random', 'recent', 'common_items']
items_weighting = ['linear', 'log', 'quadratic']
ranking_methods = ['linear', 'log', 'quadratic', 'inv']

In [5]:
# Show different recommendations

for session in test_sessions:
    if len(session[0]) > 4:
        print('Session:')
        print(session)
        print('')
        for s_selection in session_selection:
            for i_weight in items_weighting:
                for rank_method in ranking_methods:
                    print('Session selection:', s_selection)
                    print('Item weighting:', i_weight)
                    print('Ranking:', rank_method)
                    fitted = fit(smap, imap,
                                 number_of_neighbors=1000,
                                 sampling_strategy=s_selection,
                                 sample_size=10000,
                                 weighting_func=i_weight,
                                 ranking_strategy=rank_method)
                    recommends = fitted.recommend(session)
                    print('Recommendations:', recommends)
        break

Session:
[[214821341, 214821341, 214612721, 214580465, 214539592, 214510455, 214710013], [1397897051.836, 1397897131.162, 1397897223.089, 1397897413.903, 1397897611.54, 1397897677.988, 1397897887.507]]

Session selection: random
Item weighting: linear
Ranking: linear
Recommendations: [(214710013, 193.0952380952423), (214853081, 52.92857142857128), (214854716, 45.69047619047607), (214851757, 44.57142857142845), (214854720, 43.23809523809512)]
Session selection: random
Item weighting: linear
Ranking: log
Recommendations: [(214710013, 192.5238095238137), (214853081, 51.59523809523795), (214851757, 46.47619047619035), (214854716, 43.97619047619036), (214854720, 42.28571428571417)]
Session selection: random
Item weighting: linear
Ranking: quadratic
Recommendations: [(214710013, 188.5238095238134), (214853081, 50.833333333333194), (214854720, 48.190476190476055), (214854716, 46.07142857142845), (214851757, 44.95238095238083)]
Session selection: random
Item weighting: linear
Ranking: inv
Reco

In [6]:
# Show different recommendations

columns = ['session weighting', 'items weighting', 'ranking', 'MRR', 'Recall', 'Precision', 'k']
data = []

for s_selection in session_selection:
    for i_weight in items_weighting:
        for rank_method in ranking_methods:
            fitted = fit(smap, imap,
                         number_of_neighbors=1000,
                         sampling_strategy=s_selection,
                         sample_size=10000,
                         weighting_func=i_weight,
                         ranking_strategy=rank_method)
            scores = score_model(test_sessions, fitted, k=fitted.n_of_recommendations, skip_short_sessions=True)
            data.append([s_selection, i_weight, rank_method, scores['MRR'], scores['Recall'], scores['Precision'], fitted.n_of_recommendations])

In [7]:
scores_frame = pd.DataFrame(data=data, columns=columns)
scores_frame.head()

Unnamed: 0,session weighting,items weighting,ranking,MRR,Recall,Precision,k
0,random,linear,linear,0.386028,0.297037,0.186826,5
1,random,linear,log,0.396307,0.297695,0.185629,5
2,random,linear,quadratic,0.393812,0.309103,0.19521,5
3,random,linear,inv,0.379341,0.303166,0.186826,5
4,random,log,linear,0.388822,0.306845,0.186826,5


In [12]:
scores_frame.sort_values('MRR', ascending=False).head(10)

Unnamed: 0,session weighting,items weighting,ranking,MRR,Recall,Precision,k
22,recent,quadratic,quadratic,0.407385,0.308877,0.190419,5
18,recent,log,quadratic,0.406088,0.310018,0.190419,5
6,random,log,quadratic,0.40519,0.310498,0.194012,5
10,random,quadratic,quadratic,0.400998,0.3092,0.192814,5
30,common_items,log,quadratic,0.400898,0.287325,0.184431,5
14,recent,linear,quadratic,0.399501,0.311288,0.194012,5
21,recent,quadratic,log,0.399501,0.313971,0.189222,5
34,common_items,quadratic,quadratic,0.399501,0.282335,0.182036,5
20,recent,quadratic,linear,0.399202,0.311975,0.188024,5
23,recent,quadratic,inv,0.397405,0.309875,0.188024,5


In [13]:
scores_frame.sort_values('Recall', ascending=False).head(10)

Unnamed: 0,session weighting,items weighting,ranking,MRR,Recall,Precision,k
21,recent,quadratic,log,0.399501,0.313971,0.189222,5
9,random,quadratic,log,0.392116,0.313864,0.192814,5
20,recent,quadratic,linear,0.399202,0.311975,0.188024,5
7,random,log,inv,0.391517,0.311515,0.189222,5
16,recent,log,linear,0.386926,0.311508,0.186826,5
17,recent,log,log,0.386427,0.311315,0.185629,5
14,recent,linear,quadratic,0.399501,0.311288,0.194012,5
11,random,quadratic,inv,0.397106,0.311154,0.191617,5
8,random,quadratic,linear,0.387425,0.31057,0.190419,5
6,random,log,quadratic,0.40519,0.310498,0.194012,5


In [14]:
scores_frame.sort_values('Precision', ascending=False).head(10)

Unnamed: 0,session weighting,items weighting,ranking,MRR,Recall,Precision,k
2,random,linear,quadratic,0.393812,0.309103,0.19521,5
14,recent,linear,quadratic,0.399501,0.311288,0.194012,5
6,random,log,quadratic,0.40519,0.310498,0.194012,5
9,random,quadratic,log,0.392116,0.313864,0.192814,5
10,random,quadratic,quadratic,0.400998,0.3092,0.192814,5
11,random,quadratic,inv,0.397106,0.311154,0.191617,5
22,recent,quadratic,quadratic,0.407385,0.308877,0.190419,5
18,recent,log,quadratic,0.406088,0.310018,0.190419,5
8,random,quadratic,linear,0.387425,0.31057,0.190419,5
7,random,log,inv,0.391517,0.311515,0.189222,5


In [15]:
scores_frame[['MRR', 'Precision', 'Recall']].describe()

Unnamed: 0,MRR,Precision,Recall
count,36.0,36.0,36.0
mean,0.392124,0.186261,0.30057
std,0.007899,0.004871,0.011251
min,0.378144,0.177246,0.281337
25%,0.386901,0.183234,0.287951
50%,0.391816,0.186826,0.30624
75%,0.397854,0.189521,0.310516
max,0.407385,0.19521,0.313971
