# Sample TestRSReclist 

Evaluation walkthrough using a single fold, replicating a custom RecList test (i.e. hit rate@100).

In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded automatically before each cell's code is executed.
%reload_ext autoreload
%autoreload 2

In [None]:
from nbdev.showdoc import show_doc

In [None]:
import os
from datetime import datetime
from dotenv import load_dotenv

# Read key/value pairs from the local 'upload.env' file into the process
# environment; verbose=True asks python-dotenv to warn when the file is missing.
load_dotenv('upload.env', verbose=True)

# Submission credentials, looked up in the process environment.
#   EMAIL          - the e-mail address used to sign up
#   BUCKET_NAME, PARTICIPANT_ID, AWS_ACCESS_KEY, AWS_SECRET_KEY
#                  - values received by e-mail after registration
# Any variable that is not set in the environment ends up as None.
EMAIL, BUCKET_NAME, PARTICIPANT_ID, AWS_ACCESS_KEY, AWS_SECRET_KEY = (
    os.environ.get(env_key)
    for env_key in (
        'EMAIL',
        'BUCKET_NAME',
        'PARTICIPANT_ID',
        'AWS_ACCESS_KEY',
        'AWS_SECRET_KEY',
    )
)



from collections import namedtuple

# Run configuration: an immutable record holding the random seed and the
# number of folds (fields, in order: SEED, NUM_FOLDS).
CFG = namedtuple('CFG', 'SEED NUM_FOLDS')
c = CFG(42, 1)

In [None]:
from evaluation.EvalRSRunner import EvalRSRunner, EvalRSDataset
from evaluation.EvalRSRunner import ChallengeDataset
from submission.CBOWRecSys import CBoWRecSys
from submission.SkipGramRecSys import SkipGramRecSys

In [None]:
# Build the challenge dataset with the configured fold count and seed
# (NUM_FOLDS=1 and SEED=42 in the CFG record).
dset = ChallengeDataset(num_folds=c.NUM_FOLDS, seed=c.SEED)

LFM dataset already downloaded. Skipping download.
Loading dataset.
Generating folds.
Generating dataset hashes.


In [None]:
# Fetch the train and test splits of fold 0 (the only fold, since NUM_FOLDS=1).
# Both accessors are underscore-prefixed, i.e. by Python convention they are
# not part of ChallengeDataset's public interface.
train0 = dset._get_train_set(0)
test0 = dset._get_test_set(0)

In [None]:
# Sizes of the fold-0 train and test splits.
len(train0), len(test0)

(6849244, 29717)

In [None]:
# Small sample: the first 100,000 entries of the fold-0 training split.
# It is what gets passed as x_train to the evaluation dataset and to the
# runner; the model itself is trained on the full train0.
dset_small = train0[:100000]

In [None]:
# Preview of the x_test input: only the user_id column of the test split.
test0[['user_id']].head(2)

Unnamed: 0,user_id
533248701,1206
469458312,2622


In [None]:
# Preview of the y_test input: the test split re-indexed by user_id.
test0.set_index('user_id').head(2)

Unnamed: 0_level_0,track_id
user_id,Unnamed: 1_level_1
1206,2386646
2622,3133584


In [None]:
from reclist.abstractions import RecList, rec_test
TOP_K_CHALLENGE=100

class TestRSRecList(RecList):
    """Custom RecList replicating the hit-rate test on a single fold.

    Both tests use ``TOP_K_CHALLENGE`` as their cut-off so that the number of
    predictions scored is defined in exactly one place.

    NOTE(review): ``being_less_wrong`` reads ``self._dense_repr`` and calls
    ``self.cosine_sim``; neither is defined in this class, so they must be
    supplied by the base class or attached elsewhere - confirm before relying
    on that test.
    """

    @rec_test('HIT_RATE')
    def hit_rate_at_100(self):
        """Hit rate of the top-k predictions against the test targets."""
        from reclist.metrics.standard_metrics import hit_rate_at_k
        # Use the shared constant (currently 100) rather than a second literal,
        # so this test and being_less_wrong can never drift apart.
        return hit_rate_at_k(self._y_preds, self._y_test, k=TOP_K_CHALLENGE)

    @rec_test('BEING_LESS_WRONG')
    def being_less_wrong(self):
        """Mean cosine similarity between missed targets and the first prediction."""
        from reclist.metrics.standard_metrics import hits_at_k
        # Collapse the hits over axis 2 to get one hit/miss flag per test row.
        hits = hits_at_k(self._y_preds, self._y_test, k=TOP_K_CHALLENGE).max(axis=2)
        # Element-wise comparison on purpose: `hits` is array-like, so a plain
        # `not hits` would not produce a mask.
        misses = (hits == False)  # noqa: E712
        # Dense vectors of the ground-truth tracks for the rows that were missed.
        miss_gt_vectors = self._dense_repr[self._y_test.loc[misses, 'track_id'].values.reshape(-1)]
        # we calculate the score w.r.t. the first prediction (column '0')
        miss_pred_vectors = self._dense_repr[self._y_preds.loc[misses, '0'].values.reshape(-1)]

        return float(self.cosine_sim(miss_gt_vectors, miss_pred_vectors).mean())

In [None]:
# Runner built from the submission credentials.
# NOTE(review): the runner receives the 100k-entry sample `dset_small` as its
# first argument - confirm that EvalRSRunner expects that rather than the
# ChallengeDataset object `dset`.
runner = EvalRSRunner(dset_small, EMAIL, PARTICIPANT_ID, AWS_ACCESS_KEY, AWS_SECRET_KEY, BUCKET_NAME)
# NOTE(review): `items` is given the whole ChallengeDataset, whereas the
# evaluation dataset is later loaded with items=dset.df_tracks - confirm which
# of the two the model expects.
my_model=SkipGramRecSys(items=dset)
# The model is fit on the full fold-0 training split, not on `dset_small`.
my_model.train(train0)

       user_id  artist_id  album_id  track_id   timestamp  user_track_count
28870  2798275        548      1624      5341  1394635306                 1
Training completed!


In [None]:
# Assemble the fold-0 evaluation bundle and wrap it, together with the trained
# model, in the custom RecList. Judging by the progress bar in the recorded
# output, building the RecList iterates over every test user.
dataset = EvalRSDataset()
load_kwargs = {
    'x_train': dset_small,                  # 100k-entry training sample
    'x_test': test0[['user_id']],           # users to produce predictions for
    'y_test': test0.set_index('user_id'),   # held-out targets, indexed by user
    'users': dset.df_users,
    'items': dset.df_tracks,
}
dataset.load(**load_kwargs)
rlist = TestRSRecList(model=my_model, dataset=dataset)

100%|██████████| 29717/29717 [10:04<00:00, 49.17it/s]


In [None]:
# Load the dense vectors from the 'song2vec.wv' file located inside the
# dataset directory.
dense_repr_path = os.path.join(dset.path_to_dataset, 'song2vec.wv')
rlist.load_dense_repr(path_to_word_vectors=dense_repr_path)

## CBOW Evaluation

In [None]:
# Hit rate at 50
# NOTE(review): the HIT_RATE test defined in TestRSRecList uses k=100; the
# result recorded for this cell was presumably produced while k was
# temporarily set to 50 - confirm.
rlist()

Test Type        : HIT_RATE
Test Description : 
Test Result      : 0.010910927765617107

Generating reports at 2022-08-22 09:26:35.919342


'.reclist/TestRSRecList/CBoWRecSys/1661160395906'

In [None]:
# Hit rate at 100: calling the RecList instance runs its tests and generates a report.
rlist()

Test Type        : HIT_RATE
Test Description : 
Test Result      : 0.018353258124263343

Generating reports at 2022-08-22 09:51:19.033763


'.reclist/TestRSRecList/CBoWRecSys/1661161879019'

In [None]:
# Disabled: runner-based evaluation, kept for reference.
# runner.evaluate(my_model, c.SEED, False, limit=0, custom_RecList=TestRSRecList, debug=True)  # SkipGramRecSys

## Skipgram Evaluation 

In [None]:
# Hit rate at 100 - training settings for this run: alpha=0.025, window=5
rlist()

Test Type        : HIT_RATE
Test Description : 
Test Result      : 0.0028932848876328894

Generating reports at 2022-08-25 11:17:46.928713


'.reclist/TestRSRecList/SkipGramRecSys/1661426266875'

In [None]:
# Hit rate at 100 - training settings for this run: alpha=default, window=40, epoch=15.
# Caveat: this configuration takes more than an hour, which may not be acceptable for the competition.

rlist()

Test Type        : HIT_RATE
Test Description : 
Test Result      : 0.12155160812811196

Generating reports at 2022-08-25 13:34:52.945429


'.reclist/TestRSRecList/SkipGramRecSys/1661434492931'

In [None]:
# Hit rate at 100 - training settings for this run: alpha=default, window=40, epoch=5

rlist()

Test Type        : HIT_RATE
Test Description : 
Test Result      : 0.05636504357775011

Generating reports at 2022-08-26 05:01:33.204502


'.reclist/TestRSRecList/SkipGramRecSys/1661490093187'