## Avaliação em holdouts - LASTFM

In [1]:
import os
import sys
sys.path.append(os.path.abspath('') + '/..')

In [2]:
from data import ImplicitData, getBucketsHoldouts
from plot_utils import lineplot_recallxholdout, recall_heatmap
from dataset_evaluation_utils import *
from flurs.recommender import BPRMFRecommender, SketchRecommender
from eval_implicit import EvaluateHoldoutsFlurs

import pandas as pd
import numpy as np

from datetime import datetime
import joblib
import seaborn as sns
import matplotlib.pyplot as plt
sns.set_style('whitegrid')

___
## BWT FWT

ACC e BWT - Lopez-Paz e Ranzato (GEM, 2017); FWT - Díaz-Rodriguez et al. (2018)

In [3]:
def avg_recall(results_matrix):
    """Return the mean of the main diagonal of ``results_matrix``.

    This is the average-accuracy style metric attributed to Lopez-Paz and
    Ranzato (GEM, 2017) in the original note.

    Args:
        results_matrix: 2-D array-like (ndarray or DataFrame) of results.

    Returns:
        The arithmetic mean of the diagonal entries.
    """
    diagonal = np.diag(results_matrix)
    return diagonal.mean()

def compute_BWT(results_matrix):
    """Compute backward transfer (BWT) for every checkpoint after the first.

    Metric attributed to Lopez-Paz and Ranzato (GEM, 2017). Row ``T`` of the
    matrix holds the results of checkpoint ``T`` on each holdout; the diagonal
    holds each checkpoint's result on its own (closest) holdout. For each
    ``T >= 1`` the BWT is the average of ``R[T, i] - R[i, i]`` over the
    previous holdouts ``i < T``.

    Args:
        results_matrix: 2-D array-like of results. Both pandas DataFrames and
            plain NumPy arrays are accepted.

    Returns:
        A tuple ``(per_checkpoint, mean)`` where ``per_checkpoint`` is a list
        with one BWT value per checkpoint ``T = 1 .. n-1`` and ``mean`` is the
        average of that list. With a single checkpoint there is nothing to
        look back at, so the list is empty and ``mean`` is NaN.
    """
    # Positional NumPy indexing keeps the function independent of DataFrame
    # row/column labels and lets callers pass the raw results array.
    matrix = np.asarray(results_matrix)
    diagonal = np.diag(matrix)

    BWT = []
    for T in range(1, matrix.shape[0]):  # T=1 is the second checkpoint, etc.
        # Later checkpoint on earlier holdouts minus the results of the
        # checkpoints that were closest to those holdouts.
        gaps = matrix[T, :T] - diagonal[:T]
        BWT.append(gaps.sum() / T)

    # Avoid NumPy's "mean of empty slice" warning when there is one checkpoint.
    mean_bwt = np.mean(BWT) if BWT else float('nan')
    return BWT, mean_bwt

def compute_FWT(results_matrix):
    """Compute forward transfer (FWT) as the mean of the strict upper triangle.

    Metric attributed to Díaz-Rodriguez et al. (2018) in the original note.
    The entries above the main diagonal are averaged.

    Args:
        results_matrix: square 2-D array-like of results. Both pandas
            DataFrames and plain NumPy arrays are accepted.

    Returns:
        The mean of the entries above the diagonal, or NaN when the matrix
        has no such entries (a single checkpoint).
    """
    matrix = np.asarray(results_matrix)
    upper_tri = matrix[np.triu_indices(matrix.shape[0], k=1)]
    # A 1x1 matrix has no upper triangle; return NaN without a NumPy warning.
    if upper_tri.size == 0:
        return float('nan')
    return np.mean(upper_tri)

### BPRMF

In [4]:
# Load the sampled Last.fm interactions from the CSV below.
# NOTE(review): the original comment said 'palco playlists', which does not
# match the file path being read here.
data = pd.read_csv('output/lastfm_dump/sampled_lastfm.csv')
# Names of the user and item columns, passed to the helpers further down
user_col = 'user_id'
item_col = 'item_id'

In [5]:
# Load the buckets and holdouts previously stored with joblib.
# NOTE: joblib.load unpickles data, so only load files from a trusted source.
buckets = joblib.load( 'output/lastfm_dump/sample_buckets.joblib')
holdouts = joblib.load( 'output/lastfm_dump/sample_holdouts.joblib')

In [None]:
%%time
import itertools
from flurs.data.entity import User, Item, Event
def grid_search(model, data, user_col, item_col, exclude_known_items, N_recommendations=-1):
    """Prequentially evaluate ``model`` on every point of a fixed hyperparameter grid.

    For each ``(k, l2_reg, learn_rate)`` combination a fresh model is created
    and the interactions in ``data`` are replayed in row order. For each
    interaction the model first recommends for the user, the hit (1/0) of the
    consumed item in the recommendation list is recorded, and only then is the
    model updated with the interaction.

    Args:
        model: recommender class, instantiated as
            ``model(k=..., l2_reg=..., learn_rate=...)``.
        data: DataFrame of interactions, iterated in row order.
        user_col: name of the column holding user identifiers.
        item_col: name of the column holding item identifiers.
        exclude_known_items: when true, items the user has already interacted
            with are removed from the ranked list before it is truncated.
        N_recommendations: length of the recommendation list; ``-1`` keeps the
            whole ranked list.

    Returns:
        ``(grid, results)``: the list of hyperparameter tuples and, aligned
        with it, the mean hit rate obtained by each combination.
    """
    # Map raw ids to contiguous integers in order of first appearance.
    usermap = {raw: idx for idx, raw in enumerate(pd.unique(data[user_col]))}
    itemmap = {raw: idx for idx, raw in enumerate(pd.unique(data[item_col]))}

    num_factors = [50, 100, 150, 200]
    regularization = [0.01, 0.05, 0.1, 0.25, 0.5]
    learn_rate = [0.01, 0.05, 0.1, 0.25, 0.5]
    grid = list(itertools.product(num_factors, regularization, learn_rate))

    # The event stream is the same for every grid point, so extract it once.
    events = data[[user_col, item_col]].values

    results = []
    last_progress = -1
    for step, (nf, reg, lr) in enumerate(grid):
        # Report progress once per 5% step.
        progress = (step * 100) // len(grid)
        if progress % 5 == 0 and progress != last_progress:
            last_progress = progress
            print(progress, '%')

        m = model(k=nf, l2_reg=reg, learn_rate=lr)
        m.initialize()
        max_item_ID = 0
        seen = ImplicitData([], [])  # tracks the items each user has seen so far
        hits = []
        for raw_user, raw_item in events:
            u_flurs, i_flurs = usermap[raw_user], itemmap[raw_item]
            max_item_ID = max(max_item_ID, i_flurs)
            user = User(u_flurs)
            item = Item(i_flurs)
            event = Event(user, item)
            m.register(user)
            m.register(item)

            # Prequential protocol: recommend first, learn afterwards.
            reclist, _ = m.recommend(user, np.arange(max_item_ID + 1))
            if exclude_known_items:
                user_items = seen.GetUserItems(u_flurs)
                # Filter by item id. np.delete would treat the ids as
                # positions in the ranked list and drop the wrong entries.
                # Assumes GetUserItems returns ids in the same id space as
                # reclist - TODO confirm against ImplicitData.
                reclist = reclist[~np.isin(reclist, user_items)]

            if N_recommendations != -1:
                reclist = reclist[:N_recommendations]

            # An empty list can never contain the item, so this yields 0 there.
            hits.append(int(i_flurs in reclist))

            seen.AddFeedback(u_flurs, i_flurs)
            m.update(event)

        results.append(np.mean(hits))

    return grid, results

# Tune hyperparameters on the first `prop` fraction of the rows only
prop = 0.05
hp_sample = data.iloc[:round( data.shape[0]*prop )]
# Recorded wall time of an earlier run: 4h 17min 22s
grid, results = grid_search(model=BPRMFRecommender, data=hp_sample, user_col=user_col, item_col=item_col, exclude_known_items=True, N_recommendations=20)
# NOTE(review): the recorded output below (27 combinations) looks stale; the
# grid currently defined in grid_search has 4 * 5 * 5 = 100 combinations.
len(grid), len(results), max(results) # (27, 27, 0.03681617837697693)

0.0 %
1.0 %
2.0 %
3.0 %
4.0 %
5.0 %
6.0 %
7.0 %
8.0 %
9.0 %
10.0 %
11.0 %
12.0 %
13.0 %
14.0 %


  sigmoid = np.e ** (-x_uij) / (1 + np.e ** (-x_uij))
  sigmoid = np.e ** (-x_uij) / (1 + np.e ** (-x_uij))


15.0 %
16.0 %
17.0 %
18.0 %
19.0 %
20.0 %
21.0 %
22.0 %


In [None]:
# Pick the grid entry with the highest mean result.
# An earlier run selected (50, 0.5, 0.5).
print( grid[ np.argmax( results ) ] )
k, l2_reg, learn_rate = grid[ np.argmax( results ) ]

In [None]:
# Note from earlier experiments: if the stream is passed in, recall is always 0
# when known items are excluded; when already-seen items may be recommended,
# recall is not 0.
# Keyword arguments (the same ones grid_search uses) bind each tuned value to
# its hyperparameter regardless of the constructor's positional order.
model = BPRMFRecommender(k=k, l2_reg=l2_reg, learn_rate=learn_rate)

In [None]:
# Create the EvaluateHoldoutsFlurs instance used to train the model and
# evaluate the checkpoints.
# NOTE(review): the name `eval` shadows the Python builtin; later cells refer
# to it, so a rename has to be applied to all of them together.
eval = EvaluateHoldoutsFlurs(model, buckets, holdouts, data, user_col, item_col)

In [None]:
%%time
# Recorded wall time of an earlier run: 1h 12min 53s
# Run training and holdout evaluation with N_recommendations=20 and
# exclude_known_items enabled (the same settings used for the grid search).
eval.Train_Evaluate(N_recommendations = 20, exclude_known_items = True)

In [None]:
#0.0 freq T
# Wrap the evaluator's results matrix in a DataFrame and write it to CSV
# without the index column.
rm = eval.results_matrix
df = pd.DataFrame(rm)
df.to_csv('output/lastfm_dump/sample_lastfm month_bucket BPRMF results.csv', index=False)

In [None]:
# Plot the results matrix as a heatmap; the figure is presumably written to
# `filepath` (verify in plot_utils.recall_heatmap).
recall_heatmap(df,
               round_point=4,
    title='Recall@20 for BPRMF checkpoints across Holdouts - Lastfm',
    filepath='images/heatmaps/lastfm_dump/sample_lastfm month_bucket BPRMF heatmap.png') # an earlier experiment used 'images/heatmaps/palco_2010 month_bucket ISGD heatmap.png'

In [None]:
# Mean of the diagonal of the results matrix
arecall = avg_recall(df)
arecall

In [None]:
# Backward transfer per checkpoint and its mean over all checkpoints
BWT, meanBWT = compute_BWT(df)
BWT, meanBWT

In [None]:
# Forward transfer: mean of the entries above the diagonal of the results matrix
FWT = compute_FWT(df)
FWT
# Open question: which items that a user consumed in the past and then stopped
# consuming can the system still recommend?

In [None]:
# Persist the evaluator's training and evaluation time records with joblib
joblib.dump(eval.IncrementalTraining_time_record, 'output/lastfm_dump/sample_lastfm month_bucket BPRMF training time.joblib')
joblib.dump(eval.EvaluateHoldouts_time_record, 'output/lastfm_dump/sample_lastfm month_bucket BPRMF eval time.joblib')