In [1]:
import pandas as pd
import numpy as np
import pickle
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds
from sklearn.model_selection import train_test_split
import random
from tqdm.notebook import tqdm

np.random.seed = 42

In [2]:
listening_counts = pd.read_csv('listening-counts.tsv', sep='\t')
users = pd.read_csv('users.tsv', sep='\t')
users_sample = users.sample(2000, random_state=42)

In [3]:
counts_sample = listening_counts[listening_counts['user_id'].isin(users_sample['user_id'])]
counts_sample['rating'] = counts_sample['count'] / counts_sample.groupby('user_id')['count'].transform('max')
ratings_df = counts_sample[counts_sample['track_id'].isin(\
            list(counts_sample.groupby('track_id').sum()['rating'].sort_values(ascending=False)[:3000].index))]
ratings_df['tracks_count'] = ratings_df.groupby('user_id')['count'].transform('count')
ratings_df = ratings_df[ratings_df['tracks_count'] >= 5]
ratings_df.drop(columns=['count', 'tracks_count'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  counts_sample['rating'] = counts_sample['count'] / counts_sample.groupby('user_id')['count'].transform('max')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ratings_df['tracks_count'] = ratings_df.groupby('user_id')['count'].transform('count')


In [4]:
ratings_df_train, ratings_df_test = train_test_split(ratings_df,
                                   stratify=ratings_df['user_id'], 
                                   test_size=0.20,
                                   random_state=42)

print('# interactions on Train set: %d' % len(ratings_df_train))
print('# interactions on Test set: %d' % len(ratings_df_test))

# interactions on Train set: 334009
# interactions on Test set: 83503


In [5]:
ratings_df = ratings_df.set_index('user_id')
ratings_df_train = ratings_df_train.set_index('user_id')
ratings_df_test = ratings_df_test.set_index('user_id')

In [6]:
def get_items_listened(person_id, listened_df):
    # Get the user's data and merge in the movie information.
    listened_items = listened_df.loc[person_id]['track_id']
    return set(listened_items if type(listened_items) == pd.Series else [listened_items])

In [7]:
all_items = set(ratings_df['track_id'])

In [8]:
EVAL_RANDOM_SAMPLE_NON_INTERACTED_ITEMS = 100


class ModelEvaluator:

    def get_not_listened_items_sample(self, person_id, sample_size, seed=42):
        listened_items = get_items_listened(person_id, ratings_df)
        non_listened_items = all_items - listened_items

        random.seed(seed)
        non_listened_items_sample = random.sample(non_listened_items, sample_size)
        return set(non_listened_items_sample)

    def _verify_hit_top_n(self, item_id, recommended_items, topn):
        try:
            index = next(i for i, c in enumerate(recommended_items) if c == item_id)
        except:
            index = -1
        hit = int(index in range(0, topn))
        return hit, index

    def evaluate_model_for_user(self, model, person_id):
        # Getting the items in test set
        listened_values_testset = ratings_df_test.loc[person_id]
        if type(listened_values_testset['track_id']) == pd.Series:
            person_listened_items_testset = set(listened_values_testset['track_id'])
        else:
            person_listened_items_testset = set([int(listened_values_testset['track_id'])])
        listened_items_count_testset = len(person_listened_items_testset)

        # Getting a ranked recommendation list from a model for a given user
        person_recs_df = model.recommend_items(person_id,
                                               items_to_ignore=get_items_listened(person_id,
                                                                                    ratings_df_train),
                                               topn=10000000000)

        hits_at_5_count = 0
        hits_at_10_count = 0
        # For each item the user has listened in test set
        for item_id in person_listened_items_testset:
            # Getting a random sample (100) items the user has not listened 
            # (to represent items that are assumed to be no relevant to the user)
            non_listened_items_sample = self.get_not_listened_items_sample(person_id,
                                                                               sample_size=EVAL_RANDOM_SAMPLE_NON_INTERACTED_ITEMS,
                                                                               seed=item_id % (2 ** 32))

            # Combining the current listened item with the 100 random items
            items_to_filter_recs = non_listened_items_sample.union(set([item_id]))

            # Filtering only recommendations that are either the listened item or from a random sample of 100 non-listened items
            valid_recs_df = person_recs_df[person_recs_df['track_id'].isin(items_to_filter_recs)]
            valid_recs = valid_recs_df['track_id'].values
            # Verifying if the current listened item is among the Top-N recommended items
            hit_at_5, index_at_5 = self._verify_hit_top_n(item_id, valid_recs, 5)
            hits_at_5_count += hit_at_5
            hit_at_10, index_at_10 = self._verify_hit_top_n(item_id, valid_recs, 10)
            hits_at_10_count += hit_at_10

        # Recall is the rate of the listened items that are ranked among the Top-N recommended items, 
        # when mixed with a set of non-relevant items
        recall_at_5 = hits_at_5_count / float(listened_items_count_testset)
        recall_at_10 = hits_at_10_count / float(listened_items_count_testset)

        person_metrics = {'hits@5_count': hits_at_5_count,
                          'hits@10_count': hits_at_10_count,
                          'listened_count': listened_items_count_testset,
                          'recall@5': recall_at_5,
                          'recall@10': recall_at_10}
        return person_metrics

    def evaluate_model(self, model):
        # print('Running evaluation for users')
        people_metrics = []
        for idx, person_id in enumerate(tqdm(list(ratings_df_test.index.unique().values))):
            # if idx % 100 == 0 and idx > 0:
            #    print('%d users processed' % idx)
            person_metrics = self.evaluate_model_for_user(model, person_id)
            person_metrics['user_id'] = person_id
            people_metrics.append(person_metrics)
        print('%d users processed' % idx)

        detailed_results_df = pd.DataFrame(people_metrics) \
            .sort_values('listened_count', ascending=False)

        global_recall_at_5 = detailed_results_df['hits@5_count'].sum() / float(
            detailed_results_df['listened_count'].sum())
        global_recall_at_10 = detailed_results_df['hits@10_count'].sum() / float(
            detailed_results_df['listened_count'].sum())

        global_metrics = {'modelName': model.get_model_name(),
                          'recall@5': global_recall_at_5,
                          'recall@10': global_recall_at_10}
        return global_metrics, detailed_results_df


model_evaluator = ModelEvaluator()    

In [9]:
class CFRecommender:
    
    MODEL_NAME = 'Collaborative Filtering'
    
    def __init__(self, cf_predictions_df):
        self.cf_predictions_df = cf_predictions_df
        
    def get_model_name(self):
        return self.MODEL_NAME
        
    def recommend_items(self, user_id, items_to_ignore=[], topn=10):
        # Get and sort the user's predictions
        sorted_user_predictions = self.cf_predictions_df[user_id].sort_values(ascending=False) \
                                    .reset_index().rename(columns={user_id: 'recStrength'})

        # Recommend the highest predicted rating movies that the user hasn't seen yet.
        recommendations_df = sorted_user_predictions[~sorted_user_predictions['track_id'].isin(items_to_ignore)] \
                               .sort_values('recStrength', ascending = False) \
                               .head(topn)

        return recommendations_df
    

In [10]:
pivot_train = ratings_df.pivot_table(index='user_id', columns='track_id', values='rating').fillna(0)
csr_coll_matrix_train = csr_matrix(pivot_train)

In [11]:
NUMBER_OF_FACTORS_MF = 15

U, sigma, Vt = svds(csr_coll_matrix_train, k = NUMBER_OF_FACTORS_MF)

sigma = np.diag(sigma)

In [12]:
predicted_ratings_train = np.dot(np.dot(U, sigma), Vt) 
predicted_ratings_train_norm = (predicted_ratings_train - 
                                   predicted_ratings_train.min()) / (predicted_ratings_train.max()
                                                                        - predicted_ratings_train.min())

In [13]:
cf_preds_df = pd.DataFrame(predicted_ratings_train_norm,
                           columns = pivot_train.columns,
                           index=list(pivot_train.index)).transpose()

In [14]:
cf_recommender_model = CFRecommender(cf_preds_df)

In [15]:
model_evaluator = ModelEvaluator() 

In [16]:
cf_global_metrics, cf_detailed_results_df = model_evaluator.evaluate_model(cf_recommender_model)

  0%|          | 0/1856 [00:00<?, ?it/s]

1855 users processed


In [17]:
print('\nGlobal metrics:\n%s' % cf_global_metrics)
cf_detailed_results_df.head(10)


Global metrics:
{'modelName': 'Collaborative Filtering', 'recall@5': 0.3301917296384561, 'recall@10': 0.4712166029962995}


Unnamed: 0,hits@5_count,hits@10_count,listened_count,recall@5,recall@10,user_id
104,81,122,371,0.218329,0.328841,48315
661,164,200,358,0.458101,0.558659,6110
192,114,144,358,0.318436,0.402235,24770
29,122,170,353,0.345609,0.481586,3556
3,194,231,333,0.582583,0.693694,28467
2,82,121,309,0.265372,0.391586,41663
23,120,149,294,0.408163,0.506803,47286
105,115,158,293,0.392491,0.539249,8760
313,175,212,292,0.599315,0.726027,73816
34,64,101,288,0.222222,0.350694,1592


In [18]:
def dcg_at_k(r, k, method=0):
    """Score is discounted cumulative gain (dcg)
    Relevance is positive real values.  Can use binary
    as the previous methods.
    Example from
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
    >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
    >>> dcg_at_k(r, 1)
    3.0
    >>> dcg_at_k(r, 1, method=1)
    3.0
    >>> dcg_at_k(r, 2)
    5.0
    >>> dcg_at_k(r, 2, method=1)
    4.2618595071429155
    >>> dcg_at_k(r, 10)
    9.6051177391888114
    >>> dcg_at_k(r, 11)
    9.6051177391888114
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
    Returns:
        Discounted cumulative gain
    """
    r = np.asfarray(r)[:k]
    if r.size:
        if method == 0:
            return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1)))
        elif method == 1:
            return np.sum(r / np.log2(np.arange(2, r.size + 2)))
        else:
            raise ValueError('method must be 0 or 1.')
    return 0.


def ndcg_at_k(r, k, method=0):
    """Score is normalized discounted cumulative gain (ndcg)
    Relevance is positive real values.  Can use binary
    as the previous methods.
    Example from
    http://www.stanford.edu/class/cs276/handouts/EvaluationNew-handout-6-per.pdf
    >>> r = [3, 2, 3, 0, 0, 1, 2, 2, 3, 0]
    >>> ndcg_at_k(r, 1)
    1.0
    >>> r = [2, 1, 2, 0]
    >>> ndcg_at_k(r, 4)
    0.9203032077642922
    >>> ndcg_at_k(r, 4, method=1)
    0.96519546960144276
    >>> ndcg_at_k([0], 1)
    0.0
    >>> ndcg_at_k([1], 2)
    1.0
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: Number of results to consider
        method: If 0 then weights are [1.0, 1.0, 0.6309, 0.5, 0.4307, ...]
                If 1 then weights are [1.0, 0.6309, 0.5, 0.4307, ...]
    Returns:
        Normalized discounted cumulative gain
    """
    dcg_max = dcg_at_k(sorted(r, reverse=True), k, method)
    if not dcg_max:
        return 0.
    return dcg_at_k(r, k, method) / dcg_max