In [1]:
import pandas as pd
import numpy as np
import pickle
from scipy.sparse import csr_matrix
from scipy.sparse.linalg import svds
from sklearn.model_selection import train_test_split
import random
from tqdm.notebook import tqdm
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings("ignore", category=DeprecationWarning)
pd.options.mode.chained_assignment = None
np.random.seed = 42

In [2]:
listening_counts = pd.read_csv('listening-counts.tsv', sep='\t')
users = pd.read_csv('users.tsv', sep='\t')
users_sample = users.sample(2000, random_state=42)

In [3]:
counts_sample = listening_counts[listening_counts['user_id'].isin(users_sample['user_id'])]
counts_sample['rating'] = counts_sample['count'] / counts_sample.groupby('user_id')['count'].transform('max')
ratings_df = counts_sample[counts_sample['track_id'].isin(\
            list(counts_sample.groupby('track_id').sum()['rating'].sort_values(ascending=False)[:3000].index))]
ratings_df['tracks_count'] = ratings_df.groupby('user_id')['count'].transform('count')
ratings_df = ratings_df[ratings_df['tracks_count'] >= 5]
ratings_df.drop(columns=['count', 'tracks_count'], inplace=True)

In [4]:
ratings_df_train, ratings_df_test = train_test_split(ratings_df,
                                   stratify=ratings_df['user_id'],
                                   test_size=0.20,
                                   random_state=42)

print('Actions on Train set: %d' % len(ratings_df_train))
print('Actions on Test set: %d' % len(ratings_df_test))

Actions on Train set: 334009
Actions on Test set: 83503


In [5]:
ratings_df = ratings_df.set_index('user_id')
ratings_df_train = ratings_df_train.set_index('user_id')
ratings_df_test = ratings_df_test.set_index('user_id')

In [6]:
def get_items_listened(person_id, listened_df):
    # Get the user's data and merge in the movie information.
    listened_items = listened_df.loc[person_id]['track_id']
    return set(listened_items if type(listened_items) == pd.Series else [listened_items])

In [7]:
all_items = set(ratings_df['track_id'])

In [8]:
EVAL_RANDOM_SAMPLE_NON_INTERACTED_ITEMS = 100

class ModelEvaluator:

    def get_not_listened_items_sample(self, person_id, sample_size, seed=42):
        listened_items = get_items_listened(person_id, ratings_df)
        non_listened_items = all_items - listened_items

        random.seed(seed)
        non_listened_items_sample = random.sample(non_listened_items, sample_size)
        return set(non_listened_items_sample)

    def _verify_hit_top_n(self, item_id, recommended_items, topn):
        try:
            index = next(i for i, c in enumerate(recommended_items) if c == item_id)
        except:
            index = -1
        hit = int(index in range(0, topn))
        return hit, index

    def evaluate_model_for_user(self, model, person_id):
        # Getting the items in test set
        listened_values_testset = ratings_df_test.loc[person_id]
        if type(listened_values_testset['track_id']) == pd.Series:
            person_listened_items_testset = set(listened_values_testset['track_id'])
        else:
            person_listened_items_testset = set([int(listened_values_testset['track_id'])])
        listened_items_count_testset = len(person_listened_items_testset)
        # Getting a ranked recommendation list from a model for a given user
        person_recs_df = model.recommend_items(person_id,
                                               items_to_ignore=get_items_listened(person_id,
                                                                                    ratings_df_train),
                                               topn=100000000)
        hits_at_5_count = 0
        hits_at_10_count = 0
        # For each item the user has listened in test set
        for item_id in person_listened_items_testset:
            # Getting a random sample (100) items the user has not listened 
            # (to represent items that are assumed to be no relevant to the user)
            non_listened_items_sample = self.get_not_listened_items_sample(person_id,
                                                                               sample_size=EVAL_RANDOM_SAMPLE_NON_INTERACTED_ITEMS,
                                                                               seed=item_id % (2 ** 32))

            # Combining the current listened item with the 100 random items
            items_to_filter_recs = non_listened_items_sample.union(set([item_id]))

            # Filtering only recommendations that are either the listened item or from a random sample of 100 non-listened items
            valid_recs_df = person_recs_df[person_recs_df['track_id'].isin(items_to_filter_recs)]
            valid_recs = valid_recs_df['track_id'].values
            # Verifying if the current listened item is among the Top-N recommended items
            hit_at_5, index_at_5 = self._verify_hit_top_n(item_id, valid_recs, 5)
            hits_at_5_count += hit_at_5
            hit_at_10, index_at_10 = self._verify_hit_top_n(item_id, valid_recs, 10)
            hits_at_10_count += hit_at_10
        # Recall is the rate of the listened items that are ranked among the Top-N recommended items, 
        # when mixed with a set of non-relevant items
        recall_at_5 = hits_at_5_count / float(listened_items_count_testset)
        recall_at_10 = hits_at_10_count / float(listened_items_count_testset)

        person_metrics = {'hits@5_count': hits_at_5_count,
                          'hits@10_count': hits_at_10_count,
                          'listened_count': listened_items_count_testset,
                          'recall@5': recall_at_5,
                          'recall@10': recall_at_10}
        return person_metrics

    def evaluate_model(self, model):
        # print('Running evaluation for users')
        people_metrics = []
        for idx, person_id in enumerate(tqdm(list(ratings_df_test.index.unique().values))):
            # if idx % 100 == 0 and idx > 0:
            #    print('%d users processed' % idx)
            person_metrics = self.evaluate_model_for_user(model, person_id)
            person_metrics['user_id'] = person_id
            people_metrics.append(person_metrics)
        print('%d users processed' % idx)

        detailed_results_df = pd.DataFrame(people_metrics) \
            .sort_values('listened_count', ascending=False)

        global_recall_at_5 = detailed_results_df['hits@5_count'].sum() / float(
            detailed_results_df['listened_count'].sum())
        global_recall_at_10 = detailed_results_df['hits@10_count'].sum() / float(
            detailed_results_df['listened_count'].sum())

        global_metrics = {'modelName': model.get_model_name(),
                          'recall@5': global_recall_at_5,
                          'recall@10': global_recall_at_10}
        return global_metrics, detailed_results_df

model_evaluator = ModelEvaluator()    

In [9]:
class Baseline:
    MODEL_NAME = 'Baseline (Const Popularity)'
    
    def __init__(self, popularity_df):
        self.popularity_df = popularity_df
        
    def get_model_name(self):
        return self.MODEL_NAME
        
    def recommend_items(self, user_id, items_to_ignore=[], topn=10, verbose=False):
        # Рекомендуем самые популярные треки, которые еще не слушал пользователь
        temp_df = ratings_df.groupby('track_id')['rating'].sum().sort_values(ascending=False)\
                                        .reset_index().rename(columns={'rating': 'rateStrength'})
        recommendations_df = temp_df[~temp_df['track_id'].isin(items_to_ignore)] \
                               .sort_values('rateStrength', ascending = False) \
                               .head(topn)
        return recommendations_df

Baseline_model = Baseline(ratings_df_train)
base_global_metrics, base_detailed_results_df = model_evaluator.evaluate_model(Baseline_model)

  0%|          | 0/1856 [00:00<?, ?it/s]

1855 users processed


In [10]:
print('\nGlobal metrics:\n%s' % base_global_metrics)
base_detailed_results_df.head(10)


Global metrics:
{'modelName': 'Baseline (Const Popularity)', 'recall@5': 0.12110942121839934, 'recall@10': 0.20401662215728777}


Unnamed: 0,hits@5_count,hits@10_count,listened_count,recall@5,recall@10,user_id
104,56,90,371,0.150943,0.242588,48315
661,41,91,358,0.114525,0.25419,6110
192,86,117,358,0.240223,0.326816,24770
29,39,61,353,0.110482,0.172805,3556
3,28,62,333,0.084084,0.186186,28467
2,38,63,309,0.122977,0.203883,41663
23,45,71,294,0.153061,0.241497,47286
105,49,75,293,0.167235,0.255973,8760
313,66,96,292,0.226027,0.328767,73816
34,49,71,288,0.170139,0.246528,1592


In [11]:
class CFRecommender:
    
    MODEL_NAME = 'Collaborative Filtering'
    
    def __init__(self, cf_predictions_df, number_of_factors=15):
        self.cf_predictions_df = cf_predictions_df
        self.number_of_factors = number_of_factors
        self._prep_data()

    def _prep_data(self):
        pivot_train = self.cf_predictions_df.pivot_table(index='user_id', columns='track_id', values='rating').fillna(0)
        csr_coll_matrix_train = csr_matrix(pivot_train)
        U, sigma, Vt = svds(csr_coll_matrix_train, k = self.number_of_factors)
        sigma = np.diag(sigma)
        predicted_ratings_train = np.dot(np.dot(U, sigma), Vt) 
        predicted_ratings_train_norm = (predicted_ratings_train - 
                                        predicted_ratings_train.min()) / (predicted_ratings_train.max()
                                                                        - predicted_ratings_train.min())
        self.cf_predictions_df = pd.DataFrame(predicted_ratings_train_norm,
                           columns = pivot_train.columns,
                           index=list(pivot_train.index)).transpose()

    def get_model_name(self):
        return self.MODEL_NAME
        
    def recommend_items(self, user_id, items_to_ignore=[], topn=10):
        # Get and sort the user's predictions
        sorted_user_predictions = self.cf_predictions_df[user_id].sort_values(ascending=False) \
                                    .reset_index().rename(columns={user_id: 'recStrength'})

        # Recommend the highest predicted rating movies that the user hasn't seen yet.
        recommendations_df = sorted_user_predictions[~sorted_user_predictions['track_id'].isin(items_to_ignore)] \
                               .sort_values('recStrength', ascending = False) \
                               .head(topn)

        return recommendations_df


In [12]:
cf_recommender_model = CFRecommender(ratings_df_train, number_of_factors=60)

In [13]:
model_evaluator = ModelEvaluator()
cf_global_metrics, cf_detailed_results_df = model_evaluator.evaluate_model(cf_recommender_model)
print('\nGlobal metrics:\n%s' % cf_global_metrics)
cf_detailed_results_df.head(10)

  0%|          | 0/1856 [00:00<?, ?it/s]

1855 users processed

Global metrics:
{'modelName': 'Collaborative Filtering', 'recall@5': 0.33491012298959316, 'recall@10': 0.4600912542064357}


Unnamed: 0,hits@5_count,hits@10_count,listened_count,recall@5,recall@10,user_id
104,90,126,371,0.242588,0.339623,48315
661,136,161,358,0.379888,0.449721,6110
192,106,153,358,0.296089,0.427374,24770
29,91,125,353,0.25779,0.354108,3556
3,75,106,333,0.225225,0.318318,28467
2,66,94,309,0.213592,0.304207,41663
23,106,141,294,0.360544,0.479592,47286
105,61,91,293,0.208191,0.31058,8760
313,105,129,292,0.359589,0.441781,73816
34,72,104,288,0.25,0.361111,1592
