In [195]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics.pairwise import cosine_similarity

# Data preparation (loading and preprocessing)

First of all we need to load our dataset (See `README.md` for details).

In [None]:
movies = pd.read_csv('../../data/ml-1m/movies.dat', sep='::', header=None, names=['movie_id', 'title', 'genres'], engine='python', encoding='latin-1')
ratings = pd.read_csv('../../data/ml-1m/ratings.dat', sep='::', header=None, names=['user_id', 'movie_id', 'rating', 'timestamp'], engine='python', encoding='latin-1')

In [None]:
ratings.head()

## User-to-user collaborative filtering

Sources: The weighted average algorithm was inspired by [this](https://medium.com/analytics-vidhya/recommendation-system-using-collaborative-filtering-cc310e641fde) Medium article

In [None]:
X_ratings_train, X_ratings_test, y_ratings_train, y_ratings_test = train_test_split(ratings, ratings['user_id'], stratify=ratings['user_id'], test_size=0.2, random_state=5)

In [None]:
X_ratings_train_by_user = X_ratings_train.pivot(index='user_id', columns='movie_id', values='rating')

In [7]:
X_ratings_train_by_user.head()

movie_id,1,2,3,4,5,6,7,8,9,10,...,3943,3944,3945,3946,3947,3948,3949,3950,3951,3952
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,,,,,,,,,,,...,,,,,,,,,,
2,,,,,,,,,,,...,,,,,,,,,,
3,,,,,,,,,,,...,,,,,,,,,,
4,,,,,,,,,,,...,,,,,,,,,,
5,,,,,,2.0,,,,,...,,,,,,,,,,


In [8]:
df_ratings_dummy = X_ratings_train_by_user.copy().fillna(0)
similarity_matrix = cosine_similarity(df_ratings_dummy, df_ratings_dummy)
similarity_matrix_df = pd.DataFrame(similarity_matrix, index=df_ratings_dummy.index, columns=df_ratings_dummy.index)

In [9]:
def weighed_avg_rating_for_movie(id_movie, id_user):

    if not id_movie in X_ratings_train_by_user:
        return 2.5 #average
    cosine_scores = similarity_matrix_df[id_user] #similarity of id_user with every other user
    ratings_scores = X_ratings_train_by_user[id_movie]      #ratings of every other user for the movie id_movie
    #won't consider users who havent rated id_movie so drop similarity scores and ratings corresponsing to np.nan
    index_not_rated = ratings_scores[ratings_scores.isnull()].index
    ratings_scores = ratings_scores.dropna()
    cosine_scores = cosine_scores.drop(index_not_rated)
    #calculating rating by weighted mean of ratings and cosine scores of the users who have rated the movie
    ratings_movie = np.dot(ratings_scores, cosine_scores)/cosine_scores.sum()

    return ratings_movie

Average rating for movie #150 for user #350

In [10]:
weighed_avg_rating_for_movie(150, 350) 

4.086058296388687

In [35]:
def score_on_test_set(X_test):
    user_movie_pairs = zip(X_test['movie_id'], X_test['user_id'])
    predicted_ratings = np.array([weighed_avg_rating_for_movie(movie, user) for (movie,user) in user_movie_pairs])
    true_ratings = np.array(X_test['rating'])
    score = np.sqrt(mean_squared_error(true_ratings, predicted_ratings))
    return score

In [37]:
score_on_test_set(X_ratings_test)

--(array([3, 4, 3, 2, 1, 2, 2, 1, 5, 4]), array([3.16042448, 3.74945348, 3.77095095, 3.92528707, 3.22930641,
       1.63100507, 3.2415709 , 3.72150037, 3.80263468, 4.24424964]))


0.976425275471315

# Collaborative filtering evaluation

**Note:** before reading this section, it is highly adviced to first read though the `1_5_Evaluation_methodology` experiment, and also both baseline experiments, as there are a lot of overview and evaluation insights.

## User-to-user collaborative filtering

In [197]:
import sys
import os.path as osp

PROJECT_DIR = '../../'
PROJECT_DIR = osp.abspath(PROJECT_DIR)
print(PROJECT_DIR in sys.path)
if PROJECT_DIR not in sys.path:
    print(f'Adding project directory to the sys.path: {PROJECT_DIR!r}')
    sys.path.insert(1, PROJECT_DIR)

True


In [198]:
from src.models.abstract_rs_model import AbstractRSModel

In [199]:
from src.evaluation import EvaluationPipeline

In [200]:
class CollaborativeFilteringUUModel(AbstractRSModel):
    def _weighted_avg_rating_for_movie(self, train_data, similarity_matrix_df, id_movie, id_user):
        if not id_movie in train_data or not id_user in similarity_matrix_df:
            return 2.5 #average
        cosine_scores = similarity_matrix_df[id_user] #similarity of id_user with every other user
        ratings_scores = train_data[id_movie]      #ratings of every other user for the movie id_movie
        #won't consider users who havent rated id_movie so drop similarity scores and ratings corresponsing to np.nan
        index_not_rated = ratings_scores[ratings_scores.isnull()].index
        # print(f'--index_not_rated {index_not_rated} {len(cosine_scores)} {len(ratings_scores)}')
        ratings_scores = ratings_scores.dropna()
        cosine_scores = cosine_scores.drop(index_not_rated)
        # print(f'--index_not_rated {index_not_rated} {len(cosine_scores)} {len(ratings_scores)}')
        #calculating rating by weighted mean of ratings and cosine scores of the users who have rated the movie
        ratings_movie = np.dot(ratings_scores, cosine_scores)/cosine_scores.sum()
        return ratings_movie
    
    def fit(self, train_data, pre_fit: bool = False):
        pass

    def predict(self, data_at_test_timestamp, test_user, test_timestamp):
        # print(f'--users list {test_user} {test_timestamp} {data_at_test_timestamp["Timestamp"].max()} {np.sort(data_at_test_timestamp["UserID"].unique())}')
        ratings_by_user = data_at_test_timestamp.pivot(index='UserID', columns='MovieID', values='Rating')
        df_ratings_dummy = ratings_by_user.copy().fillna(0)
        similarity_matrix = cosine_similarity(df_ratings_dummy, df_ratings_dummy)
        similarity_matrix_df = pd.DataFrame(similarity_matrix,
                                            index=df_ratings_dummy.index,
                                            columns=df_ratings_dummy.index)
        items_candidates = data_at_test_timestamp['MovieID'].unique()
        movies_already_watched = data_at_test_timestamp[
            data_at_test_timestamp['UserID'] == test_user]['MovieID'].unique()
        items_candidates = [item_id for item_id in items_candidates if item_id not in movies_already_watched]
        collaborative_ratings = {}
        for item_id in items_candidates:
            collaborative_ratings[item_id] = self._weighted_avg_rating_for_movie(
                ratings_by_user, similarity_matrix_df, item_id, test_user)
        collaborative_ratings = pd.Series(collaborative_ratings)
        # collaborative_ratings = (
        #     collaborative_ratings/(collaborative_ratings.max() - collaborative_ratings.min()))*4 + 1
        collaborative_ratings = collaborative_ratings.sort_values(
            ascending=False) # kind='mergesort'
        return collaborative_ratings.index.to_numpy(), collaborative_ratings.to_numpy()

    def fit_predict(self, data, test_user, test_timestamp):
        self.fit(data)
        return self.predict(data, test_user, test_timestamp)

In [201]:
df_ratings = pd.read_csv('../../data/ml-1m/ratings.dat',
                         delimiter='::',
                         header=None,
                         names=['UserID','MovieID','Rating','Timestamp'],
                         engine ='python')

Let's test the model for just a single point to view in what format the results are:

In [202]:
cf_uu_model = CollaborativeFilteringUUModel()
cf_uu_model.fit(eval_cf_uu.train_data)

In [203]:
predicted_ids, predicted_ratings = cf_uu_model.predict(df_ratings[df_ratings['Timestamp'] < 978301619], 1, 978301619)
# The rated movie is 2028, ground truth rating 5.0

In [204]:
predicted_ratings[list(predicted_ids).index(2028)]

4.31798032114613

And than let's do the evaluation. Also, for this model the both evaluation approaches are the same, as it has no training in the traditional sense.

In [205]:
import warnings
warnings.filterwarnings("ignore", message="invalid value encountered in scalar divide")
# As there are a lot of such situations in the early time points

In [182]:
eval_cf_uu = EvaluationPipeline(df_ratings, 0.001) # .sample(frac=0.01, random_state=5)

In [183]:
metrics_output_dict_cf_uu = eval_cf_uu.evaluate( # recommendation_results_baseline
    cf_uu_model,
    user_average_metrics=False,
    retrain_model_each_point=False)

  0%|          | 0/402 [00:00<?, ?it/s]

In [184]:
metrics_output_dict_cf_uu

{'mae': 2.2080150117431967,
 'rmse': 2.6638655483638836,
 'precision': 0.0,
 'average_precision': 0.9974747474747475,
 'mean_reciprocal_rank': 0.003836916995165856,
 'ndcg': 0.9947807646755525,
 'coverage': 0.013741721854304636}

In [187]:
import json

In [188]:
with open('cf_uu_no_updates_metrics.json', 'w') as f:
    json.dump(metrics_output_dict_cf_uu, f)

Worth noting, that the lower `MAE` and `RMSE` scores here than for the random train-test split without time dimension are explained by the fact that the similarity of the users can be measured with more accuracy the more data we have. But for the earlier users we can have a situation where a lot of the users who may be similar to them have still rated few movies, resulting in the early low-data prediction being worse. However, this is exasctly how the algorithm would work in the real application, as we would start from the less data and in process increase its amount and the quality of our predictions.

Also, here the value of the average precision is by far the highest among all the reviewed algorithms so far. In our algorithm (refer to `src/evaluation.py`), average precision is calculated with the number `m` equal to the number of user's ratings in the test set. Comparing with the record low precision itself, this points out how the model is able to very accurately place the needed recommendation among the top points, but is not very good at rating it the highest. Still, the record-low so far values of `MAE` and `RMSE` point out that the model is good at predicting the movie's rating itself from similar users, so this model is a step in the right direction.