In [1]:
#default_exp user_based

In [2]:
#export

import os
import pickle
import attr

import pandas as pd
import scipy
import numpy as np

from game_recommender import steam_data, content_based

In [3]:
%cd ..

/home/kuba/Projects/game_recommender


In [4]:
game_dataset = steam_data.get_steam_ratings_dataset()

In [7]:
user_game_matrix = game_dataset.get_user_item_df()
steam_ratings_df, target = game_dataset.data, game_dataset.target

In [8]:
steam_ratings_df

Unnamed: 0,user_id,name,ownership,hours,played,interacted,name_unnormalized
0,11794760,1... 2... 3... kick it drop that beat like an ...,purchase,1.0,0,1,1... 2... 3... KICK IT! (Drop That Beat Like a...
1,35701646,1... 2... 3... kick it drop that beat like an ...,purchase,1.0,0,1,1... 2... 3... KICK IT! (Drop That Beat Like a...
2,49893565,1... 2... 3... kick it drop that beat like an ...,play,2.4,1,1,1... 2... 3... KICK IT! (Drop That Beat Like a...
3,50818751,1... 2... 3... kick it drop that beat like an ...,play,5.0,1,1,1... 2... 3... KICK IT! (Drop That Beat Like a...
4,58345543,1... 2... 3... kick it drop that beat like an ...,play,11.2,1,1,1... 2... 3... KICK IT! (Drop That Beat Like a...
...,...,...,...,...,...,...,...
84293,62985928,zuma deluxe,purchase,1.0,0,1,Zuma Deluxe
84294,12144171,zumas revenge,play,1.2,1,1,Zuma's Revenge
84295,59825286,zumas revenge,purchase,1.0,0,1,Zuma's Revenge
84296,69954842,zumas revenge,play,9.3,1,1,Zuma's Revenge


In [9]:
#export


@attr.s
class UserBasedRecommender:
    
    user_similarity_searcher: content_based.SimilaritySearcher = attr.ib()
    ratings_matrix = attr.ib()
    games = attr.ib()
    EPS = 1e-6
        
    def recommend_games_from_similar_users(
            self,
            user_id=None,
            user_ratings=None,
            n_recommended=10,
            n_similar=5):
        assert not user_id is None or not user_ratings is None, 'Need to specify user id or vector'
        if user_id is not None:
            similar_users, similarities = self.user_similarity_searcher.find_similar(chosen_items=[user_id], n_similar=n_similar+1)
            similar_users = similar_users.iloc[1:]
            similarities = similarities.iloc[1:]
            mean_similar_users = similar_users.pop('mean').iloc[:n_similar]
            mean_similarities = similarities.pop('mean_similarity').iloc[:n_similar]
        else:
            user_vector = self.get_user_vector(user_ratings)
            similar_users, similarities = self.user_similarity_searcher.find_similar(chosen_vectors=user_vector, n_similar=n_similar)
            mean_similar_users = similar_users.pop('mean')
            mean_similarities = similarities.pop('mean_similarity')
        similarities = similarities.values.reshape(-1)
        similar_user_idxs = self.user_similarity_searcher.df[self.user_similarity_searcher.df['user_id'].isin(similar_users.values.reshape(-1))].index
        similar_users_ratings = self.ratings_matrix[similar_user_idxs]
        predicted_ratings_by_user = (np.diag(similarities)) @ similar_users_ratings
        
        pred_ratings_by_user_sum = (predicted_ratings_by_user > 0).sum(axis=0)
        pred_ratings_by_user_sum[pred_ratings_by_user_sum < self.EPS] = 1
        predicted_ratings = predicted_ratings_by_user.sum(axis=0) / pred_ratings_by_user_sum
        best_rated_games_idxs = predicted_ratings.argsort()[::-1][:n_recommended]
        best_rated_games = self.games[best_rated_games_idxs]
        return pd.Series(index=best_rated_games, data=predicted_ratings[best_rated_games_idxs])
    
    @staticmethod
    def make_from_ratings(ratings_df, target, target_col):
        ratings_df = pd.concat([ratings_df, target], axis=1)
        item_user_pivoted_df = ratings_df.pivot_table(index='user_id', columns='name', values=target_col)
        user_game_matrix = scipy.sparse.csr_matrix(item_user_pivoted_df.fillna(0))   
        user_df = pd.DataFrame({'user_id': item_user_pivoted_df.index})
        user_similarity_searcher = content_based.SimilaritySearcher(user_df, user_game_matrix, name_col='user_id')
        games = item_user_pivoted_df.columns
        return UserBasedRecommender(user_similarity_searcher, user_game_matrix, games)
    
    @staticmethod
    def make_from_steam_ratings(target_col='log_hours'):
        game_dataset = steam_data.get_steam_ratings_dataset()
        return UserBasedRecommender.make_from_ratings(game_dataset.data, game_dataset.target, target_col)
    
    def get_user_vector(self, user_ratings):
        game_indices = [self.games.get_loc(g) for g in user_ratings.index]
        return scipy.sparse.csr_matrix(
            (user_ratings.values, (np.zeros(len(game_indices)), game_indices)),
            shape=(1,len(self.games))
        )

In [10]:
user_based_recommender = UserBasedRecommender.make_from_ratings(game_dataset.data, game_dataset.target, 'log_hours')

In [11]:
game_dataset.data.shape

(70499, 7)

In [12]:
ratings_df = pd.concat([game_dataset.data, game_dataset.target], axis=1)
item_user_pivoted_df = ratings_df.pivot_table(index='user_id', columns='name', values='log_hours')

In [13]:
i = 10
user_id = user_based_recommender.user_similarity_searcher.df.iloc[10,0]
user_ratings = item_user_pivoted_df.iloc[10,:]

In [14]:
user_ratings = user_ratings[~user_ratings.isna()]

In [15]:
user_based_recommender.ratings_matrix[10].sum()

19.37043237149426

In [16]:
recommendations = user_based_recommender.recommend_games_from_similar_users(user_id, n_recommended=10, n_similar=5)
recommendations

name
counter strike global offensive    4.570937
counter strike                     2.403907
counter strike source              0.684478
half life 2 deathmatch             0.657027
team fortress classic              0.649659
half life opposing force           0.649659
half life blue shift               0.649659
half life                          0.649659
ricochet                           0.642990
day of defeat                      0.642990
dtype: float64

In [17]:
user_based_recommender.recommend_games_from_similar_users(user_ratings=user_ratings, n_recommended=10, n_similar=5)

name
counter strike global offensive    4.866433
counter strike                     1.914865
planetside 2                       0.693147
counter strike nexon zombies       0.693147
eldevin                            0.693147
realm of the mad god               0.693147
counter strike source              0.687834
half life opposing force           0.669454
half life blue shift               0.669454
team fortress classic              0.669454
dtype: float64