In [17]:
#default_exp user_based

In [None]:
import os
import pickle
import attr

import pandas as pd
import scipy
import numpy as np

from game_recommender import steam_data, content_based

In [2]:
%cd ..

/home/kuba/Projects/game_recommender


In [3]:
game_dataset_pickle_path = 'data/game_dataset.pkl'
if os.path.exists(game_dataset_pickle_path):
    game_dataset_raw = pickle.load(open(game_dataset_pickle_path, 'rb'))
else:
    steam_ratings_df = steam_data.filter_ratings_with_metadata(raw_steam_ratings_df)
    game_dataset_raw = steam_data.RecommenderDataset.make_implicit_feedback_dataset(steam_ratings_df)
    pickle.dump(game_dataset_raw, open(game_dataset_pickle_path, 'wb'))

game_dataset = (
    game_dataset_raw
    .filter_out_insufficient_reviews('user_id', 5)
    .filter_out_insufficient_reviews('name', 2)
)

In [4]:
user_game_matrix = steam_data.get_item_user_matrix(game_dataset)
steam_ratings_df, target = game_dataset.data, game_dataset.target

In [8]:
#export


@attr.s
class UserBasedRecommender:
    
    user_similarity_searcher: content_based.SimilaritySearcher = attr.ib()
    ratings_matrix = attr.ib()
    games = attr.ib()
    EPS = 1e-6
        
    def recommend_games_from_similar_users(
            self,
            user_id=None,
            user_vector=None,
            n_recommended=10,
            n_similar=5):
        assert not user_id is None or not user_ratings is None, 'Need to specify user id or vector'
        if user_id is not None:
            similar_users, similarities = self.user_similarity_searcher.find_similar(chosen_items=[user_id], n_similar=n_similar)
        else:
            similar_users, similarities = self.user_similarity_searcher.find_similar(chosen_vectors=user_vector.reshape(1,-1), n_similar=n_similar)
        mean_similar_users = similar_users.pop('mean')
        mean_similarities = similarities.pop('mean_similarity')
        similarities = similarities.values.reshape(-1)
        similar_user_idxs = self.user_similarity_searcher.df[self.user_similarity_searcher.df['user_id'].isin(similar_users.values.reshape(-1))].index
        similar_users_ratings = self.ratings_matrix[similar_user_idxs]
        predicted_ratings_by_user = (np.diag(similarities / similarities.max())) @ similar_users_ratings
        
        pred_ratings_by_user_sum = (predicted_ratings_by_user > 0).sum(axis=0)
        pred_ratings_by_user_sum[pred_ratings_by_user_sum < self.EPS] = 1
        predicted_ratings = predicted_ratings_by_user.sum(axis=0) / pred_ratings_by_user_sum
        best_rated_games_idxs = predicted_ratings.argsort()[::-1][:n_recommended]
        best_rated_games = self.games[best_rated_games_idxs]
        return best_rated_games, predicted_ratings[best_rated_games_idxs]
    
    @classmethod
    def make_from_ratings(cls, ratings_df, target, target_col):
        ratings_df = pd.concat([steam_ratings_df, target], axis=1)
        item_user_pivoted_df = ratings_df.pivot_table(index='user_id', columns='name', values=target_col)
        user_game_matrix = scipy.sparse.csr_matrix(item_user_pivoted_df.fillna(0))   
        user_df = pd.DataFrame({'user_id': item_user_pivoted_df.index})
        user_similarity_searcher = content_based.SimilaritySearcher(user_df, user_game_matrix, name_col='user_id')
        games = item_user_pivoted_df.columns
        return UserBasedRecommender(user_similarity_searcher, user_game_matrix, games)

In [9]:
user_based_recommender = UserBasedRecommender.make_from_ratings(game_dataset.data, game_dataset.target, 'log_hours')

In [12]:
ratings_df = pd.concat([game_dataset.data, game_dataset.target], axis=1)
item_user_pivoted_df = ratings_df.pivot_table(index='name', columns='user_id', values='log_hours')

In [18]:
i = 10
user_id = user_based_recommender.user_similarity_searcher.df.iloc[10,0]
user_ratings = item_user_pivoted_df.iloc[:,10]

In [19]:
user_based_recommender.recommend_games_from_similar_users(user_id, n_recommended=10, n_similar=5)

(Index(['counter strike global offensive', 'counter strike',
        'counter strike source', 'half life', 'team fortress classic',
        'half life opposing force', 'half life blue shift', 'ricochet',
        'day of defeat', 'deathmatch classic'],
       dtype='object', name='name'),
 array([4.68298243, 2.46533538, 0.68583722, 0.66945441, 0.66945441,
        0.66945441, 0.66945441, 0.66053106, 0.66053106, 0.66053106]))

In [20]:
user_based_recommender.recommend_games_from_similar_users(user_vector=user_based_recommender.ratings_matrix[i], n_recommended=10, n_similar=5)

(Index(['counter strike global offensive', 'counter strike',
        'counter strike source', 'half life', 'team fortress classic',
        'half life opposing force', 'half life blue shift', 'ricochet',
        'day of defeat', 'deathmatch classic'],
       dtype='object', name='name'),
 array([4.68298243, 2.46533538, 0.68583722, 0.66945441, 0.66945441,
        0.66945441, 0.66945441, 0.66053106, 0.66053106, 0.66053106]))