In [1]:
#default_exp evaluation

In [14]:
#export

import os
import pickle
import attr

import pandas as pd
import scipy
import numpy as np

from game_recommender import steam_data, content_based, user_based, hybrid

In [4]:
%cd ..

/home/kuba/Projects/game_recommender


In [6]:
game_dataset = steam_data.get_steam_ratings_dataset()

In [5]:
user_based_recommender = user_based.UserBasedRecommender.make_from_steam_ratings('log_hours')
content_based_recommender = content_based.ContentBasedRecommender.make_from_steam_metadata()

In [19]:
#export


def get_recall_at(user_ratings, recommendations, k=10):
    user_games = user_ratings.sort_values(ascending=False)[:k]
    recommended_games = recommendations.sort_values(ascending=False)[:k]
    return recommended_games.index.isin(user_games.index).sum() / len(user_games)


def fill_nonexistent_with_zeros(source, target):
    not_in_source = target[~target.index.isin(source.index)].index
    zeroed_not_in_source = pd.Series(index=not_in_source, data=np.zeros_like(not_in_source))
    return pd.concat([source, zeroed_not_in_source])


def make_data_for_correlation_comparison(user_ratings, recommendations):
    user_ratings_with_zeros = fill_nonexistent_with_zeros(recommendations, user_ratings)
    recommendations_with_zeros = fill_nonexistent_with_zeros(user_ratings, recommendations)[user_ratings_with_zeros.index]
    n_compared = min([len(user_ratings), len(recommendations)])
    return user_ratings_with_zeros, recommendations_with_zeros, n_compared


def get_correlation(user_ratings, recommendations, method=scipy.stats.spearmanr):
    user_ratings_with_zeros, recommendations_with_zeros, n_compared = make_data_for_correlation_comparison(user_ratings, recommendations) 
    return method(user_ratings_with_zeros[:n_compared], recommendations_with_zeros[:n_compared])[0]

In [20]:
ratings_df = pd.concat([game_dataset.data, game_dataset.target], axis=1)
user_item_df = steam_data.get_user_item_df(ratings_df)

In [21]:
recommender = hybrid.HybridRecommender(content_based_recommender, user_based_recommender)

recommendations = recommender.get_weighted_recommendations(user_ratings, 20, user_recommendation_weight=1, content_recommendation_weight=1, mean_content_recommendation_weight=1)

In [22]:
i = 10
user_id = user_based_recommender.user_similarity_searcher.df.iloc[10,0]
user_ratings_raw = user_item_df.iloc[10]
user_ratings = user_ratings_raw[~user_ratings_raw.isna()]

In [23]:
get_recall_at(user_ratings, recommendations)

0.3

In [25]:
fill_nonexistent_with_zeros(user_ratings, recommendations)

counter strike                      3.13549
counter strike global offensive     6.19441
counter strike nexon zombies       0.693147
counter strike source              0.693147
day of defeat                      0.693147
deathmatch classic                 0.693147
eldevin                            0.693147
half life                          0.693147
half life blue shift               0.693147
half life opposing force           0.693147
planetside 2                       0.693147
raceroom racing experience         0.336472
realm of the mad god               0.693147
ricochet                           0.693147
team fortress classic              0.693147
unturned                           0.693147
warface                            0.693147
call of duty 4 modern warfare             0
world of tanks blitz mmo                  0
tanki online                              0
blockade classic                          0
team fortress 2                           0
post scriptum                   

In [26]:
get_correlation(user_ratings, recommendations, method=scipy.stats.kendalltau)

0.10891622972743417