In [2]:
import datetime as dt
import json
from operator import itemgetter

import numpy as np
from sklearn import preprocessing
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

from game_lists_site.models import (
    Game,
    GameCBR,
    GameDeveloper,
    GameGenre,
    GameStats,
    GameTag,
    System,
    User,
    UserCBR,
    UserGame,
    db,
)
from game_lists_site.utils.utils import days_delta, get_game_stats, normalize_dict

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
def get_cbr_for_game(game, result_count=9, min_player_count=28):
    system, _ = System.get_or_create(key="GameCBR")
    if not system.date_time_value or days_delta(system.date_time_value) >= 7:
        print("get_cbr_for_game")
        games = []
        features = []
        for gs in GameStats.select(GameStats.game, GameStats.features).where(
            GameStats.player_count >= min_player_count
        ):
            games.append(gs.game)
            features.append(gs.features)
        vectorizer = CountVectorizer()
        X = vectorizer.fit_transform(features)
        csr = cosine_similarity(X, X)  # cosine similarity result
        for g_a, row in zip(games, csr):
            l = 0
            precision = 0.7
            while l < 50:
                result = dict(
                    sorted(
                        [
                            (g_b.id, value)
                            for g_b, value in zip(games, row)
                            if value >= precision
                        ],
                        key=itemgetter(1),
                        reverse=True,
                    )
                )
                l = len(result)
                precision -= 0.05
            game_cbr, _ = GameCBR.get_or_create(game=g_a)
            game_cbr.data = json.dumps(result)
            game_cbr.save()
        system.date_time_value = dt.datetime.now()
        system.save()
    game_cbr = GameCBR.get_or_none(game=game)
    if game_cbr:
        data = {
            Game.get_by_id(game_id): value
            for game_id, value in json.loads(game_cbr.data).items()
        }
        return dict(list(data.items())[1 : result_count + 1])
    else:
        return {}

In [4]:
def get_cbr_for_user(user, played_user_games, result_count=9, cbr_for_game_result_count = 6, min_player_count = 10,  force=False):
    if not user.last_cbr_update_time or days_delta(user.last_cbr_update_time) >= 7 or force:
        print("get_cbr_for_user")
        played_games = [ug.game for ug in played_user_games]
        user_games_with_score = played_user_games.where(UserGame.score != None)
        games_with_score = [ug.game for ug in user_games_with_score]
        result = {}
        for user_game, game_cbr_result in zip(
            user_games_with_score, [get_cbr_for_game(g, cbr_for_game_result_count, min_player_count) for g in games_with_score]
        ):
            if game_cbr_result:
                for sim_game in game_cbr_result:
                    if sim_game not in played_games and sim_game.rating >= 7:
                        if sim_game.id not in result:
                            result[sim_game.id] = (
                                user_game.score * game_cbr_result[sim_game]
                            )
                        else:
                            result[sim_game.id] += (
                                user_game.score * game_cbr_result[sim_game]
                            )
        user_cbr, _ = UserCBR.get_or_create(user=user)
        user_cbr.data = json.dumps(normalize_dict(
            dict(sorted(result.items(), key=lambda x: x[1], reverse=True)))
        )
        user_cbr.save()
        user.last_cbr_update_time = dt.datetime.now()
        user.save()
    data = {
        Game.get_by_id(game_id): value
        for game_id, value in json.loads(
            UserCBR.get_or_none(UserCBR.user == user).data
        ).items()
    }
    return dict(list(data.items())[:result_count])

In [6]:
db.rollback()
# user.last_cbr_update_time = None
# user.save()
# game = Game.get_by_id(412020)

In [20]:
users = []
for user in User.select():
    count = (
        UserGame.select(UserGame.score)
        .where(UserGame.user == user)
        .where(UserGame.score > 0)
        .count()
    )
    if count >= 10:
        users.append(user)
print(len(users))

7


In [21]:
np.quantile([gs.player_count for gs in GameStats.select()], 0.90)

11.0

In [22]:
import random
from itertools import combinations_with_replacement
import pandas as pd

import matplotlib.pyplot as plt
from IPython.display import clear_output, display

max_accuracy = 0 # 2
best_cbr_for_game_result_count = 0 # 2
best_min_player_count = 0  # 54
comb = combinations_with_replacement(range(1, 50, 1), 2)
xline = []
yline = []
zline = []
data = {}
for user in users:
    played_user_games = UserGame.select().where(UserGame.user == user).where(UserGame.playtime > 0)
    last_played = np.quantile([ug.last_played for ug in played_user_games], 0.8)
    input_data = played_user_games.where(UserGame.last_played < last_played)
    check_data = played_user_games.where(UserGame.last_played >= last_played)
    data[user] = [input_data, check_data]
for i, c in enumerate(comb):
    system, _ = System.get_or_create(key="GameCBR")
    system.date_time_value = None
    system.save()
    cbr_for_game_result_count = c[0]
    min_player_count = c[1]
    accuracy = []
    for user, d in data.items():
        result = get_cbr_for_user(user, d[0], d[1].count(), cbr_for_game_result_count, min_player_count, True)
        intersection_count = len(set([ug.game for ug in d[1]]).intersection(list(result.keys())))
        accuracy.append(intersection_count / len(d[1]))
    accuracy = np.mean(accuracy)
    if accuracy > max_accuracy:
        max_accuracy = accuracy
        best_cbr_for_game_result_count = cbr_for_game_result_count
        best_min_player_count = min_player_count
    xline.append(cbr_for_game_result_count)
    yline.append(min_player_count)
    zline.append(accuracy)
    clear_output(wait=True)
    print(f"Iteration #{i}")
    print("cbr_for_game_result_count", cbr_for_game_result_count)
    print("min_player_count", min_player_count)
    print(accuracy)
    print("-"*24)
    print(f"Best Iteration")
    print("cbr_for_game_result_count", best_cbr_for_game_result_count)
    print("min_player_count", best_min_player_count)
    print(max_accuracy)
    print("-"*24)
    result = []
    for x,y,z in zip(xline, yline, zline):
        result.append((x,y,z))
    result = sorted(result, key=lambda x: x[2], reverse=True)
    result = sorted(result, key=lambda x: x[1])
    df = pd.DataFrame(result)
    df.to_csv("result.csv", index=False)
# clear_output(wait=True)


Iteration #1224
cbr_for_game_result_count 49
min_player_count 49
0.06182472989195679
------------------------
Best Iteration
cbr_for_game_result_count 8
min_player_count 46
0.12407424508264842
------------------------
