In [57]:
# imports
import datetime as dt
import json
import random
from itertools import combinations_with_replacement

import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

from game_lists_site.models import (
    BenchmarkUserCBR,
    BenchmarkUserMBCF,
    BenchmarkUserSimilarity,
    Game,
    System,
    User,
    UserGame,
    db,
)
from game_lists_site.utils.utils import (
    days_delta,
    get_cbr_for_game,
    get_normalized_playtimes,
    normalize_dict,
)

# Roll back on the shared database handle before any query is issued
# (presumably clears a transaction left open by an earlier failed cell — TODO confirm).
db.rollback()

In [58]:
# get users
# Keep only the users that own more than one UserGame row with a positive score.
users = []
for user in User.select(User.id, User.username):
    scored_user_games = UserGame.select(UserGame.id).where(
        (UserGame.user == user) & (UserGame.score > 0)
    )
    game_with_score_count = scored_user_games.count()
    if game_with_score_count <= 1:
        continue
    users.append(user)
print(users)

[<User: 76561198125290350>, <User: 76561198094109207>, <User: 76561198091812571>, <User: 76561198067514875>, <User: 76561198083927294>, <User: 76561198394079733>, <User: 76561198026681120>]


In [59]:
# cbr for user
def reset_benchmark_cbr_for_user(users):
    """Clear the stored CBR timestamps so the next run recomputes everything.

    Sets ``date_time_value`` to ``None`` on the ``"GameCBR"`` System row
    (creating that row if it does not exist) and sets
    ``last_benchmark_cbr_update_time`` to ``None`` on every user in ``users``.
    """
    game_cbr_system = System.get_or_create(key="GameCBR")[0]
    game_cbr_system.date_time_value = None
    game_cbr_system.save()
    for benchmark_user in users:
        benchmark_user.last_benchmark_cbr_update_time = None
        benchmark_user.save()


def check_result(result_games: list, check_games: list):
    """Return the share of ``check_games`` found among the leading results.

    Only the first ``len(check_games)`` entries of ``result_games`` are
    considered, so the value is always between 0 and 1.

    Args:
        result_games: ordered list of recommended games.
        check_games: held-out games that count as hits.

    Returns:
        Number of distinct hits divided by ``len(check_games)``, or ``0.0``
        when ``check_games`` is empty.
    """
    check_game_len = len(check_games)
    if check_game_len == 0:
        # Nothing can be hit; avoid a ZeroDivisionError.
        return 0.0
    top_results = set(result_games[:check_game_len])
    return len(top_results.intersection(check_games)) / check_game_len


def get_benchmark_cbr_for_user(
    user, result_count=-1, min_player_count=36, cbr_for_game_result_count=16
):
    """Return content-based recommendations for ``user`` as ``{Game: score}``.

    The recommendations are recomputed when ``user.last_benchmark_cbr_update_time``
    is unset or ``days_delta`` of it is at least 7; otherwise the row stored in
    ``BenchmarkUserCBR`` is reused. The stored row is reused regardless of the
    parameters it was computed with, so callers that vary the parameters must
    reset the timestamp first.

    Only played games whose ``last_played`` lies below the 0.8 quantile of the
    user's play history feed the computation; the remainder is left out so it
    can serve as a hold-out set.

    Args:
        user: the User to recommend for.
        result_count: maximum number of entries to return, ``-1`` for all.
        min_player_count: forwarded to ``get_cbr_for_game``.
        cbr_for_game_result_count: forwarded to ``get_cbr_for_game``.

    Returns:
        Dict mapping ``Game`` instances to their stored score, in stored order;
        empty when nothing is stored for the user.
    """
    if (
        not user.last_benchmark_cbr_update_time
        or days_delta(user.last_benchmark_cbr_update_time) >= 7
    ):
        played_user_games = UserGame.select(
            UserGame.game, UserGame.last_played, UserGame.score
        ).where((UserGame.user == user) & (UserGame.playtime > 0))
        last_played = np.quantile([ug.last_played for ug in played_user_games], 0.8)
        played_user_games = played_user_games.where(UserGame.last_played < last_played)
        # Set of ids gives O(1) membership checks in the inner loop.
        played_game_ids = {ug.game.id for ug in played_user_games}
        user_games_with_score = played_user_games.where(UserGame.score > 0)
        result = {}
        for user_game in user_games_with_score:
            # Pair each score with the similar-games result of the same row,
            # instead of zipping two separate iterations of the query.
            game_cbr_result = get_cbr_for_game(
                user_game.game, cbr_for_game_result_count, min_player_count
            )
            if not game_cbr_result:
                continue
            for sim_game, similarity in game_cbr_result.items():
                # Skip games already in the input set and games rated below 7.
                if sim_game.id in played_game_ids or not sim_game.rating >= 7:
                    continue
                result[sim_game.id] = (
                    result.get(sim_game.id, 0) + user_game.score * similarity
                )
        benchmark_user_cbr, _ = BenchmarkUserCBR.get_or_create(user=user)
        benchmark_user_cbr.data = json.dumps(
            normalize_dict(
                dict(sorted(result.items(), key=lambda x: x[1], reverse=True))
            )
        )
        benchmark_user_cbr.save()
        user.last_benchmark_cbr_update_time = dt.datetime.now()
        user.save()
    stored = BenchmarkUserCBR.get_or_none(BenchmarkUserCBR.user == user)
    if stored is None or not stored.data:
        # No stored row (or no payload): nothing to recommend.
        return {}
    data = {
        Game.get_by_id(game_id): value
        for game_id, value in json.loads(stored.data).items()
    }
    if result_count != -1:
        return dict(list(data.items())[:result_count])
    return data


def train_cbr_for_user(users, try_count=100):
    """Random-search the CBR parameters and print the accuracy of every trial.

    Each trial resets the stored CBR state, draws ``min_player_count`` and
    ``cbr_for_game_result_count`` uniformly from 1..50, and scores the
    recommendations of every user against the games whose ``last_played`` is
    at or above the 0.8 quantile of that user's play history.

    Args:
        users: users to evaluate on.
        try_count: number of random trials to run.

    Prints a "current" line and a "best so far" line per trial; returns nothing.
    """
    best_accuracy = 0
    best_cbr_for_game_result = 1
    best_min_player_count = 1
    for _ in range(try_count):
        reset_benchmark_cbr_for_user(users)
        min_player_count = random.randint(1, 50)
        cbr_for_game_result_count = random.randint(1, 50)
        cbr_accuracy = []
        for user in users:
            played_user_games = UserGame.select(
                UserGame.game, UserGame.last_played
            ).where((UserGame.user == user) & (UserGame.playtime > 0))
            last_played = np.quantile([ug.last_played for ug in played_user_games], 0.8)
            # Hold-out set: the most recently played games.
            check_user_games = played_user_games.where(
                UserGame.last_played >= last_played
            )
            check_games = [ug.game for ug in check_user_games]
            cbr_result = get_benchmark_cbr_for_user(
                user, -1, min_player_count, cbr_for_game_result_count
            )
            cbr_accuracy.append(check_result(list(cbr_result.keys()), check_games))
        current_accuracy = np.mean(cbr_accuracy)
        print("current", current_accuracy, min_player_count, cbr_for_game_result_count)
        if best_accuracy < current_accuracy:
            best_accuracy = current_accuracy
            best_min_player_count = min_player_count
            best_cbr_for_game_result = cbr_for_game_result_count
        # Report the parameters of the best trial, not of the current one.
        print("best", best_accuracy, best_min_player_count, best_cbr_for_game_result)

In [60]:
# train_cbr_for_user(users, 0.099)

In [61]:
# mbcf for user


def reset_benchmark_mbcf_for_user(users):
    """Clear the stored MBCF timestamps so the next run recomputes everything.

    Sets ``date_time_value`` to ``None`` on each of the "BenchmarkUserMBCF",
    "NormalizedPlaytime" and "BenchmarkUserSimilarity" System rows that exist,
    and sets ``last_benchmark_mbcf_update_time`` to ``None`` on every user in
    ``users``.
    """
    system_keys = (
        "BenchmarkUserMBCF",
        "NormalizedPlaytime",
        "BenchmarkUserSimilarity",
    )
    existing_systems = [
        System.get_or_none(System.key == system_key) for system_key in system_keys
    ]
    for existing_system in existing_systems:
        if not existing_system:
            continue
        existing_system.date_time_value = None
        existing_system.save()
    for benchmark_user in users:
        benchmark_user.last_benchmark_mbcf_update_time = None
        benchmark_user.save()


def get_similar_users(
    user,
    max_count=-1,
    min_player_count=10,
    zscore_norm=False,
    corrcoef=False,
):
    """Return the users most similar to ``user`` as ``{user_id: similarity}``.

    The full user-by-user similarity table is rebuilt when the
    "BenchmarkUserSimilarity" System row has no ``date_time_value`` or
    ``days_delta`` of it is at least 7; otherwise the stored rows are reused.
    The stored rows are reused regardless of the parameters they were computed
    with, so callers that vary the parameters must reset the timestamp first.

    Args:
        user: the User whose neighbours are requested.
        max_count: maximum number of neighbours, ``-1`` for the whole stored
            row (which then still contains ``user`` itself).
        min_player_count: forwarded to ``get_normalized_playtimes``.
        zscore_norm: forwarded to ``get_normalized_playtimes``.
        corrcoef: use ``np.corrcoef`` instead of cosine similarity.

    Returns:
        Dict of similarities in stored order, or ``{}`` when no row is stored
        for ``user``.
    """
    system, _ = System.get_or_create(key="BenchmarkUserSimilarity")
    if not system.date_time_value or days_delta(system.date_time_value) >= 7:
        normalized_playtimes = get_normalized_playtimes(
            min_player_count, True, zscore_norm
        )
        games = list(normalized_playtimes.keys())
        # Count in how many games every user has a normalized playtime.
        game_counts = {}
        for game in games:
            for u in normalized_playtimes[game].keys():
                game_counts[u] = game_counts.get(u, 0) + 1
        # Only users present in at least 10 games take part in the comparison.
        users = [u for u, game_count in game_counts.items() if game_count >= 10]
        game_vecs = np.array(
            [
                [normalized_playtimes[game].get(u) or 0 for u in users]
                for game in games
            ],
            dtype=np.float32,
        )
        # One row per user, one column per game.
        user_vecs = game_vecs.T
        if corrcoef:
            similarities = np.corrcoef(user_vecs)
        else:
            similarities = cosine_similarity(user_vecs)
        BenchmarkUserSimilarity.delete().execute()
        for u_a, similarity_row in zip(users, similarities):
            pairs = [(u_b, float(value)) for u_b, value in zip(users, similarity_row)]
            BenchmarkUserSimilarity.create(
                user=User.get_by_id(u_a),
                data=dict(sorted(pairs, key=lambda x: x[1], reverse=True)),
            )
        system.date_time_value = dt.datetime.now()
        system.save()
    user_similarity = BenchmarkUserSimilarity.get_or_none(
        BenchmarkUserSimilarity.user == user
    )
    if not user_similarity:
        return {}
    if max_count == -1:
        return user_similarity.data
    # Drop the user's own entry by id rather than by position: with zero
    # vectors, NaN correlations or ties the user is not guaranteed to be the
    # first entry of their own row.
    own_id = str(getattr(user, "id", user))
    neighbours = [
        (other_id, value)
        for other_id, value in user_similarity.data.items()
        if str(other_id) != own_id
    ]
    return dict(neighbours[:max_count])


def get_benchmark_mbcf_for_user(
    user,
    max_count=-1,
    min_player_count=24,
    sim_user_count=35,
    zscore_norm=False,
    corrcoef=False,
):
    """Return collaborative-filtering recommendations for ``user`` as ``{Game: score}``.

    The recommendations are recomputed when
    ``user.last_benchmark_mbcf_update_time`` is unset or ``days_delta`` of it is
    at least 7; otherwise the row stored in ``BenchmarkUserMBCF`` is reused.
    The stored row is reused regardless of the parameters it was computed
    with, so callers that vary the parameters must reset the timestamp first.

    A game's score is the sum, over the similar users, of that user's
    normalized playtime for the game times the similarity value. Games the
    user played with ``last_played`` below the 0.8 quantile of their history
    are excluded; the remainder is left out so it can serve as a hold-out set.

    Args:
        user: the User to recommend for.
        max_count: maximum number of entries to return, ``-1`` for all.
        min_player_count: forwarded to ``get_normalized_playtimes`` and
            ``get_similar_users``.
        sim_user_count: number of similar users to aggregate over.
        zscore_norm: forwarded to ``get_normalized_playtimes`` and
            ``get_similar_users``.
        corrcoef: forwarded to ``get_similar_users``.

    Returns:
        Dict mapping ``Game`` instances to their score in stored order, or
        ``{}`` when nothing is stored for the user.
    """
    if (
        not user.last_benchmark_mbcf_update_time
        or days_delta(user.last_benchmark_mbcf_update_time) >= 7
    ):
        normalized_playtimes = get_normalized_playtimes(
            min_player_count, True, zscore_norm
        )
        users_sim = get_similar_users(
            user, sim_user_count, min_player_count, zscore_norm, corrcoef
        )
        played_user_games = UserGame.select(
            UserGame.game, UserGame.last_played, UserGame.score
        ).where((UserGame.user == user) & (UserGame.playtime > 0))
        last_played = np.quantile([ug.last_played for ug in played_user_games], 0.8)
        played_user_games = played_user_games.where(UserGame.last_played < last_played)
        # Set of ids gives O(1) membership checks in the loop below.
        played_game_ids = {ug.game.id for ug in played_user_games}
        result = {}
        for game_id, game_playtimes in normalized_playtimes.items():
            if game_id in played_game_ids:
                continue
            for user_id, value in users_sim.items():
                # Similarity keys may come back as strings; playtimes are keyed by int.
                pt = game_playtimes.get(int(user_id))
                if pt:
                    result[game_id] = result.get(game_id, 0) + pt * value
        user_mbcf, _ = BenchmarkUserMBCF.get_or_create(
            user=user,
        )
        user_mbcf.data = dict(sorted(result.items(), key=lambda x: x[1], reverse=True))
        user_mbcf.save()
        # Stamp the same field the guard above reads, so the stored result is
        # actually reused until it is reset or expires.
        user.last_benchmark_mbcf_update_time = dt.datetime.now()
        user.save()
    user_mbcf = BenchmarkUserMBCF.get_or_none(BenchmarkUserMBCF.user == user)
    if not user_mbcf:
        return {}
    data = {
        Game.get_by_id(int(key)): value for key, value in user_mbcf.data.items()
    }
    if max_count == -1:
        return data
    return dict(list(data.items())[:max_count])


# reset_benchmark_mbcf_for_user(users)
# for user in users:
# print(user.username, get_benchmark_mbcf_for_user(user, 10))


def check_result(result_games: list, check_games: list):
    """Return the share of ``check_games`` found among the leading results.

    Only the first ``len(check_games)`` entries of ``result_games`` are
    considered, so the value is always between 0 and 1.

    Args:
        result_games: ordered list of recommended games.
        check_games: held-out games that count as hits.

    Returns:
        Number of distinct hits divided by ``len(check_games)``, or ``0.0``
        when ``check_games`` is empty.
    """
    check_game_len = len(check_games)
    if check_game_len == 0:
        # Nothing can be hit; avoid a ZeroDivisionError.
        return 0.0
    top_results = set(result_games[:check_game_len])
    return len(top_results.intersection(check_games)) / check_game_len


def train_mbcf_for_user(users, try_count=100):
    """Random-search the MBCF parameters and print the accuracy of every trial.

    Each trial resets the stored MBCF state, draws ``zscore_norm`` and
    ``corrcoef`` at random, draws ``min_player_count`` and ``sim_user_count``
    uniformly from 1..50, and scores the recommendations of every user against
    the games whose ``last_played`` is at or above the 0.8 quantile of that
    user's play history.

    Args:
        users: users to evaluate on.
        try_count: number of random trials to run.

    Prints a "current" line and a "best so far" line per trial; returns nothing.
    """
    best_accuracy = 0
    best_min_player_count = 1
    # Initialised up front so the "best" print works even when no trial has
    # beaten the starting accuracy of 0 yet.
    best_sim_user_count = 1
    best_zscore_norm = False
    best_corrcoef = False
    for _ in range(try_count):
        reset_benchmark_mbcf_for_user(users)
        zscore_norm = random.choice([True, False])
        corrcoef = random.choice([True, False])
        min_player_count = random.randint(1, 50)
        sim_user_count = random.randint(1, 50)
        mbcf_accuracy = []
        for user in users:
            played_user_games = UserGame.select(
                UserGame.game, UserGame.last_played
            ).where((UserGame.user == user) & (UserGame.playtime > 0))
            last_played = np.quantile(
                [ug.last_played for ug in played_user_games], 0.8
            )
            # Hold-out set: the most recently played games.
            check_user_games = played_user_games.where(
                UserGame.last_played >= last_played
            )
            check_games = [ug.game for ug in check_user_games]
            mbcf_result = get_benchmark_mbcf_for_user(
                user, -1, min_player_count, sim_user_count, zscore_norm, corrcoef
            )
            mbcf_accuracy.append(check_result(list(mbcf_result.keys()), check_games))
        current_accuracy = np.mean(mbcf_accuracy)
        print(
            "current",
            current_accuracy,
            min_player_count,
            sim_user_count,
            zscore_norm,
            corrcoef,
        )
        if best_accuracy < current_accuracy:
            best_accuracy = current_accuracy
            best_min_player_count = min_player_count
            best_sim_user_count = sim_user_count
            best_zscore_norm = zscore_norm
            best_corrcoef = corrcoef
        print(
            "best",
            best_accuracy,
            best_min_player_count,
            best_sim_user_count,
            best_zscore_norm,
            best_corrcoef,
        )

In [62]:
# Short run: two random-search trials for the MBCF recommender.
train_mbcf_for_user(users, 2)

get_normalized_playtimes 22 True False
current 0.14049004217071445 22 14 False True
best 0.14049004217071445 22 14 False True
get_normalized_playtimes 14 True True
current 0.15285498814910578 14 25 True False
best 0.15285498814910578 14 25 True False


In [63]:
# Short run: two random-search trials for the CBR recommender.
train_cbr_for_user(users, 2)

get_cbr_for_game
current 0.033013205282112844 26 22
best 0.033013205282112844 26 22
get_cbr_for_game
current 0.07531012404961986 2 5
best 0.07531012404961986 2 5


In [64]:
# mobcf for user

In [65]:
# hrs for game

In [66]:
# benchmark
def check_result(result_games: list, check_games: list):
    """Return the share of ``check_games`` found among the leading results.

    Only the first ``len(check_games)`` entries of ``result_games`` are
    considered, so the value is always between 0 and 1.

    Args:
        result_games: ordered list of recommended games.
        check_games: held-out games that count as hits.

    Returns:
        Number of distinct hits divided by ``len(check_games)``, or ``0.0``
        when ``check_games`` is empty.
    """
    check_game_len = len(check_games)
    if check_game_len == 0:
        # Nothing can be hit; avoid a ZeroDivisionError.
        return 0.0
    top_results = set(result_games[:check_game_len])
    return len(top_results.intersection(check_games)) / check_game_len

cbr_accuracy, mbcf_accuracy = [], []

# Start from a clean slate so both recommenders are recomputed for this run.
reset_benchmark_cbr_for_user(users)
reset_benchmark_mbcf_for_user(users)

for user in users:
    played_user_games = UserGame.select(UserGame.game, UserGame.last_played).where(
        (UserGame.user == user) & (UserGame.playtime > 0)
    )
    # Split the play history at the 0.8 quantile of last_played:
    # older games are the input, the most recent ones are the hold-out set.
    last_played = np.quantile([ug.last_played for ug in played_user_games], 0.8)
    input_user_games = played_user_games.where(UserGame.last_played < last_played)
    check_user_games = played_user_games.where(UserGame.last_played >= last_played)
    played_games = [ug.game for ug in input_user_games]
    check_games = [ug.game for ug in check_user_games]
    # Both recommenders are asked for at most 36 results; check_result then
    # looks at the first len(check_games) of them.
    cbr_result = get_benchmark_cbr_for_user(user, 36)
    mbcf_result = get_benchmark_mbcf_for_user(user, 36)
    cbr_accuracy.append(check_result(list(cbr_result), check_games))
    mbcf_accuracy.append(check_result(list(mbcf_result), check_games))
print("cbr accuracy:", np.mean(cbr_accuracy))
print("mbcf accuracy:", np.mean(mbcf_accuracy))

get_cbr_for_game
get_normalized_playtimes 24 True False
cbr accuracy: 0.1036660818173423
mbcf accuracy: 0.1889094099178133
