In [436]:
# imports
import datetime as dt
import imp
import json
import random
from itertools import combinations_with_replacement
from pathlib import Path

import numpy as np
import pandas as pd
import psycopg2
import scipy.stats as stats
import torch
import torch.nn as nn
import torch.nn.functional as F
from math import ceil
from sklearn import preprocessing
from sklearn.metrics.pairwise import cosine_similarity

from game_lists_site.models import (
    BenchmarkUserCBR,
    BenchmarkUserMBCF,
    BenchmarkUserSimilarity,
    Game,
    System,
    User,
    UserGame,
    GameStats,
    db,
    user_data_dir,
)
from game_lists_site.utils.utils import (
    days_delta,
    get_cbr_for_game,
    normalize_dict,
    get_game_stats,
)

# Roll back the database's current transaction before any cell below issues a query.
# NOTE(review): presumably this clears an aborted transaction left over from a
# previously failed cell run — confirm against the `db` object's backend.
db.rollback()


In [437]:
# get users
# Keep only the users that have more than one game with a positive score.
users = []
for user in User.select(User.id, User.username):
    scored_user_games = UserGame.select(UserGame.id).where(
        (UserGame.user == user) & (UserGame.score > 0)
    )
    game_with_score_count = scored_user_games.count()
    if game_with_score_count <= 1:
        continue
    users.append(user)
print(users)

[<User: 76561198125290350>, <User: 76561198094109207>, <User: 76561198026681120>, <User: 76561198091812571>, <User: 76561198083927294>, <User: 76561198067514875>, <User: 76561198394079733>]


In [438]:
def get_normalized_playtimes(min_player_count, normalize, zscore_norm):
    """Collect per-game playtimes, optionally normalized within each game.

    Args:
        min_player_count: only games whose ``GameStats.player_count`` is at
            least this value are included.
        normalize: when false, the raw playtimes are returned untouched.
        zscore_norm: when normalizing, use ``stats.zscore`` if true and
            ``preprocessing.normalize`` otherwise.

    Returns:
        ``{game.id: {user.id: playtime}}`` built from ``UserGame`` rows with a
        positive playtime.
    """
    qualifying_stats = GameStats.select(GameStats.game).where(
        GameStats.player_count >= min_player_count
    )
    games = [game_stats.game for game_stats in qualifying_stats]
    played_rows = UserGame.select(UserGame.playtime, UserGame.user).where(
        UserGame.playtime > 0
    )
    playtimes_by_game = {}
    for game in games:
        # Narrow the base query to the rows of the current game.
        rows_for_game = played_rows.where(UserGame.game == game)
        raw_playtimes = [row.playtime for row in rows_for_game]
        if not normalize:
            scaled_playtimes = raw_playtimes
        elif zscore_norm:
            scaled_playtimes = stats.zscore(raw_playtimes)
        else:
            scaled_playtimes = preprocessing.normalize([raw_playtimes])[0]
        per_user = {}
        for row, value in zip(rows_for_game, scaled_playtimes):
            per_user[row.user.id] = value
        playtimes_by_game[game.id] = per_user
    return playtimes_by_game


In [439]:
# cbr for user
def reset_benchmark_cbr_for_user(users):
    """Clear the CBR timestamps so the next benchmark call recomputes.

    Resets ``date_time_value`` on the ``GameCBR`` system row (creating the row
    if needed) and ``last_benchmark_cbr_update_time`` on every given user.
    """
    cbr_system = System.get_or_create(key="GameCBR")[0]
    cbr_system.date_time_value = None
    cbr_system.save()
    for benchmark_user in users:
        benchmark_user.last_benchmark_cbr_update_time = None
        benchmark_user.save()


def check_result(result_games: list, check_games: list):
    """Return the share of held-out games present among the top recommendations.

    Only the first ``len(check_games)`` entries of ``result_games`` are
    considered, so the value lies between 0 and 1.

    Args:
        result_games: recommended games, best first.
        check_games: held-out games the recommendations are compared against.

    Returns:
        The hit fraction, or ``0.0`` when ``check_games`` is empty (previously
        a ``ZeroDivisionError``).
    """
    check_game_len = len(check_games)
    if check_game_len == 0:
        # Nothing to hit: report zero accuracy instead of dividing by zero.
        return 0.0
    top_results = set(result_games[:check_game_len])
    return len(top_results.intersection(check_games)) / check_game_len


def get_benchmark_cbr_for_user(
    user, result_count=-1, min_player_count=24, cbr_for_game_result_count=2
):
    """Return content-based recommendations for ``user`` on the benchmark split.

    When ``user.last_benchmark_cbr_update_time`` is unset or ``days_delta`` of
    it is at least 7, the recommendations are recomputed: every scored game the
    user played before the 0.8 quantile of their ``last_played`` values
    contributes ``score * similarity`` to each similar game returned by
    ``get_cbr_for_game``, provided the similar game is not already in that
    played set and has ``rating >= 7``. The sorted, normalized scores are
    stored as JSON in ``BenchmarkUserCBR``.

    Args:
        user: the user to recommend for.
        result_count: number of entries to return; ``-1`` returns all of them.
        min_player_count: forwarded to ``get_cbr_for_game``.
        cbr_for_game_result_count: forwarded to ``get_cbr_for_game``.

    Returns:
        ``{Game: score}`` ordered best first; empty when nothing is stored.
    """
    if (
        not user.last_benchmark_cbr_update_time
        or days_delta(user.last_benchmark_cbr_update_time) >= 7
    ):
        played_user_games = UserGame.select(
            UserGame.game, UserGame.last_played, UserGame.score
        ).where((UserGame.user == user) & (UserGame.playtime > 0))
        # Games played at or after the 0.8 quantile are held out for evaluation.
        last_played = np.quantile([ug.last_played for ug in played_user_games], 0.8)
        played_user_games = played_user_games.where(UserGame.last_played < last_played)
        played_games = [ug.game for ug in played_user_games]
        user_games_with_score = played_user_games.where(UserGame.score > 0)
        result = {}
        for user_game in user_games_with_score:
            game_cbr_result = get_cbr_for_game(
                user_game.game, cbr_for_game_result_count, min_player_count
            )
            if not game_cbr_result:
                continue
            for sim_game in game_cbr_result:
                if sim_game in played_games or sim_game.rating < 7:
                    continue
                weighted = user_game.score * game_cbr_result[sim_game]
                result[sim_game.id] = result.get(sim_game.id, 0) + weighted
        ranked = dict(sorted(result.items(), key=lambda x: x[1], reverse=True))
        benchmark_user_cbr, _ = BenchmarkUserCBR.get_or_create(user=user)
        # An empty result is stored as-is rather than handed to normalize_dict.
        benchmark_user_cbr.data = json.dumps(normalize_dict(ranked) if ranked else {})
        # Saved once; the original issued the same save twice in a row.
        benchmark_user_cbr.save()
        user.last_benchmark_cbr_update_time = dt.datetime.now()
        user.save()
    stored = BenchmarkUserCBR.get_or_none(BenchmarkUserCBR.user == user)
    if stored is None or not stored.data:
        # No stored record for this user: nothing to recommend.
        return {}
    data = {
        Game.get_by_id(game_id): value
        for game_id, value in json.loads(stored.data).items()
    }
    if result_count != -1:
        return dict(list(data.items())[:result_count])
    return data


def train_cbr_for_user(users, try_count = 100):
    """Random-search the two CBR parameters and log every trial to ``cbr.csv``.

    Each trial draws ``min_player_count`` and ``cbr_for_game_result_count``
    from 1..50, resets the CBR timestamps, recomputes the recommendations of
    every user and scores them with ``check_result`` against the games whose
    ``last_played`` is at or above the user's 0.8 quantile. Parameter pairs
    already present in the CSV are skipped.

    Args:
        users: users to evaluate.
        try_count: number of random draws (skipped duplicates count as draws).

    Returns:
        None; the current and best results are printed after each trial.
    """
    best_accuracy = 0
    best_min_player_count = 1
    best_cbr_for_game_result_count = 1
    file = Path("cbr.csv")
    data = pd.read_csv(file).values.tolist() if file.exists() else []
    print(data)

    def check(min_player_count, cbr_for_game_result_count):
        # Rows read back from the CSV may hold floats, so compare as floats.
        candidate = [float(min_player_count), float(cbr_for_game_result_count)]
        return any(el[:2] == candidate for el in data)

    for _ in range(try_count):
        reset_benchmark_cbr_for_user(users)
        min_player_count = random.randint(1, 50)
        cbr_for_game_result_count = random.randint(1, 50)
        if check(min_player_count, cbr_for_game_result_count):
            print("already exist")
            continue
        cbr_accuracy = []
        for user in users:
            played_user_games = UserGame.select(
                UserGame.game, UserGame.last_played
            ).where((UserGame.user == user) & (UserGame.playtime > 0))
            last_played = np.quantile([ug.last_played for ug in played_user_games], 0.8)
            # Only the held-out games are needed here; the recommender derives
            # its own input split, so the unused hold-in query was dropped.
            check_games = [
                ug.game
                for ug in played_user_games.where(UserGame.last_played >= last_played)
            ]
            cbr_result = get_benchmark_cbr_for_user(
                user, -1, min_player_count, cbr_for_game_result_count
            )
            cbr_accuracy.append(check_result(list(cbr_result.keys()), check_games))
        current_accuracy = np.mean(cbr_accuracy)
        data.append([min_player_count, cbr_for_game_result_count, current_accuracy])
        pd.DataFrame(data).to_csv(file, index=False)
        print("current", current_accuracy, min_player_count, cbr_for_game_result_count)
        if best_accuracy < current_accuracy:
            best_accuracy = current_accuracy
            best_min_player_count = min_player_count
            best_cbr_for_game_result_count = cbr_for_game_result_count
        print("best", best_accuracy, best_min_player_count, best_cbr_for_game_result_count)

In [440]:
# train_cbr_for_user(users, 0.099)

In [441]:
# mbcf for user

def reset_benchmark_mbcf_for_user(users):
    """Clear the MBCF-related timestamps so the next call recomputes.

    Resets ``date_time_value`` on whichever of the three system rows exist and
    ``last_benchmark_mbcf_update_time`` on every given user.
    """
    system_keys = (
        "BenchmarkUserMBCF",
        "NormalizedPlaytime",
        "BenchmarkUserSimilarity",
    )
    found_systems = [System.get_or_none(System.key == key) for key in system_keys]
    for system_row in found_systems:
        if not system_row:
            continue
        system_row.date_time_value = None
        system_row.save()
    for benchmark_user in users:
        benchmark_user.last_benchmark_mbcf_update_time = None
        benchmark_user.save()


def get_similar_users(
    user,
    normalized_playtimes,
    max_count=-1,
    corrcoef=False,
):
    """Return the stored user-to-user similarities for ``user``.

    When the ``BenchmarkUserSimilarity`` system timestamp is unset or
    ``days_delta`` of it is at least 7, the whole similarity table is rebuilt:
    users appearing in at least 10 games of ``normalized_playtimes`` are
    compared pairwise with ``np.corrcoef`` (``corrcoef=True``) or
    ``cosine_similarity``, and one row per user is written with the
    similarities sorted in descending order.

    Args:
        user: the user whose similarities are returned.
        normalized_playtimes: ``{game_id: {user_id: playtime}}``.
        max_count: ``-1`` returns the stored mapping unchanged; otherwise the
            entries at positions ``1 .. max_count`` are returned (the first
            entry is skipped).
        corrcoef: selects the similarity measure used when rebuilding.

    Returns:
        A mapping of user id to similarity, or ``{}`` when ``user`` has no row.
    """
    system, _ = System.get_or_create(key="BenchmarkUserSimilarity")
    if not system.date_time_value or days_delta(system.date_time_value) >= 7:
        # Count, per user, in how many games they appear.
        games_per_user = {}
        for playtimes_by_user in normalized_playtimes.values():
            for user_id in playtimes_by_user:
                games_per_user[user_id] = games_per_user.get(user_id, 0) + 1
        eligible_users = [
            user_id for user_id, count in games_per_user.items() if count >= 10
        ]
        # One row per game, one column per eligible user; missing playtime -> 0.
        game_vecs = np.array(
            [
                [playtimes_by_user.get(user_id) or 0 for user_id in eligible_users]
                for playtimes_by_user in normalized_playtimes.values()
            ],
            dtype=np.float32,
        )
        user_vecs = np.flip(np.rot90(game_vecs), 0)
        similarity = (
            np.corrcoef(user_vecs) if corrcoef else cosine_similarity(user_vecs)
        )
        BenchmarkUserSimilarity.delete().execute()
        for owner_id, similarity_row in zip(eligible_users, similarity):
            ranked = sorted(
                [
                    (other_id, float(value))
                    for other_id, value in zip(eligible_users, similarity_row)
                ],
                key=lambda pair: pair[1],
                reverse=True,
            )
            BenchmarkUserSimilarity.create(
                user=User.get_by_id(owner_id),
                data=dict(ranked),
            )
        system.date_time_value = dt.datetime.now()
        system.save()
    user_similarity = BenchmarkUserSimilarity.get_or_none(
        BenchmarkUserSimilarity.user == user
    )
    if not user_similarity:
        return {}
    if max_count == -1:
        return user_similarity.data
    return dict(list(user_similarity.data.items())[1 : max_count + 1])


def get_benchmark_mbcf_for_user(
    user,
    max_count=-1,
    min_player_count=39,
    sim_user_count=36,
    zscore_norm=False,
    corrcoef=True,
):
    """Return memory-based collaborative-filtering recommendations for ``user``.

    When ``user.last_benchmark_mbcf_update_time`` is unset or ``days_delta`` of
    it is at least 7, each game the user did not play before the 0.8 quantile
    of their ``last_played`` values is scored as the sum of
    ``normalized playtime * similarity`` over the similar users, and the sorted
    scores are stored in ``BenchmarkUserMBCF``.

    Args:
        user: the user to recommend for.
        max_count: number of entries to return; ``-1`` returns all of them.
        min_player_count: forwarded to ``get_normalized_playtimes``.
        sim_user_count: forwarded to ``get_similar_users`` as ``max_count``.
        zscore_norm: forwarded to ``get_normalized_playtimes``.
        corrcoef: forwarded to ``get_similar_users``.

    Returns:
        ``{Game: score}`` ordered best first, or ``{}`` when nothing is stored.
    """
    if (
        not user.last_benchmark_mbcf_update_time
        or days_delta(user.last_benchmark_mbcf_update_time) >= 7
    ):
        normalized_playtimes = get_normalized_playtimes(
            min_player_count, True, zscore_norm
        )
        users_sim = get_similar_users(
            user, normalized_playtimes, sim_user_count, corrcoef
        )
        history = UserGame.select(
            UserGame.game, UserGame.last_played, UserGame.score
        ).where((UserGame.user == user) & (UserGame.playtime > 0))
        cutoff = np.quantile([row.last_played for row in history], 0.8)
        # Games before the cutoff count as already played and are not recommended.
        played_game_ids = [
            row.game.id for row in history.where(UserGame.last_played < cutoff)
        ]
        scores = {}
        for game_id, playtimes_by_user in normalized_playtimes.items():
            if game_id in played_game_ids:
                continue
            for user_id, similarity in users_sim.items():
                pt = playtimes_by_user.get(int(user_id))
                if not pt:
                    continue
                if game_id in scores:
                    scores[game_id] += pt * similarity
                else:
                    scores[game_id] = pt * similarity
        user_mbcf, _ = BenchmarkUserMBCF.get_or_create(
            user=user,
        )
        ranked_scores = sorted(scores.items(), key=lambda pair: pair[1], reverse=True)
        user_mbcf.data = dict(ranked_scores)
        user_mbcf.save()
        user.last_benchmark_mbcf_update_time = dt.datetime.now()
        user.save()
    user_mbcf = BenchmarkUserMBCF.get_or_none(BenchmarkUserMBCF.user == user)
    if not user_mbcf:
        return {}
    data = {Game.get_by_id(int(key)): value for key, value in user_mbcf.data.items()}
    if max_count == -1:
        return data
    return dict(list(data.items())[:max_count])


# reset_benchmark_mbcf_for_user(users)
# for user in users:
# print(user.username, get_benchmark_mbcf_for_user(user, 10))


def check_result(result_games: list, check_games: list):
    """Return the share of held-out games present among the top recommendations.

    Only the first ``len(check_games)`` entries of ``result_games`` are
    considered, so the value lies between 0 and 1.

    Args:
        result_games: recommended games, best first.
        check_games: held-out games the recommendations are compared against.

    Returns:
        The hit fraction, or ``0.0`` when ``check_games`` is empty (previously
        a ``ZeroDivisionError``).
    """
    check_game_len = len(check_games)
    if check_game_len == 0:
        # Nothing to hit: report zero accuracy instead of dividing by zero.
        return 0.0
    top_results = set(result_games[:check_game_len])
    return len(top_results.intersection(check_games)) / check_game_len


def train_mbcf_for_user(users, try_count = 100):
    """Random-search the MBCF parameters and log every trial to ``mbcf.csv``.

    Each trial draws ``zscore_norm``, ``corrcoef``, ``min_player_count`` (1..50)
    and ``sim_user_count`` (1..50), resets the MBCF timestamps, recomputes the
    recommendations of every user and scores them with ``check_result`` against
    the games whose ``last_played`` is at or above the user's 0.8 quantile.
    Combinations already present in the CSV are skipped; a trial that raises
    ``ValueError`` is reported and skipped.

    Args:
        users: users to evaluate.
        try_count: number of random draws (skipped trials count as draws).

    Returns:
        None; the current and best results are printed after each trial.
    """
    best_accuracy = 0
    best_min_player_count = 1
    # Initialised up front: the original left this unbound, so printing the
    # best result failed whenever the first trial did not beat 0.
    best_sim_user_count = 1
    best_zscore_norm = False
    best_corrcoef = False
    file = Path("mbcf.csv")
    data = pd.read_csv(file).values.tolist() if file.exists() else []

    def check(min_player_count, sim_user_count, zscore_norm, corrcoef):
        candidate = [min_player_count, sim_user_count, zscore_norm, corrcoef]
        return any(el[:4] == candidate for el in data)

    for _ in range(try_count):
        try:
            reset_benchmark_mbcf_for_user(users)
            zscore_norm = random.choice([True, False])
            corrcoef = random.choice([True, False])
            min_player_count = random.randint(1, 50)
            sim_user_count = random.randint(1, 50)
            if check(min_player_count, sim_user_count, zscore_norm, corrcoef):
                print("already exist")
                continue
            mbcf_accuracy = []
            for user in users:
                played_user_games = UserGame.select(
                    UserGame.game, UserGame.last_played
                ).where((UserGame.user == user) & (UserGame.playtime > 0))
                last_played = np.quantile(
                    [ug.last_played for ug in played_user_games], 0.8
                )
                # Only the held-out games are needed here; the unused hold-in
                # query of the original was dropped.
                check_games = [
                    ug.game
                    for ug in played_user_games.where(
                        UserGame.last_played >= last_played
                    )
                ]
                mbcf_result = get_benchmark_mbcf_for_user(
                    user, -1, min_player_count, sim_user_count, zscore_norm, corrcoef
                )
                mbcf_accuracy.append(check_result(list(mbcf_result.keys()), check_games))
            current_accuracy = np.mean(mbcf_accuracy)
            data.append([min_player_count, sim_user_count, zscore_norm, corrcoef, current_accuracy])
            pd.DataFrame(data).to_csv(file, index=False)
            print("current", current_accuracy, min_player_count, sim_user_count, zscore_norm, corrcoef)
            if best_accuracy < current_accuracy:
                best_accuracy = current_accuracy
                best_min_player_count = min_player_count
                best_sim_user_count = sim_user_count
                best_zscore_norm = zscore_norm
                best_corrcoef = corrcoef
            print("best", best_accuracy, best_min_player_count, best_sim_user_count, best_zscore_norm, best_corrcoef)
        except ValueError as error:
            # Best-effort search: report the failed trial instead of hiding it.
            print("skipped", error)
            continue

In [442]:
# train_cbr_for_user(users, 1000)

In [443]:
# train_mbcf_for_user(users, 500)

In [444]:
# mobcf for user
def reset_benchmark_mobcf_for_user():
    """Clear the MOBCF-related system timestamps so the model is retrained.

    Resets ``date_time_value`` on whichever of the ``BenchmarkUserMOBCF`` and
    ``NormalizedPlaytime`` system rows exist.
    """
    system_keys = ("BenchmarkUserMOBCF", "NormalizedPlaytime")
    found_systems = [System.get_or_none(System.key == key) for key in system_keys]
    for system_row in found_systems:
        if not system_row:
            continue
        system_row.date_time_value = None
        system_row.save()


class MF(nn.Module):
    """Matrix-factorisation model scoring a (user, item) pair by a dot product.

    Args:
        num_users: number of rows in the user embedding table.
        num_items: number of rows in the item embedding table.
        emb_size: dimensionality of both embeddings.
    """

    def __init__(self, num_users, num_items, emb_size=100):
        super().__init__()
        self.user_emb = nn.Embedding(num_users, emb_size)
        self.item_emb = nn.Embedding(num_items, emb_size)

    def forward(self, u, v):
        """Return the dot product of the embeddings of ``u`` and ``v``.

        ``u`` and ``v`` are index tensors; the product is summed over dim 1.
        """
        user_vectors = self.user_emb(u)
        item_vectors = self.item_emb(v)
        return (user_vectors * item_vectors).sum(1)

    def print(self, u, v):
        """Print the user and item embeddings looked up for ``u`` and ``v``."""
        user_vectors = self.user_emb(u)
        item_vectors = self.item_emb(v)
        print("u", user_vectors)
        # The original printed the user embeddings again under the "v" label.
        print("v", item_vectors)


def get_benchmark_mobcf_for_user(
    target_user: User, max_count=-1, min_player_count=38, zscore_norm=True
):
    """Recommend games for ``target_user`` with the matrix-factorisation model.

    When the ``BenchmarkUserMOBCF`` system timestamp is unset or ``days_delta``
    of it is at least 7, an ``MF`` model is retrained on normalized playtimes
    and saved, together with the user and game index maps, under
    ``user_data_dir``. The saved model is then loaded and used to score every
    game it knows for ``target_user``.

    NOTE(review): the cached model is reused whatever ``min_player_count`` and
    ``zscore_norm`` are passed; callers that vary them have to reset the
    ``BenchmarkUserMOBCF`` timestamp first.

    Args:
        target_user: the user to recommend for.
        max_count: number of entries to return; ``-1`` returns all of them.
        min_player_count: forwarded to ``get_normalized_playtimes`` when
            training and used to filter the returned games.
        zscore_norm: forwarded to ``get_normalized_playtimes`` when training.

    Returns:
        ``{Game: predicted score}`` ordered best first, or ``{}`` when
        ``target_user`` is not part of the trained model.
    """
    # The original called ``.cuda()`` unconditionally and crashed on hosts
    # without a GPU; CUDA is still used whenever it is available.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    system, _ = System.get_or_create(key="BenchmarkUserMOBCF")
    if not system.date_time_value or days_delta(system.date_time_value) >= 7:
        normalized_playtimes = get_normalized_playtimes(
            min_player_count, True, zscore_norm
        )
        # Re-key the game -> user -> playtime mapping by user.
        users_games = {}
        for game_id, playtimes_by_user in normalized_playtimes.items():
            for user_id, playtime in playtimes_by_user.items():
                users_games.setdefault(user_id, {})[game_id] = playtime
        # Drop users with fewer than ceil(10 / 0.8) = 13 games.
        min_game_count = ceil(10 / 0.8)
        users_games = {
            user_id: games_of_user
            for user_id, games_of_user in users_games.items()
            if len(games_of_user) >= min_game_count
        }
        data = []
        for user_id in users_games:
            user_games = (
                UserGame.select(UserGame.game, UserGame.last_played)
                .join(User)
                .where((UserGame.user.id == user_id) & (UserGame.playtime > 0))
            )
            # Per user, only games before the 0.8 quantile of last_played are used.
            last_played = np.quantile([ug.last_played for ug in user_games], 0.8)
            played_user_games = user_games.where(UserGame.last_played < last_played)
            for ug in played_user_games:
                pt = users_games[user_id].get(ug.game.id)
                if pt:
                    data.append(
                        {
                            "user_id": user_id,
                            "game_id": ug.game.id,
                            "playtime": pt,
                            "last_played": ug.last_played,
                        }
                    )
        data = pd.DataFrame(data)
        # Global time split of the collected rows into train and validation.
        time_80 = np.quantile(data.last_played.values, 0.8)
        train = data[data["last_played"] < time_80].copy()
        val = data[data["last_played"] >= time_80].copy()
        train_user_ids = np.sort(np.unique(train.user_id.values))
        num_users = len(train_user_ids)
        userid2idx = {o: i for i, o in enumerate(train_user_ids)}
        train["user_id"] = train["user_id"].apply(lambda x: userid2idx[x])
        # Validation rows of users or games unseen in training are discarded.
        val["user_id"] = val["user_id"].apply(lambda x: userid2idx.get(x, -1))
        val = val[val["user_id"] >= 0].copy()
        train_game_ids = np.sort(np.unique(train.game_id.values))
        num_items = len(train_game_ids)
        gameid2idx = {o: i for i, o in enumerate(train_game_ids)}
        train["game_id"] = train["game_id"].apply(lambda x: gameid2idx[x])
        val["game_id"] = val["game_id"].apply(lambda x: gameid2idx.get(x, -1))
        val = val[val["game_id"] >= 0].copy()

        def valid_loss(model):
            """Return the MSE of ``model`` on the validation rows."""
            model.eval()
            users = torch.LongTensor(val.user_id.values).to(device)
            items = torch.LongTensor(val.game_id.values).to(device)
            ratings = torch.FloatTensor(val.playtime.values).to(device)
            # No gradients are needed for evaluation.
            with torch.no_grad():
                y_hat = model(users, items)
                loss = F.mse_loss(y_hat, ratings)
            return loss.item()

        def train_epocs(model, epochs=10, lr=0.01, wd=0.0):
            """Run ``epochs`` full-batch Adam steps and print both losses."""
            optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
            # The training tensors do not change between epochs; build them once.
            users = torch.LongTensor(train.user_id.values).to(device)
            items = torch.LongTensor(train.game_id.values).to(device)
            ratings = torch.FloatTensor(train.playtime.values).to(device)
            for _ in range(epochs):
                model.train()
                y_hat = model(users, items)
                loss = F.mse_loss(y_hat, ratings)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                testloss = valid_loss(model)
                print("train loss %.3f valid loss %.3f" % (loss.item(), testloss), end="")
                print("\r", end="")

        model = MF(num_users, num_items, emb_size=300).to(device)
        # Four training rounds with a learning rate lowered tenfold each time.
        train_epocs(model, epochs=300, lr=1, wd=1e-5)
        train_epocs(model, epochs=300, lr=0.1, wd=1e-5)
        train_epocs(model, epochs=300, lr=0.01, wd=1e-5)
        train_epocs(model, epochs=300, lr=0.001, wd=1e-5)
        print()
        torch.save(model.state_dict(), user_data_dir / "benchmark_model.dat")
        with (user_data_dir / "benchmark_userid2idx.json").open("w") as data_file:
            json.dump({int(k): int(v) for k, v in userid2idx.items()}, data_file)
        with (user_data_dir / "benchmark_gameid2idx.json").open("w") as data_file:
            json.dump({int(k): int(v) for k, v in gameid2idx.items()}, data_file)
        system.date_time_value = dt.datetime.now()
        system.save()
    # JSON object keys are strings; convert them back to integer ids.
    with (user_data_dir / "benchmark_userid2idx.json").open() as data_file:
        userid2idx = {int(k): v for k, v in json.load(data_file).items()}
    with (user_data_dir / "benchmark_gameid2idx.json").open() as data_file:
        gameid2idx = {int(k): v for k, v in json.load(data_file).items()}
    num_users = len(userid2idx)
    num_items = len(gameid2idx)
    model = MF(num_users, num_items, emb_size=300).to(device)
    # map_location lets weights saved on a GPU load on a CPU-only host.
    model.load_state_dict(
        torch.load(user_data_dir / "benchmark_model.dat", map_location=device)
    )
    model.eval()
    if target_user.id not in userid2idx:
        return {}
    users = torch.LongTensor([userid2idx[target_user.id]]).to(device)
    items = torch.LongTensor(list(gameid2idx.values())).to(device)
    with torch.no_grad():
        predicted = model(users, items)
    idx2gameid = {value: key for key, value in gameid2idx.items()}
    result = {
        idx2gameid[game_idx.item()]: score.item()
        for score, game_idx in zip(predicted, items)
    }
    user_games = (
        UserGame.select(UserGame.game, UserGame.last_played)
        .where((UserGame.user == target_user) & (UserGame.playtime > 0))
    )
    last_played = np.quantile([ug.last_played for ug in user_games], 0.8)
    played_user_games = user_games.where(UserGame.last_played < last_played)
    played_games = [ug.game for ug in played_user_games]
    games = {
        Game.get_by_id(key): value
        for key, value in sorted(
            result.items(), key=lambda item: item[1], reverse=True
        )
    }
    # NOTE(review): this filter uses ``>`` while get_normalized_playtimes
    # selects games with ``>=`` min_player_count — confirm which is intended.
    data = [
        (game, score)
        for game, score in games.items()
        if game not in played_games
        and get_game_stats(game).player_count > min_player_count
        and game.rating >= 7
    ]
    if max_count == -1:
        return dict(data)
    return dict(data[:max_count])

def train_mobcf_for_user(users, try_count = 100):
    """Random-search the MOBCF parameters and log every trial to ``mobcf.csv``.

    Each trial draws ``zscore_norm`` and ``min_player_count`` (1..50), resets
    the MOBCF timestamps so the model is retrained, and scores every user's
    recommendations with ``check_result`` against the games whose
    ``last_played`` is at or above the user's 0.8 quantile. Combinations
    already present in the CSV are skipped; a trial that raises ``ValueError``
    is reported and skipped.

    Args:
        users: users to evaluate.
        try_count: number of random draws (skipped trials count as draws).

    Returns:
        None; the current and best results are printed after each trial.
    """
    best_accuracy = -1
    best_min_player_count = 1
    best_zscore_norm = False
    file = Path("mobcf.csv")
    data = pd.read_csv(file).values.tolist() if file.exists() else []

    def check(min_player_count, zscore_norm):
        candidate = [min_player_count, zscore_norm]
        return any(el[:2] == candidate for el in data)

    for _ in range(try_count):
        try:
            reset_benchmark_mobcf_for_user()
            zscore_norm = random.choice([True, False])
            min_player_count = random.randint(1, 50)
            if check(min_player_count, zscore_norm):
                print("already exist")
                continue
            mobcf_accuracy = []
            for user in users:
                played_user_games = UserGame.select(
                    UserGame.game, UserGame.last_played
                ).where((UserGame.user == user) & (UserGame.playtime > 0))
                last_played = np.quantile(
                    [ug.last_played for ug in played_user_games], 0.8
                )
                # Only the held-out games are needed here; the unused hold-in
                # query of the original was dropped.
                check_games = [
                    ug.game
                    for ug in played_user_games.where(
                        UserGame.last_played >= last_played
                    )
                ]
                mobcf_result = get_benchmark_mobcf_for_user(
                    user, -1, min_player_count, zscore_norm
                )
                mobcf_accuracy.append(check_result(list(mobcf_result.keys()), check_games))
            current_accuracy = np.mean(mobcf_accuracy)
            data.append([min_player_count, zscore_norm, current_accuracy])
            pd.DataFrame(data).to_csv(file, index=False)
            print("current", current_accuracy, min_player_count, zscore_norm)
            if best_accuracy < current_accuracy:
                best_accuracy = current_accuracy
                best_min_player_count = min_player_count
                best_zscore_norm = zscore_norm
            print("best", best_accuracy, best_min_player_count, best_zscore_norm)
        except ValueError as error:
            # Best-effort search: report the failed trial instead of hiding it.
            print("skipped", error)
            continue


In [445]:
# train_mobcf_for_user(users, 4)

In [446]:
# reset_benchmark_mobcf_for_user()
# get_benchmark_mobcf_for_user(User.get_by_id(76561198083927294))

In [447]:
# hrs for game
def merge_dicts(dicts: list):
    """Sum the values of equal keys across ``dicts``.

    Returns a new dict ordered by summed value, largest first.
    """
    totals = {}
    for mapping in dicts:
        for key in mapping:
            if key not in totals:
                totals[key] = mapping[key]
                continue
            totals[key] += mapping[key]
    ordered_items = sorted(totals.items(), key=lambda pair: pair[1], reverse=True)
    return dict(ordered_items)


def normalize_dict(dict_data: dict, coef: float = 1):
    """Scale the values of ``dict_data`` with ``preprocessing.normalize``.

    The values are normalized together as one vector and then multiplied by
    ``coef``; keys and their order are kept.

    Args:
        dict_data: mapping with numeric values.
        coef: factor applied to the normalized values.

    Returns:
        A new dict with the same keys, or ``{}`` for empty input (which
        previously made ``preprocessing.normalize`` raise ``ValueError``).
    """
    if not dict_data:
        # Nothing to scale; sklearn rejects a zero-feature row.
        return {}
    scaled = preprocessing.normalize([list(dict_data.values())])[0] * coef
    return dict(zip(dict_data, scaled))


def get_benchmark_hrs_for_user(target_user, max_count=-1, cbr_coef = 0.27, mbcf_coef=0.95, mobcf_coef=0.62):
    """Combine the CBR, MBCF and MOBCF recommendations into one ranking.

    Each recommender's scores are normalized and weighted by its coefficient
    with ``normalize_dict``; ``merge_dicts`` then sums and sorts them.

    Args:
        target_user: the user to recommend for.
        max_count: number of entries to return; ``-1`` returns all of them.
        cbr_coef: weight of the CBR scores.
        mbcf_coef: weight of the MBCF scores.
        mobcf_coef: weight of the MOBCF scores.

    Returns:
        ``{Game: combined score}`` ordered best first.
    """
    weighted_sources = (
        (get_benchmark_cbr_for_user(target_user), cbr_coef),
        (get_benchmark_mbcf_for_user(target_user), mbcf_coef),
        (get_benchmark_mobcf_for_user(target_user), mobcf_coef),
    )
    result = merge_dicts(
        [normalize_dict(scores, weight) for scores, weight in weighted_sources]
    )
    if max_count == -1:
        return result
    return dict(list(result.items())[:max_count])

def train_hrs_for_user(users, try_count = 100):
    """Random-search the three hybrid weights and log every trial to ``hrs.csv``.

    Each trial draws ``cbr_coef``, ``mbcf_coef`` and ``mobcf_coef`` from
    0.01..1.00 and scores every user's hybrid recommendations with
    ``check_result`` against the games whose ``last_played`` is at or above the
    user's 0.8 quantile. Combinations already present in the CSV are skipped;
    a trial that raises ``ValueError`` is reported and skipped.

    Args:
        users: users to evaluate.
        try_count: number of random draws (skipped trials count as draws).

    Returns:
        None; the current and best results are printed after each trial.
    """
    best_accuracy = -1
    best_cbr_coef = 0.01
    best_mbcf_coef = 0.01
    best_mobcf_coef = 0.01
    file = Path("hrs.csv")
    data = pd.read_csv(file).values.tolist() if file.exists() else []

    def check(cbr_coef, mbcf_coef, mobcf_coef):
        candidate = [cbr_coef, mbcf_coef, mobcf_coef]
        return any(el[:3] == candidate for el in data)

    for _ in range(try_count):
        try:
            cbr_coef = random.randint(1, 100) / 100.0
            mbcf_coef = random.randint(1, 100) / 100.0
            mobcf_coef = random.randint(1, 100) / 100.0
            if check(cbr_coef, mbcf_coef, mobcf_coef):
                print("already exist")
                continue
            hrs_accuracy = []
            for user in users:
                played_user_games = UserGame.select(
                    UserGame.game, UserGame.last_played
                ).where((UserGame.user == user) & (UserGame.playtime > 0))
                last_played = np.quantile(
                    [ug.last_played for ug in played_user_games], 0.8
                )
                # Only the held-out games are needed here; the unused hold-in
                # query of the original was dropped.
                check_games = [
                    ug.game
                    for ug in played_user_games.where(
                        UserGame.last_played >= last_played
                    )
                ]
                hrs_result = get_benchmark_hrs_for_user(
                    user, -1, cbr_coef, mbcf_coef, mobcf_coef
                )
                hrs_accuracy.append(check_result(list(hrs_result.keys()), check_games))
            current_accuracy = np.mean(hrs_accuracy)
            data.append([cbr_coef, mbcf_coef, mobcf_coef, current_accuracy])
            pd.DataFrame(data).to_csv(file, index=False)
            print("current", current_accuracy, cbr_coef, mbcf_coef, mobcf_coef)
            if best_accuracy < current_accuracy:
                best_accuracy = current_accuracy
                best_cbr_coef = cbr_coef
                best_mbcf_coef = mbcf_coef
                best_mobcf_coef = mobcf_coef
            print("best", best_accuracy, best_cbr_coef, best_mbcf_coef, best_mobcf_coef)
        except ValueError as error:
            # Best-effort search: report the failed trial instead of hiding it.
            print("skipped", error)
            continue

In [448]:
# reset_benchmark_cbr_for_user(users)
# reset_benchmark_mbcf_for_user(users)
# reset_benchmark_mobcf_for_user()
# train_hrs_for_user(users, 500)

In [449]:
# benchmark
def check_result(result_games: list, check_games: list):
    """Return the share of held-out games present among the top recommendations.

    Only the first ``len(check_games)`` entries of ``result_games`` are
    considered, so the value lies between 0 and 1.

    Args:
        result_games: recommended games, best first.
        check_games: held-out games the recommendations are compared against.

    Returns:
        The hit fraction, or ``0.0`` when ``check_games`` is empty (previously
        a ``ZeroDivisionError``).
    """
    check_game_len = len(check_games)
    if check_game_len == 0:
        # Nothing to hit: report zero accuracy instead of dividing by zero.
        return 0.0
    top_results = set(result_games[:check_game_len])
    return len(top_results.intersection(check_games)) / check_game_len

# Per-recommender accuracy, one entry per benchmark user.
cbr_accuracy, mbcf_accuracy, mobcf_accuracy, hrs_accuracy = [], [], [], []

# Clear every cached result so each recommender is recomputed from scratch.
reset_benchmark_cbr_for_user(users)
reset_benchmark_mbcf_for_user(users)
reset_benchmark_mobcf_for_user()

for user in users:
    played_user_games = UserGame.select(UserGame.game, UserGame.last_played).where(
        (UserGame.user == user) & (UserGame.playtime > 0)
    )
    # Games at or after the 0.8 quantile of last_played form the check set.
    last_played = np.quantile([ug.last_played for ug in played_user_games], 0.8)
    input_user_games = played_user_games.where(UserGame.last_played < last_played)
    check_user_games = played_user_games.where(UserGame.last_played >= last_played)
    played_games = [ug.game for ug in input_user_games]
    check_games = [ug.game for ug in check_user_games]
    hrs_result = get_benchmark_hrs_for_user(user)
    cbr_result = get_benchmark_cbr_for_user(user)
    mbcf_result = get_benchmark_mbcf_for_user(user)
    mobcf_result = get_benchmark_mobcf_for_user(user)
    for accuracy_list, recommended in (
        (cbr_accuracy, cbr_result),
        (mbcf_accuracy, mbcf_result),
        (mobcf_accuracy, mobcf_result),
        (hrs_accuracy, hrs_result),
    ):
        accuracy_list.append(check_result(list(recommended.keys()), check_games))

print("cbr accuracy:", np.mean(cbr_accuracy))
print("mbcf accuracy:", np.mean(mbcf_accuracy))
print("mobcf accuracy:", np.mean(mobcf_accuracy))
print("hrs accuracy:", np.mean(hrs_accuracy))

train loss 0.000 valid loss 0.97628574
cbr accuracy: 0.14707883153261306
mbcf accuracy: 0.2996937236433035
mobcf accuracy: 0.08129713423831071
hrs accuracy: 0.2954920429710346
