In [1]:
import datetime as dt
import json
from operator import itemgetter

import numpy as np
import scipy.stats as stats
from sklearn import preprocessing
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity

from game_lists_site.models import (
    Game,
    GameCBR,
    GameDeveloper,
    GameGenre,
    GameStats,
    GameTag,
    System,
    User,
    UserCBR,
    UserGame,
    UserMBCF,
    db,
    user_data_dir,
)
from game_lists_site.utils.utils import days_delta, get_game_stats, normalize_dict

In [2]:
db.rollback()
# user.last_cbr_update_time = None
# user.save()
# game = Game.get_by_id(412020)

In [3]:
import concurrent.futures
import os
from unittest import result


def get_normalized_playtimes_new(
    min_player_count=10, normalize=True, zscore_norm=False
):
    games = [
        gs.game
        for gs in GameStats.select(GameStats.game).where(
            GameStats.player_count >= min_player_count
        )
    ]
    users_games = (
        UserGame.select(UserGame.playtime, UserGame.user)
        .where(UserGame.playtime > 0))
    result = {}
    for game in games:
        users_game = users_games.where(UserGame.game == game)
        playtimes = [user_game.playtime for user_game in users_game]
        if normalize:
            if zscore_norm:
                playtimes = stats.zscore(playtimes)
            else:
                playtimes = preprocessing.normalize([playtimes])[0]
        result[game.id] = {
            ug.user.id: playtime for ug, playtime in zip(users_game, playtimes)
        }
    return result
get_normalized_playtimes_new()


{459820: {76561198415178237: 0.0013916893149904775,
  76561198452902527: 0.0008564241938402939,
  76561198372055801: 0.00010705302423003674,
  76561198083927532: 0.11144219822346824,
  76561198083928469: 0.0056738102841919465,
  76561198190400041: 0.010277090326083526,
  76561198118032525: 0.18637931518449397,
  76561198194383594: 0.005352651211501837,
  76561198189673933: 0.003211590726901102,
  76561198397084477: 0.10855176656925725,
  76561198304593630: 0.03168769517209087,
  76561198276546837: 0.00010705302423003674,
  76561198184586273: 0.9694721874272126},
 380840: {76561198796145774: 0.0013975096269725402,
  76561198112999216: 0.010847336628405908,
  76561198157974457: 0.000532384619799063,
  76561198260719093: 0.9992859313628412,
  76561198083927802: 0.007786125064561296,
  76561198083928130: 0.001464057704447423,
  76561198138848769: 0.0008651250071734773,
  76561198209914487: 0.0001996442324246486,
  76561198194383594: 0.0017967980918218375,
  76561198390246337: 0.00405943272

In [4]:
def get_normalized_playtimes(min_player_count=10, normalize=True, zscore_norm=False):
    system, _ = System.get_or_create(key="NormalizedPlaytime")
    if not system.date_time_value or days_delta(system.date_time_value) >= 7 or True:
        games = [
            game
            for game in Game.select()
            if get_game_stats(game).player_count >= min_player_count
        ]
        result = {}
        for game in games:
            user_games = (
                UserGame.select()
                .where(UserGame.game == game)
                .where(UserGame.playtime > 0)
            )
            playtimes = [user_game.playtime for user_game in user_games]
            normalized_playtimes = []
            if normalize:
                if zscore_norm:
                    normalized_playtimes = stats.zscore(playtimes)
                else:
                    normalized_playtimes = preprocessing.normalize([playtimes])[0]
            else:
                normalized_playtimes = playtimes
            result[game.id] = {
                ug.user.id: normalized_playtime
                for ug, normalized_playtime in zip(user_games, normalized_playtimes)
            }
        with (user_data_dir / "normalized_playtimes.json").open("w") as data_file:
            json.dump(result, data_file)
        system.date_time_value = dt.datetime.now()
        system.save()
        return result
    else:
        with (user_data_dir / "normalized_playtimes.json").open() as data_file:
            result = {
                int(key_a): {int(key_b): value_b for key_b, value_b in value_a.items()}
                for key_a, value_a in json.load(data_file).items()
            }
        return result
# print(len(get_normalized_playtimes()))

In [5]:
def get_mbcf_for_user(
    target_user,
    max_count=-1,
    max_player_count=16,
    normalize=True,
    corrcoef=True,
    sim_user_count=9,
    zscore_norm = False
):
    system, _ = System.get_or_create(key="UserMBCF")
    if not system.date_time_value or days_delta(system.date_time_value) >= 7 or True:
        normalized_playtimes = get_normalized_playtimes_new(max_player_count, normalize, zscore_norm)
        games = Game.select()
        users = [
            user
            for user in User.select()
            if len(
                UserGame.select()
                .where(UserGame.user == user)
                .where(UserGame.playtime != None)
            )
            >= 10
        ]
        game_vecs = []
        for game in games:
            if not normalized_playtimes.get(game.id):
                continue
            game_vec = {user: 0 for user in users}
            user_games = [
                ug
                for ug in UserGame.select().where(UserGame.game == game)
                if normalized_playtimes[game.id].get(ug.user.id)
            ]
            for ug in user_games:
                if ug.user in game_vec:
                    game_vec[ug.user] = normalized_playtimes[game.id].get(ug.user.id)
            game_vecs.append(list(game_vec.values()))
        game_vecs = np.array(game_vecs, dtype=np.float32)
        user_vecs = np.flip(np.rot90(game_vecs), 0)
        if corrcoef:
            user_vecs = np.corrcoef(user_vecs)
        else:
            user_vecs = cosine_similarity(user_vecs)
        sim_users = {}
        for user, user_vec in zip(users, user_vecs):
            result = {}
            for u, sim in zip(users, user_vec):
                result[u] = float(sim)
            result = dict(
                sorted(result.items(), key=lambda x: x[1], reverse=True)[
                    1 : sim_user_count + 1
                ]
            )
            sim_users[user] = result
        i = 0
        count = len(sim_users)
        for user_a, sim in sim_users.items():
            print(f"{i/count*100:.2f}%", end="")
            print("\r", end="")
            i += 1
            played_games = set()
            played_user_games = (
                UserGame.select()
                .where(UserGame.user == user_a)
                .where(UserGame.playtime > 0)
            )
            if len(played_user_games) > 1:
                quantile = np.quantile(
                    [ug.last_played for ug in played_user_games], 0.10
                )
                check_user_games = [
                    ug for ug in played_user_games if ug.last_played <= quantile
                ]
                for ug in played_user_games:
                    if ug not in check_user_games:
                        played_games.add(ug.game)
                played_games = list(played_games)
            games = {}
            for user_b, value in sim.items():
                for user_game in (
                    UserGame.select()
                    .where(UserGame.user == user_b)
                    .where(UserGame.playtime != None)
                ):
                    game = user_game.game
                    user = user_game.user
                    if (
                        game not in played_games
                        and game.id in normalized_playtimes
                        and user.id in normalized_playtimes[game.id]
                        and game.rating >= 7
                    ):
                        if game.id in games:
                            games[game.id] += (
                                normalized_playtimes[game.id].get(user.id) * value
                            )
                        else:
                            games[game.id] = (
                                normalized_playtimes[game.id].get(user.id) * value
                            )
            games = dict(sorted(games.items(), key=lambda x: x[1], reverse=True))
            user_mbcf, _ = UserMBCF.get_or_create(user=user_a)
            user_mbcf.data = json.dumps(games)
            user_mbcf.save()
        system.date_time_value = dt.datetime.now()
        system.save()
    user_mbcf = UserMBCF.get_or_none(UserMBCF.user == target_user)
    if user_mbcf:
        data = {
            Game.get_by_id(game_id): value
            for game_id, value in json.loads(user_mbcf.data).items()
        }
        if len(data) > max_count:
            return dict(list(data.items())[:max_count])
        else:
            return data
    else:
        return {}

In [20]:
def get_mbcf_for_user_new(
    target_user,
    max_count=9,
    min_player_count=10,
    min_game_count=20,
    normalize=True,
    zscore_norm=False,
    corrcoef=False,
    sim_user_count=9,
):
    system, _ = System.get_or_create(key="UserMBCF")
    if not system.date_time_value or days_delta(system.date_time_value) >= 7:
        normalized_playtimes = get_normalized_playtimes_new(
            min_player_count, normalize, zscore_norm
        )
        games = [
            gs.game
            for gs in GameStats.select(GameStats.game).where(
                GameStats.player_count >= min_player_count
            )
        ]
        users = [
            user
            for user in User.select()
            if len(
                UserGame.select()
                .where(UserGame.user == user)
                .where(UserGame.playtime != None)
            )
            >= min_game_count
        ]
        game_vecs = []
        for game in games:
            if not normalized_playtimes.get(game.id):
                continue
            game_vec = {user: 0 for user in users}
            user_games = [
                ug
                for ug in UserGame.select(UserGame.user).where(UserGame.game == game)
                if normalized_playtimes[game.id].get(ug.user.id)
            ]
            for ug in user_games:
                if ug.user in game_vec:
                    game_vec[ug.user] = normalized_playtimes[game.id].get(ug.user.id)
            game_vecs.append(list(game_vec.values()))
        game_vecs = np.array(game_vecs, dtype=np.float32)
        user_vecs = np.flip(np.rot90(game_vecs), 0)
        if corrcoef:
            user_vecs = np.corrcoef(user_vecs)
        else:
            user_vecs = cosine_similarity(user_vecs)
        sim_users = {}
        for user, user_vec in zip(users, user_vecs):
            result = {}
            for u, sim in zip(users, user_vec):
                result[u] = float(sim)
            result = dict(
                sorted(result.items(), key=lambda x: x[1], reverse=True)[
                    1 : sim_user_count + 1
                ]
            )
            sim_users[user] = result
        i = 0
        count = len(sim_users)
        for user_a, sim in sim_users.items():
            print(f"{i/count*100:.2f}%", end="")
            print("\r", end="")
            i += 1
            played_games = []
            played_user_games = (
                UserGame.select()
                .where(UserGame.user == user_a)
                .where(UserGame.playtime > 0)
            )
            if len(played_user_games) > 1:
                last_played = np.quantile(
                    [ug.last_played for ug in played_user_games], 0.8
                )
                played_games = [ug for ug in played_user_games.where(UserGame.last_played < last_played)]
            else:
                played_games = [ug.game for ug in played_user_games]
            games = {}
            for user_b, value in sim.items():
                for user_game in (
                    UserGame.select(UserGame.game, UserGame.playtime, UserGame.user)
                    .where(UserGame.user == user_b)
                    .where(UserGame.playtime != None)
                ):
                    game = user_game.game
                    user = user_game.user
                    if (
                        game not in played_games
                        and game.id in normalized_playtimes
                        and user.id in normalized_playtimes[game.id]
                        and game.rating >= 7
                    ):
                        if game.id in games:
                            games[game.id] += (
                                normalized_playtimes[game.id].get(user.id) * value
                            )
                        else:
                            games[game.id] = (
                                normalized_playtimes[game.id].get(user.id) * value
                            )
            games = dict(sorted(games.items(), key=lambda x: x[1], reverse=True))
            user_mbcf, _ = UserMBCF.get_or_create(user=user_a)
            user_mbcf.data = json.dumps(games)
            user_mbcf.save()
        system.date_time_value = dt.datetime.now()
        system.save()
    user_mbcf = UserMBCF.get_or_none(UserMBCF.user == target_user)
    if user_mbcf:
        data = {
            Game.get_by_id(game_id): value
            for game_id, value in json.loads(user_mbcf.data).items()
        }
        return dict(list(data.items())[:max_count])
    else:
        return {}


# system, _ = System.get_or_create(key="UserMBCF")
# system.date_time_value = None
# system.save()
# user = User.get_by_id(76561198083927294)
# get_mbcf_for_user_new(user, 9)


In [14]:
users = []
for user in User.select():
    count = (
        UserGame.select(UserGame.score)
        .where(UserGame.user == user)
        .where(UserGame.score > 0)
        .count()
    )
    if count >= 10:
        users.append(user)
print(len(users))

7


In [None]:
user = User.get_by_id(76561198083927294)
get_mbcf_for_user(user, 9)

  c /= stddev[:, None]
  c /= stddev[None, :]


1.01%

KeyboardInterrupt: 

In [22]:
import random
from itertools import combinations_with_replacement

import matplotlib.pyplot as plt
import pandas as pd
from IPython.display import clear_output, display

max_accuracy = 0 # 2
best_row = [0, 0, False, False, False, 0]
data = {}
lines = []
for user in users:
    played_user_games = UserGame.select().where(UserGame.user == user).where(UserGame.playtime > 0)
    last_played = np.quantile([ug.last_played for ug in played_user_games], 0.8)
    input_data = played_user_games.where(UserGame.last_played < last_played)
    check_data = played_user_games.where(UserGame.last_played >= last_played)
    data[user] = [input_data, check_data]
for i in range(10000):
    row = [
        21,
        random.randint(1, 50),
        True,
        random.choice([True, False]),
        random.choice([True, False]),
        random.randint(1, 50),
    ]
    system, _ = System.get_or_create(key="UserMBCF")
    system.date_time_value = None
    system.save()
    accuracy = []
    for user, d in data.items():
        result = get_mbcf_for_user_new(user, d[0].count(), row[0], row[1], row[2], row[3], row[4], row[5])
        intersection_count = len(set([ug.game for ug in d[1]]).intersection(list(result.keys())))
        accuracy.append(intersection_count / len(d[1]))
    accuracy = np.mean(accuracy)
    if accuracy > max_accuracy:
        max_accuracy = accuracy
        best_row = row
    lines.append(row + [accuracy])
    clear_output(wait=True)
    print(f"Iteration #{i}")
    print("row", row)
    print(accuracy)
    print("-"*24)
    print(f"Best Iteration")
    print("row", best_row)
    print(max_accuracy)
    print("-"*24)
    df = pd.DataFrame(lines)
    df.to_csv("result2.csv", index=False)
# clear_output(wait=True)

Iteration #22
row [21, 46, True, False, True, 27]
0.2058207898544033
------------------------
Best Iteration
row [21, 17, True, False, False, 10]
0.28301628343645147
------------------------
24.27%

KeyboardInterrupt: 