In [1]:
import os, tqdm, json, pickle, gc, zipfile, itertools, time, collections
import pandas as pd
import numpy as np
from dateutil import parser
import warnings
warnings.filterwarnings('ignore')
from collections import defaultdict


from sklearn.model_selection import train_test_split
import catboost as cb
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score

class FeaturesGenerator:
    def __init__(self) -> None:
        self.import_data()
        

    def load_data(self, path) -> pickle.load:
        with open(path, 'rb') as f:
            return pickle.load(f)

    def dump_data(self, path, data) -> None:
        with open(path, 'wb') as f:
            pickle.dump(data, f)

    def reduce_mem_usage(self, series):
        try:
            col_type = series.dtype

            if col_type != object:
                c_min = series.min()
                c_max = series.max()
                if str(col_type)[:3] == 'int':
                    if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                        series = series.astype(np.int8)
                    elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                        series = series.astype(np.int16)
                    elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                        series = series.astype(np.int32)
                    elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                        series = series.astype(np.int64)  
                else:
                    if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                        series = series.astype(np.float16)
                    elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                        series = series.astype(np.float32)
                    else:
                        series = series.astype(np.float64)
            else:
                pass 
            return series
        
        except:
            pass    

    def import_data(self)->None:
        """Load every team/player statistics table and store it on ``self``.

        Each pickle is read with ``load_data``; every column is then passed
        through ``reduce_mem_usage`` and the result is stored as an attribute
        named after the table. The ``path_*`` names are not defined in this
        class and must already exist as globals when this method runs.
        """
        sources = (
            ("team_all_stats_and_location", path_team_all_stats_and_location),
            ("team_map_info_stats", path_team_map_info_stats),
            ("team_game_month_stats", path_team_game_month_stats),
            ("team_game_hour_stats", path_team_game_hour_stats),
            ("team_all_pair_stats", path_team_all_pair_stats),
            ("player_all_stats", path_player_all_stats),
            ("player_info_passport", path_player_info_passport),
            ("player_map_info_stats", path_player_map_info_stats),
            ("player_game_month_stats", path_player_game_month_stats),
            ("player_game_hour_stats", path_player_game_hour_stats),
        )

        # Read everything first so a failed load leaves ``self`` untouched.
        raw_tables = {name: self.load_data(path) for name, path in sources}

        for name, table in raw_tables.items():
            setattr(self, name, table.apply(self.reduce_mem_usage))

        # Drop the un-reduced frames before asking the collector to run.
        del raw_tables, table
        gc.collect()


          

    

    def check_team_id(self, team_dictionary:pd.DataFrame, team:id) -> pd.BooleanDtype:
        return team_dictionary["team.id"] == team

    def check_player_id(self, player_dictionary:pd.DataFrame, player:id) -> pd.BooleanDtype:
        return player_dictionary["player.id"] == player

    def check_map_id(self, map_dictionary:pd.DataFrame, game_map:id) -> pd.BooleanDtype:
        return map_dictionary["map.id"] == game_map

    def check_month(self, month_dictionary:pd.DataFrame, game_month:int) -> pd.BooleanDtype:
        return month_dictionary["month"] == game_month

    def check_hour(self, hour_dictionary:pd.DataFrame, game_hour:int) -> pd.BooleanDtype:
        return hour_dictionary["hour"] == game_hour 

    #### For team

    def only_team_id_dictionary(self, team_dictionary:pd.DataFrame, team:id) -> pd.DataFrame:
        return team_dictionary[self.check_team_id(team_dictionary, team)]

    def team_and_map_id_dictionary(self, team_and_map:pd.DataFrame, team:id, game_map:id) -> pd.DataFrame:

        data_team_and_map = team_and_map[self.check_team_id(team_and_map, team) & self.check_map_id(team_and_map, game_map)]

        if len(data_team_and_map) != 0:            
            return data_team_and_map
        else:            
            return team_and_map[self.check_team_id(team_and_map, team)].groupby("team.id", as_index=False).mean()

    def team_and_month_dictionary(self, team_and_month:pd.DataFrame, team:id, game_month:int) -> pd.DataFrame:

        data_team_and_month = team_and_month[self.check_team_id(team_and_month, team) & self.check_month(team_and_month, game_month)]

        if len(data_team_and_month) != 0:            
            return data_team_and_month
        else:            
            return team_and_month[self.check_team_id(team_and_month, team)].groupby("team.id", as_index=False).mean()

    def team_and_hour_dictionary(self, team_and_hour:pd.DataFrame, team:id, game_hour:int) -> pd.DataFrame:

        data_team_and_hour = team_and_hour[self.check_team_id(team_and_hour, team) & self.check_hour(team_and_hour, game_hour)]

        if len(data_team_and_hour) != 0:            
            return data_team_and_hour
        else:            
            return team_and_hour[self.check_team_id(team_and_hour, team)].groupby("team.id", as_index=False).mean()

    def team_pair_dictionary(self, team_pair:pd.DataFrame, team1:id, team2:id) -> pd.DataFrame:

        data_team_pair = team_pair[self.check_team_id(team_pair, team1) & (team_pair["opponent.id"] == team2)]
        if len(data_team_pair) == 0:            
            return team_pair[self.check_team_id(team_pair, team1)].groupby("team.id", as_index=False).mean()
        else:            
            return data_team_pair

    # For players
    def check_player_dict(self, player_dict:pd.DataFrame, player_id:id, check_function=True) -> pd.DataFrame:
        data_player_dict = player_dict[self.check_player_id(player_dict, player_id) & check_function ]

        if len(data_player_dict) == 0:            
            return player_dict[self.check_player_id(player_dict, player_id)].groupby("player.id", as_index=False).mean()
        else:            
            return data_player_dict

    def players_id_dictionary(self, player_dict:pd.DataFrame, players:list) -> list:
        return [ self.check_player_dict(player_dict, player_id=player_id).values for player_id in players ]

    def players_and_map_dictionary(self, player_dict:pd.DataFrame, players:list, game_map:id) -> list:
        return [ self.check_player_dict(player_dict, player_id, self.check_map_id(player_dict, game_map)).values[0,2:] for player_id in players ]        

    def players_and_month_dictionary(self, player_dict:pd.DataFrame, players:list, game_month:int) -> list:
        return [ self.check_player_dict(player_dict, player_id, self.check_month(player_dict, game_month)).values[0,2:] for player_id in players ] 

    def players_and_hour_dictionary(self, player_dict:pd.DataFrame, players:list, game_hour:int) -> list:
        return [ self.check_player_dict(player_dict, player_id, self.check_hour(player_dict, game_hour)).values[0,2:] for player_id in players ] 


    def check_players_teams_base(self, team1:id, players_team1:list, team2:id, players_team2:list):
        """
        Проверяет наличие всех данных в базах
        :param team1: id
        :param players_team1: list
        :param team2: id
        :param players_team2: list
        :return: status
        """

        players_dict = self.player_all_stats

        players = players_team1.copy()
        players.extend(players_team2)

        for player in players:
            data_player_dict = players_dict[self.check_player_id(players_dict, player)]
            if len(data_player_dict) == 0:
                return "ERROR: the player id: %s is not in the database" % player

        teams_dict = self.team_all_stats_and_location

        for team in [team1, team2]:
            team_check = self.only_team_id_dictionary(teams_dict, team)
            if len(team_check) == 0:
                return "ERROR: the team id: %s is not in the database" % team

        return "SUCCESS"

    def team_profile_with_map(self, team1:id, team2:id, game_map:id, game_month:int, game_hour:int)-> np.array:
        
        all_stats_and_location = self.only_team_id_dictionary(self.team_all_stats_and_location, team1)
        all_stats_and_location = all_stats_and_location.values.ravel()

        
        map_info_stats = self.team_and_map_id_dictionary(self.team_map_info_stats, team1, game_map)
        map_info_stats = map_info_stats.values.ravel()[2:]

        
        game_month_stats = self.team_and_month_dictionary(self.team_game_month_stats, team1, game_month) 
        game_month_stats = game_month_stats.values.ravel()[2:]

        
        game_hour_stats = self.team_and_hour_dictionary(self.team_game_hour_stats, team1, game_hour)
        game_hour_stats = game_hour_stats.values.ravel()[2:]

        all_pair_stats = self.team_pair_dictionary(self.team_all_pair_stats, team1, team2)
        all_pair_stats = all_pair_stats.values.ravel()[2:]

        team_profile = np.append(all_stats_and_location, map_info_stats)
        team_profile = np.append(team_profile, game_month_stats)
        team_profile = np.append(team_profile, game_hour_stats)
        team_profile = np.append(team_profile, all_pair_stats)

        return team_profile

    def teams_columns_with_map(self) -> np.array:
        columns = self.team_all_stats_and_location.columns.to_numpy()
        columns = np.append(columns, self.team_map_info_stats.columns.to_numpy()[2:] + "_map")
        columns = np.append(columns, self.team_game_month_stats.columns.to_numpy()[2:] + "_month")
        columns = np.append(columns, self.team_game_hour_stats.columns.to_numpy()[2:] + "_hour")
        columns = np.append(columns, self.team_all_pair_stats.columns.to_numpy()[2:] + "_pair")

        return columns


    def team_profile(self, team1:id, team2:id, game_month:int, game_hour:int)-> np.array:
    
        all_stats_and_location = self.only_team_id_dictionary(self.team_all_stats_and_location, team1)
        all_stats_and_location = all_stats_and_location.values.ravel()

        
        game_month_stats = self.team_and_month_dictionary(self.team_game_month_stats, team1, game_month) 
        game_month_stats = game_month_stats.values.ravel()

        
        game_hour_stats = self.team_and_hour_dictionary(self.team_game_hour_stats, team1, game_hour)
        game_hour_stats = game_hour_stats.values.ravel()

        
        all_pair_stats = self.team_pair_dictionary(self.team_all_pair_stats, team1, team2)
        all_pair_stats = all_pair_stats.values.ravel()

        team_profile = np.append(all_stats_and_location, game_month_stats)
        team_profile = np.append(team_profile, game_hour_stats)
        team_profile = np.append(team_profile, all_pair_stats)

        return team_profile
        
        
    def players_5_profile_with_map(self, players:list, game_map:id, game_month:int, game_hour:int )-> np.array:

        all_stats =  self.players_id_dictionary(self.player_all_stats, players)
        all_stats = np.array(all_stats).ravel()

        info_passport = self.players_id_dictionary(self.player_info_passport, players)

        # age_players = []
        nationality_players = []

        for player_passport in info_passport:
            # age_players.append(player_passport.ravel()[1])
            nationality_players.append(player_passport.ravel()[1:])

        nationality_players = np.array(nationality_players).sum(axis=0)

        players_passport = nationality_players

        
        map_info_stats = self.players_and_map_dictionary(self.player_map_info_stats, players, game_map)
        map_info_stats = np.array(map_info_stats).ravel()

        
        game_month_stats = self.players_and_month_dictionary(self.player_game_month_stats, players, game_month)
        game_month_stats = np.array(game_month_stats).ravel()

        
        game_hour_stats = self.players_and_hour_dictionary(self.player_game_hour_stats, players, game_hour)
        game_hour_stats = np.array(game_hour_stats).ravel()

        player_profile = np.append(all_stats, players_passport)
        player_profile = np.append(player_profile, map_info_stats)
        player_profile = np.append(player_profile, game_month_stats)
        player_profile = np.append(player_profile, game_hour_stats)

        return player_profile

    def players_5_column_create(self, dictionary:pd.DataFrame, first:bool=False) -> np.array:

        if (first == True):
            dict_col = dictionary.columns.to_numpy()
        else:
            dict_col = dictionary.columns.to_numpy()[2:]
        columns =  dict_col + "_player1"
        for i in range(2, 6):
            columns = np.append(columns, dict_col + "_player" + str(i))

        return columns

    def players_5_columns_with_map(self) -> np.array:
        all_stats_columns = self.players_5_column_create(self.player_all_stats, first=True)

        # passport_columns = ["age_player" + str(i) for i in range(1, 6)]
        passport_columns = self.player_info_passport.columns.to_numpy()[1:]

        map_columns = self.players_5_column_create(self.player_map_info_stats) + "_map"

        month_columns = self.players_5_column_create(self.player_game_month_stats) + "_month"

        hour_columns = self.players_5_column_create(self.player_game_hour_stats) + "_hour"

        columns = np.append(all_stats_columns, passport_columns)
        columns = np.append(columns, map_columns)
        columns = np.append(columns, month_columns)
        columns = np.append(columns, hour_columns)

        return columns


    def players_5_profile(self, players:list, game_month:int, game_hour:int ) -> np.array:

        all_stats =  self.players_id_dictionary(self.player_all_stats, players)
        all_stats = np.array(all_stats).ravel()

        info_passport = self.players_id_dictionary(self.player_info_passport, players)
        info_passport = np.array(info_passport).ravel()

        game_month_stats = self.players_and_month_dictionary(self.player_game_month_stats, players, game_month)
        game_month_stats = np.array(game_month_stats).ravel()
        
        game_hour_stats = self.players_and_hour_dictionary(self.player_game_hour_stats, players, game_hour)
        game_hour_stats = np.array(game_hour_stats).ravel()

        player_profile = np.append(all_stats, info_passport)
        player_profile = np.append(player_profile, game_month_stats)
        player_profile = np.append(player_profile, game_hour_stats)

        return player_profile

#####################################

    def dict_fill_data(self, id_key:str, id_value:id,  extra_key:str, extra_value:int, player_stat_sum:dict, player_stat_mean:dict):
        fill_data = {id_key: id_value, extra_key: extra_value}
        fill_data.update(player_stat_sum)
        fill_data.update(player_stat_mean)
        return fill_data

    def add_team_all_stats_and_location(self, team_id:id, team_location:str, player_stat_sum:dict, player_stat_mean:dict):
        location_key = "location_" + team_location
        if location_key in self.team_all_stats_and_location.columns:
            fill_data = self.dict_fill_data(id_key="team.id", id_value=team_id,
                                            extra_key=location_key, extra_value=1,
                                            player_stat_sum=player_stat_sum, player_stat_mean=player_stat_mean)
        else:
            fill_data = {"team.id": team_id}
            fill_data.update(player_stat_sum)
            fill_data.update(player_stat_mean)

        teams_dict = self.team_all_stats_and_location

        teams_dict = teams_dict.append(fill_data, ignore_index=True)
        teams_dict.iloc[-1] = teams_dict.iloc[-1].fillna(0)
        self.team_all_stats_and_location = teams_dict

    def add_team_map_info_stats(self, team_id:id, game_map:int, player_stat_sum:dict, player_stat_mean:dict):
        fill_data = self.dict_fill_data(id_key="team.id", id_value=team_id,
                                        extra_key="map.id", extra_value=game_map,
                                        player_stat_sum=player_stat_sum, player_stat_mean=player_stat_mean)

        teams_dict = self.team_map_info_stats

        teams_dict = teams_dict.append(fill_data, ignore_index=True)
        self.team_map_info_stats = teams_dict

    def add_team_game_month_stats(self, team_id:id, game_month:int, player_stat_sum:dict, player_stat_mean:dict):
        fill_data = self.dict_fill_data(id_key="team.id", id_value=team_id,
                                        extra_key="month", extra_value=game_month,
                                        player_stat_sum=player_stat_sum, player_stat_mean=player_stat_mean)

        teams_dict = self.team_game_month_stats

        teams_dict = teams_dict.append(fill_data, ignore_index=True)
        self.team_game_month_stats = teams_dict

    def add_team_game_hour_stats(self, team_id:id, game_hour:int, player_stat_sum:dict, player_stat_mean:dict):
        fill_data = self.dict_fill_data(id_key="team.id", id_value=team_id,
                                        extra_key="hour", extra_value=game_hour,
                                        player_stat_sum=player_stat_sum, player_stat_mean=player_stat_mean)

        teams_dict = self.team_game_hour_stats

        teams_dict = teams_dict.append(fill_data, ignore_index=True)
        self.team_game_hour_stats = teams_dict

    def add_team_all_pair_stats(self, team_id:id, opponent_id:id, player_stat_sum:dict, player_stat_mean:dict):
        fill_data = self.dict_fill_data(id_key="team.id", id_value=team_id,
                                        extra_key="opponent.id", extra_value=opponent_id,
                                        player_stat_sum=player_stat_sum, player_stat_mean=player_stat_mean)

        teams_dict = self.team_all_pair_stats

        teams_dict = teams_dict.append(fill_data, ignore_index=True)
        teams_dict.iloc[-1] = teams_dict.iloc[-1].fillna(0)
        self.team_all_pair_stats = teams_dict

    def change_team_all_pair_wins(self, team_id:id, opponent_id:id, winner_id:id):
        teams_dict = self.team_all_pair_stats
        find_row = self.check_team_id(teams_dict, team_id) & (teams_dict["opponent.id"] == opponent_id)

        if teams_dict.loc[find_row, ["wins_sum"]].empty \
                or teams_dict.loc[find_row, ["wins_sum"]].isna().items():
            win_value = int(team_id == winner_id)
            teams_dict.loc[find_row, ["wins_sum"]] = win_value
            teams_dict.loc[find_row, ["wins_mean"]] = win_value

        else:
            teams_dict.loc[find_row, ["wins_sum"]] += int(team_id == winner_id)
            wins_sum = teams_dict.loc[find_row, ["wins_sum"]].values
            games_count = teams_dict.loc[find_row, ["stats_count"]].values / 5
            teams_dict.loc[find_row, ["wins_mean"]] = wins_sum / games_count

        self.team_all_pair_stats = teams_dict

    def add_player_all_stats(self, player_id:id, player_stat_sum:dict, player_stat_mean:dict):
        fill_data = {"player.id": player_id}
        fill_data.update(player_stat_sum)
        fill_data.update(player_stat_mean)

        players_dict = self.player_all_stats

        players_dict = players_dict.append(fill_data, ignore_index=True)
        self.player_all_stats = players_dict

    def add_player_info_passport(self, player_id:id, player_nationality:str):
        fill_data = {"player.id": player_id}

        if player_nationality != None:
            location_key = "nationality_" + player_nationality
            if location_key in self.player_info_passport.columns:
                fill_data.update({location_key: 1})

        players_dict = self.player_info_passport

        players_dict = players_dict.append(fill_data, ignore_index=True)
        players_dict.iloc[-1] = players_dict.iloc[-1].fillna(0)
        self.player_info_passport = players_dict

    def add_player_map_info_stats(self, player_id:id, game_map:int, player_stat_sum:dict, player_stat_mean:dict):
        fill_data = self.dict_fill_data(id_key="player.id", id_value=player_id,
                                        extra_key="map.id", extra_value=game_map,
                                        player_stat_sum=player_stat_sum, player_stat_mean=player_stat_mean)

        players_dict = self.player_map_info_stats

        players_dict = players_dict.append(fill_data, ignore_index=True)
        self.player_map_info_stats = players_dict

    def add_player_game_month_stats(self, player_id:id, game_month:int, player_stat_sum:dict, player_stat_mean:dict):
        fill_data = self.dict_fill_data(id_key="player.id", id_value=player_id,
                                        extra_key="month", extra_value=game_month,
                                        player_stat_sum=player_stat_sum, player_stat_mean=player_stat_mean)

        players_dict = self.player_game_month_stats

        players_dict = players_dict.append(fill_data, ignore_index=True)
        self.player_game_month_stats = players_dict

    def add_player_game_hour_stats(self, player_id:id, game_hour:int, player_stat_sum:dict, player_stat_mean:dict):
        fill_data = self.dict_fill_data(id_key="player.id", id_value=player_id,
                                        extra_key="hour", extra_value=game_hour,
                                        player_stat_sum=player_stat_sum, player_stat_mean=player_stat_mean)

        players_dict = self.player_game_hour_stats

        players_dict = players_dict.append(fill_data, ignore_index=True)
        self.player_game_hour_stats = players_dict


##############

    def change_dictionary(self, dictionary: pd.DataFrame, id_key:str, id_value:id, extra_key:str,
                          extra_value:int, player_stat_sum:dict, player_stat_mean:dict):
        if id_key == "team.id":
            find_id = self.check_team_id(dictionary, id_value)
        elif id_key == "player.id":
            find_id = self.check_player_id(dictionary, id_value)
        else:
            assert False, "Неправильный id"

        if extra_key:
            find_row = find_id & (dictionary[extra_key] == extra_value)
        else:
            find_row = find_id

        for key_sum, value_sum in player_stat_sum.items():
            if value_sum == None:                
                continue
            dictionary.loc[find_row, [key_sum]] += float(value_sum)

        for key_mean in player_stat_mean.keys():
            key_sum_value = dictionary.loc[find_row, [key_mean + "_sum"]].values
            key_count = dictionary.loc[find_row, ["stats_count"]].values
            dictionary.loc[find_row, [key_mean]] = key_sum_value / key_count

        return dictionary


    def change_team_all_stats_and_location(self, team_id:id, player_stat_sum:dict, player_stat_mean:dict):
        self.team_all_stats_and_location = self.change_dictionary(dictionary= self.team_all_stats_and_location,
                                                                   id_key= "team.id", id_value= team_id,
                                                                   extra_key= None, extra_value= None,
                                                                   player_stat_sum= player_stat_sum,
                                                                  player_stat_mean= player_stat_mean)

    def change_team_map_info_stats(self, team_id:id, game_map:int, player_stat_sum:dict, player_stat_mean:dict):
        self.team_map_info_stats = self.change_dictionary(dictionary= self.team_map_info_stats,
                                                       id_key= "team.id", id_value= team_id,
                                                       extra_key= "map.id", extra_value= game_map,
                                                       player_stat_sum= player_stat_sum,
                                                        player_stat_mean= player_stat_mean)

    def change_team_game_month_stats(self, team_id:id, month:int, player_stat_sum:dict, player_stat_mean:dict):
        self.team_game_month_stats = self.change_dictionary(dictionary= self.team_game_month_stats,
                                                       id_key= "team.id", id_value= team_id,
                                                       extra_key= "month", extra_value= month,
                                                       player_stat_sum= player_stat_sum,
                                                        player_stat_mean= player_stat_mean)

    def change_team_game_hour_stats(self, team_id:id, hour:int, player_stat_sum:dict, player_stat_mean:dict):
        self.team_game_hour_stats = self.change_dictionary(dictionary= self.team_game_hour_stats,
                                                       id_key= "team.id", id_value= team_id,
                                                       extra_key= "hour", extra_value= hour,
                                                       player_stat_sum= player_stat_sum,
                                                           player_stat_mean= player_stat_mean)

    def change_team_all_pair_stats(self, team_id:id, opponent_id:int, player_stat_sum:dict, player_stat_mean:dict):
        self.team_all_pair_stats = self.change_dictionary(dictionary= self.team_all_pair_stats,
                                                       id_key= "team.id", id_value= team_id,
                                                       extra_key= "opponent.id", extra_value= opponent_id,
                                                       player_stat_sum= player_stat_sum,
                                                        player_stat_mean= player_stat_mean)


    ############

    def change_player_all_stats(self, player_id:id, player_stat_sum:dict, player_stat_mean:dict):
        self.player_all_stats = self.change_dictionary(dictionary= self.player_all_stats,
                                                       id_key= "player.id", id_value= player_id,
                                                       extra_key= None, extra_value= None,
                                                       player_stat_sum=player_stat_sum,
                                                       player_stat_mean=player_stat_mean)

    def change_player_map_info_stats(self, player_id:id, game_map:int, player_stat_sum:dict, player_stat_mean:dict):
        self.player_map_info_stats = self.change_dictionary(dictionary= self.player_map_info_stats,
                                                           id_key= "player.id", id_value= player_id,
                                                           extra_key= "map.id", extra_value= game_map,
                                                            player_stat_sum=player_stat_sum,
                                                            player_stat_mean=player_stat_mean)

    def change_player_game_month_stats(self, player_id:id, month:int, player_stat_sum:dict, player_stat_mean:dict):
        self.player_game_month_stats = self.change_dictionary(dictionary= self.player_game_month_stats,
                                                               id_key= "player.id", id_value= player_id,
                                                               extra_key= "month", extra_value= month,
                                                              player_stat_sum=player_stat_sum,
                                                              player_stat_mean=player_stat_mean)

    def change_player_game_hour_stats(self, player_id:id, hour:int, player_stat_sum:dict, player_stat_mean:dict):
        self.player_game_hour_stats = self.change_dictionary(dictionary= self.player_game_hour_stats,
                                                               id_key= "player.id", id_value= player_id,
                                                               extra_key= "hour", extra_value= hour,
                                                             player_stat_sum=player_stat_sum,
                                                             player_stat_mean=player_stat_mean)



class WinnerPrediction4Map(FeaturesGenerator):
    """Assemble per-game feature rows (including the map) for winner prediction.

    Everything that touches the stat dictionaries is inherited from
    ``FeaturesGenerator``: the profile builders (``team_profile_with_map``,
    ``players_5_profile_with_map``), the column-name builders
    (``teams_columns_with_map``, ``players_5_columns_with_map``) and the
    ``add_*`` / ``change_*`` updaters used by ``fill_the_dictionary``.
    """

    def __init__(self):
        super().__init__()

    # ------------------------------------------------------------------ features

    def collect_features_with_map(self, team1: int, team1_players: list, team2: int, team2_players: list,
                                  game_map: int, game_month: int, game_hour: int,
                                  terrorist_begin: int = None) -> np.ndarray:
        """Return the feature row of one game as an array of shape ``(1, n_features)``.

        Layout: team1 profile, team1 players profile, team2 profile, team2
        players profile, then ``[game_map, game_month, game_hour]`` - the same
        order in which ``collect_columns_with_map`` names the columns.

        ``terrorist_begin`` is accepted but not used here; it lets this method be
        passed as ``cf_function`` to ``collect_features_with_predict``, which
        always forwards that argument.
        """
        team1_profile = self.team_profile_with_map(team1, team2, game_map, game_month, game_hour)
        team1_players_profile = self.players_5_profile_with_map(team1_players, game_map, game_month, game_hour)
        team2_profile = self.team_profile_with_map(team2, team1, game_map, game_month, game_hour)
        team2_players_profile = self.players_5_profile_with_map(team2_players, game_map, game_month, game_hour)

        # np.append without an axis flattens both operands, so every part ends
        # up in one flat vector regardless of its original shape.
        data_vector = np.append(team1_profile, team1_players_profile)
        for part in (team2_profile,
                     team2_players_profile,
                     np.array([game_map, game_month, game_hour])):
            data_vector = np.append(data_vector, part)

        return data_vector.reshape(1, len(data_vector))

    def collect_features_with_map_and_terrorist(self, team1: int, team1_players: list, team2: int, team2_players: list,
                                                game_map: int, game_month: int, game_hour: int,
                                                terrorist_begin: int) -> np.ndarray:
        """Return ``collect_features_with_map`` with ``terrorist_begin`` appended as the last column."""
        data_vector = self.collect_features_with_map(team1, team1_players, team2, team2_players,
                                                     game_map, game_month, game_hour)
        # data_vector is (1, n); [[value]] keeps the append along the column axis.
        data_vector = np.append(data_vector, [[terrorist_begin]], axis=1)

        return data_vector

    def collect_features_with_predict(self, cf_function, team1: int, team1_players: list, team2: int,
                                      team2_players: list, game_map: int, game_month: int, game_hour: int,
                                      terrorist_begin: int = None, winner: int = None) -> np.ndarray:
        """Build a feature row with ``cf_function`` and append ``winner`` as the last column.

        Args:
            cf_function: one of the ``collect_features_*`` methods; it is called
                with the eight game arguments in positional order.
            winner: target value appended after the features.

        Returns:
            Array of shape ``(1, n_features + 1)``.
        """
        data_vector = cf_function(team1, team1_players, team2, team2_players,
                                  game_map, game_month, game_hour, terrorist_begin)
        data_vector = np.append(data_vector, winner)

        return data_vector.reshape(1, len(data_vector))

    # ------------------------------------------------------------------- columns

    def collect_columns_with_map(self) -> np.ndarray:
        """Return the column names matching ``collect_features_with_map``."""
        base_columns = np.append(self.teams_columns_with_map(), self.players_5_columns_with_map())
        team1_col = base_columns + "_team1"
        team2_col = base_columns + "_team2"

        data_columns = np.append(team1_col, team2_col)
        data_columns = np.append(data_columns, np.array(["game_map", "game_month", "game_hour"]))

        return data_columns

    def collect_columns_with_map_and_terrorist(self) -> np.ndarray:
        """Return the column names matching ``collect_features_with_map_and_terrorist``."""
        data_columns = self.collect_columns_with_map()
        data_columns = np.append(data_columns, ["terrorist_begin_team"])

        return data_columns

    def collect_columns_with_predict(self, collect_columns_func):
        """Return ``collect_columns_func()`` followed by the ``"winner"`` target column."""
        data_columns = collect_columns_func()
        data_columns = np.append(data_columns, ["winner"])

        return data_columns

    # ------------------------------------------------------------- dictionaries

    @staticmethod
    def _split_player_stats(player: dict):
        """Turn one player record into the ``(player_stat_sum, player_stat_mean)`` dict pair."""
        def as_float(key):
            # Missing / None / 0 values are all stored as 0.0.
            return float(player[key] or 0.0)

        adr, kast, rating = as_float("adr"), as_float("kast"), as_float("rating")

        player_stat_sum = {
            "assists": as_float("assists"),
            "deaths": as_float("deaths"),
            "flash_assists": as_float("flash_assists"),
            "headshots": as_float("headshots"),
            "kills": as_float("kills"),
            "adr_sum": adr,
            "kast_sum": kast,
            "rating_sum": rating,
            "stats_count": 1,
        }
        player_stat_mean = {"adr": adr, "kast": kast, "rating": rating}

        return player_stat_sum, player_stat_mean

    def _apply_team_record(self, team_id, team_location, opponent_id, game_map, month, hour,
                           player_stat_sum, player_stat_mean):
        """Add (unknown team) or update (known team) every team-level dictionary with one player record."""
        team_check = self.only_team_id_dictionary(self.team_all_stats_and_location, team_id)
        if team_check.empty:
            self.add_team_all_stats_and_location(team_id, team_location, player_stat_sum, player_stat_mean)
            self.add_team_map_info_stats(team_id, game_map, player_stat_sum, player_stat_mean)
            self.add_team_game_month_stats(team_id, month, player_stat_sum, player_stat_mean)
            self.add_team_game_hour_stats(team_id, hour, player_stat_sum, player_stat_mean)
            self.add_team_all_pair_stats(team_id, opponent_id, player_stat_sum, player_stat_mean)
        else:
            self.change_team_all_stats_and_location(team_id, player_stat_sum, player_stat_mean)
            self.change_team_map_info_stats(team_id, game_map, player_stat_sum, player_stat_mean)
            self.change_team_game_month_stats(team_id, month, player_stat_sum, player_stat_mean)
            self.change_team_game_hour_stats(team_id, hour, player_stat_sum, player_stat_mean)
            self.change_team_all_pair_stats(team_id, opponent_id, player_stat_sum, player_stat_mean)

    def _apply_player_record(self, player_id, player_nationality, game_map, month, hour,
                             player_stat_sum, player_stat_mean):
        """Add (unknown player) or update (known player) every player-level dictionary with one record."""
        players_dict = self.player_all_stats
        known_rows = players_dict[self.check_player_id(players_dict, player_id)]
        # The map / month / hour updaters each receive their own key. The
        # previous code passed month, hour and opponent_id respectively, which
        # filed the stats under the wrong keys.
        if known_rows.empty:
            self.add_player_all_stats(player_id, player_stat_sum, player_stat_mean)
            self.add_player_info_passport(player_id, player_nationality)
            self.add_player_map_info_stats(player_id, game_map, player_stat_sum, player_stat_mean)
            self.add_player_game_month_stats(player_id, month, player_stat_sum, player_stat_mean)
            self.add_player_game_hour_stats(player_id, hour, player_stat_sum, player_stat_mean)
        else:
            self.change_player_all_stats(player_id, player_stat_sum, player_stat_mean)
            self.change_player_map_info_stats(player_id, game_map, player_stat_sum, player_stat_mean)
            self.change_player_game_month_stats(player_id, month, player_stat_sum, player_stat_mean)
            self.change_player_game_hour_stats(player_id, hour, player_stat_sum, player_stat_mean)

    def fill_the_dictionary(self, games):
        """Replay ``games`` in order, updating the stat dictionaries and building the training table.

        For every game the team-level and player-level dictionaries are updated
        from its player records, a feature row is collected with
        ``collect_features_with_map`` and stored together with a ``winner``
        column (1 if ``game["teams"][0]`` won, otherwise 0). The resulting table
        is kept in ``self.data_for_train_model_with_map``; if processing stops
        with an exception, the rows built so far are still stored.

        Args:
            games: sequence of game dicts with the keys ``winner``, ``begin_at``,
                ``map``, ``teams`` and ``players``.

        Raises:
            AssertionError: if a game does not contain exactly 10 player records,
                or a player belongs to neither of the game's two teams.
        """
        self.data_for_train_model_with_map = pd.DataFrame()
        # One single-row frame per game, concatenated once at the end:
        # DataFrame.append in a loop is quadratic and no longer exists in pandas 2.
        game_frames = []

        try:
            for num_game, game in tqdm.tqdm(enumerate(games), total=len(games)):
                winner_id = game["winner"]["id"]

                begin_at = parser.parse(game["begin_at"])
                month = begin_at.month
                hour = begin_at.hour

                game_map = game["map"]["id"]

                team1_id = game["teams"][0]["id"]
                team1_players_id = []
                team2_id = game["teams"][1]["id"]
                team2_players_id = []

                players_stats = game["players"]
                if len(players_stats) != 10:
                    # Raised explicitly so the check survives `python -O`.
                    raise AssertionError(f"Problems with count players in {num_game} game")

                for player in players_stats:
                    player_id = player["player"]["id"]
                    team_id = player["team"]["id"]
                    opponent_id = player["opponent"]["id"]
                    team_location = player["team"]["location"]
                    player_nationality = player["player"]["nationality"]

                    if team_id == team1_id:
                        team1_players_id.append(player_id)
                    elif team_id == team2_id:
                        team2_players_id.append(player_id)
                    else:
                        raise AssertionError("Problems in team id with players %d" % player_id)

                    player_stat_sum, player_stat_mean = self._split_player_stats(player)

                    self._apply_team_record(team_id, team_location, opponent_id, game_map, month, hour,
                                            player_stat_sum, player_stat_mean)
                    self._apply_player_record(player_id, player_nationality, game_map, month, hour,
                                              player_stat_sum, player_stat_mean)

                # Collect the features BEFORE the head-to-head win counters learn
                # this game's result; otherwise the target leaks into the
                # wins_*_pair features.
                # NOTE(review): the stat dictionaries above already include this
                # game's player statistics, which is a milder form of the same
                # leak - confirm whether features should be taken before those
                # updates as well.
                game_unstack = self.collect_features_with_map(team1=team1_id,
                                                              team1_players=team1_players_id,
                                                              team2=team2_id,
                                                              team2_players=team2_players_id,
                                                              game_map=game_map,
                                                              game_month=month,
                                                              game_hour=hour)
                columns = self.collect_columns_with_predict(self.collect_columns_with_map)

                self.change_team_all_pair_wins(team1_id, team2_id, winner_id)
                self.change_team_all_pair_wins(team2_id, team1_id, winner_id)

                game_unstack = np.append(game_unstack, [int(winner_id == team1_id)])
                game_unstack = game_unstack.reshape(1, len(game_unstack))
                game_frames.append(pd.DataFrame(game_unstack, columns=columns))
        finally:
            if game_frames:
                self.data_for_train_model_with_map = pd.concat(game_frames, ignore_index=True)



In [2]:
# --- Configuration -----------------------------------------------------------
ROOT_DIR = os.path.abspath('.')

TEST_SIZE = .1          # share of the (last) rows held out for evaluation
CONST_PARAMS = {
    'iterations': 1000,
    'loss_function': 'Logloss',
    'od_type': 'Iter',
    'od_wait': 20,
    'verbose': 1,
}
SEED = 13
EARLY_STOPPING = 20

path_games = 'games.pickle'

# --- Pickle locations --------------------------------------------------------
# Every path component is passed to os.path.join separately so the result is
# valid on any OS. The previous version hard-coded Windows backslashes and had
# an unreachable `except` branch whose literals contained "\t" (a tab escape).
DATA_DIR = os.path.join(ROOT_DIR, "data")

path_team_all_stats_and_location = os.path.join(DATA_DIR, "team_all_stats_and_location.pickle")
path_team_map_info_stats = os.path.join(DATA_DIR, "team_map_info_stats.pickle")
path_team_game_month_stats = os.path.join(DATA_DIR, "team_game_month_stats.pickle")
path_team_game_hour_stats = os.path.join(DATA_DIR, "team_game_hour_stats.pickle")
path_team_all_pair_stats = os.path.join(DATA_DIR, "team_all_pair_stats.pickle")

path_player_all_stats = os.path.join(DATA_DIR, "player_all_stats.pickle")
path_player_info_passport = os.path.join(DATA_DIR, "player_info_passport.pickle")
path_player_map_info_stats = os.path.join(DATA_DIR, "player_map_info_stats.pickle")
path_player_game_month_stats = os.path.join(DATA_DIR, "player_game_month_stats.pickle")
path_player_game_hour_stats = os.path.join(DATA_DIR, "player_game_hour_stats.pickle")

path_data_for_train_model_with_map = os.path.join(DATA_DIR, "data_for_train_model_with_map.pickle")


In [5]:
# Disabled feature-generation step, kept for reference. When enabled it loads the
# raw games from `path_games`, keeps the 5000 most recent ones (ordered by
# `begin_at`) and replays them through WinnerPrediction4Map.fill_the_dictionary.
# with open(path_games, 'rb') as f:
#     games= pickle.load(f)

# L_dates = []
# for game in games:
#     date = parser.parse(game['begin_at'])
#     L_dates.append(date)

# games = np.array(games)[np.argsort(L_dates)[-5000:]].tolist()

# wp4m = WinnerPrediction4Map()
# wp4m.fill_the_dictionary(games)

In [4]:
# Load the pre-computed training table (one row per game, target column `winner`).
# Pickle files can execute arbitrary code on load - only read files produced locally.
data_for_train_model_with_map = pd.read_pickle('data_for_train_model_with_map.pickle')

# `columns=` replaces the positional axis argument (`drop('winner', 1)`), which
# pandas deprecated in 1.3 and removed in 2.0.
X = data_for_train_model_with_map.drop(columns='winner')
y = data_for_train_model_with_map['winner'].astype(int)

# shuffle=False keeps the row order, so the last TEST_SIZE share of rows becomes
# the evaluation set.
X_train, X_eval, y_train, y_eval = train_test_split(X, y, test_size=TEST_SIZE, shuffle=False)

# Free the full table; only the split parts are used from here on.
del data_for_train_model_with_map, X, y
gc.collect()

0

In [6]:
# Train a CatBoost classifier on the training split and score it on the hold-out split.
params = {**CONST_PARAMS, 'use_best_model': True, 'random_state': SEED}

model = cb.CatBoostClassifier(**params)
model.fit(X_train, y_train, eval_set=(X_eval, y_eval))

# Hard predictions are computed once and reused by every metric below.
y_true = y_eval.copy()
y_pred = model.predict(X_eval)

d_metrics = {
    'f1': f1_score(y_eval, y_pred),
    'accuracy': accuracy_score(y_eval, y_pred),
    'roc_auc': roc_auc_score(y_eval, model.predict_proba(X_eval)[:, 1]),
}

# Confusion-matrix cells: (name, actual label, predicted label).
for cell_name, actual, predicted in (('tp', 1, 1), ('tn', 0, 0), ('fp', 0, 1), ('fn', 1, 0)):
    d_metrics[cell_name] = np.sum((y_true == actual) & (y_pred == predicted))

# Importance per feature name, largest first.
importance_by_feature = dict(zip(X_train.columns, model.feature_importances_))
feature_importances = pd.Series(importance_by_feature).sort_values(ascending = False)

Learning rate set to 0.045944
0:	learn: 0.5506451	test: 0.5521362	best: 0.5521362 (0)	total: 188ms	remaining: 3m 7s
1:	learn: 0.4427378	test: 0.4458182	best: 0.4458182 (1)	total: 241ms	remaining: 2m
2:	learn: 0.3639768	test: 0.3675652	best: 0.3675652 (2)	total: 294ms	remaining: 1m 37s
3:	learn: 0.2913615	test: 0.2952870	best: 0.2952870 (3)	total: 351ms	remaining: 1m 27s
4:	learn: 0.2345065	test: 0.2382034	best: 0.2382034 (4)	total: 415ms	remaining: 1m 22s
5:	learn: 0.1897440	test: 0.1932601	best: 0.1932601 (5)	total: 464ms	remaining: 1m 16s
6:	learn: 0.1544811	test: 0.1584201	best: 0.1584201 (6)	total: 522ms	remaining: 1m 14s
7:	learn: 0.1274285	test: 0.1304331	best: 0.1304331 (7)	total: 579ms	remaining: 1m 11s
8:	learn: 0.1056658	test: 0.1079651	best: 0.1079651 (8)	total: 631ms	remaining: 1m 9s
9:	learn: 0.0893569	test: 0.0914085	best: 0.0914085 (9)	total: 682ms	remaining: 1m 7s
10:	learn: 0.0757600	test: 0.0775629	best: 0.0775629 (10)	total: 738ms	remaining: 1m 6s
11:	learn: 0.062034

In [7]:
# Hold-out metrics.
# NOTE(review): in the saved run every score is 1.0 with zero false positives and
# negatives - this usually indicates target leakage in the features rather than
# a perfect model; verify before trusting the numbers.
d_metrics

{'f1': 1.0,
 'accuracy': 1.0,
 'roc_auc': 1.0,
 'tp': 269,
 'tn': 231,
 'fp': 0,
 'fn': 0}

In [11]:
# Ten most important features.
# NOTE(review): in the saved run the four wins_*_pair features carry about 94% of
# the total importance - check that they do not include the current game's result.
feature_importances.head(10)

wins_mean_pair_team2               33.228463
wins_sum_pair_team1                24.197911
wins_mean_pair_team1               18.778687
wins_sum_pair_team2                18.229593
nationality_DE_team1                0.082307
rating_player3_map_team2            0.076561
deaths_team2                        0.074319
adr_sum_team1                       0.073948
kast_player4_hour_team2             0.065512
flash_assists_player5_map_team2     0.062595
dtype: float64

In [10]:
# Preview the first rows of the training feature matrix.
X_train.head()

Unnamed: 0,team.id_team1,assists_team1,deaths_team1,flash_assists_team1,headshots_team1,kills_team1,adr_sum_team1,kast_sum_team1,rating_sum_team1,adr_team1,...,adr_sum_player5_hour_team2,kast_sum_player5_hour_team2,rating_sum_player5_hour_team2,adr_player5_hour_team2,kast_player5_hour_team2,rating_player5_hour_team2,stats_count_player5_hour_team2,game_map,game_month,game_hour
0,126672.0,1938.0,8292.0,527.0,3406.0,7386.0,30597.798828,27837.400391,407.75,72.0,...,1097.0,1044.0,15.25,73.125,69.5625,1.016602,15.0,8.0,12.0,2.0
1,125802.0,6728.0,23559.0,1805.0,10928.0,25032.0,106470.898438,100020.203125,1555.0,76.3125,...,936.0,882.5,13.046875,72.0,67.875,1.003906,13.0,2.0,12.0,2.0
2,125751.0,8800.0,32264.0,2102.0,15800.0,35352.0,145699.703125,137863.1875,2148.0,76.3125,...,856.0,825.5,12.046875,71.3125,68.8125,1.003906,12.0,2.0,12.0,2.0
3,5793.0,11288.0,42864.0,3610.0,20880.0,42590.0,179406.609375,167714.203125,2548.0,74.125,...,798.5,718.0,10.6875,72.5625,65.3125,0.97168,11.0,7.0,12.0,2.0
4,3260.0,6640.0,25732.0,1783.0,12072.0,26329.0,109941.203125,106245.804688,1600.0,73.3125,...,120.8125,129.25,1.519531,60.40625,64.625,0.759766,2.0,31.0,12.0,2.0
