In [77]:
import numpy as np
import pandas as pd
from numba import njit

class Data:
    '''
    Class for manipulating the data and extracting characteristics.
    Attributes:
        today (pd.Timestamp): current date, read from the summary
        bankroll (int): current bankroll, read from the summary
    '''

    # TODO: Make everything that is possible inplace and copy=False to increase performance
    # TODO: Add dtypes to the self.attributes that are dataframes for faster operations [TLE] <16-11-20, kunzaatko> #
    def __init__(self, sort_columns=True):
        '''
        Parameters:
            sort_columns(True): Sort the columns of the dataframes
        '''

        ########################
        #  private attributes  #
        ########################
        self._sort_columns = sort_columns
        self._curr_inc_teams= None # teams that are in inc
        self._curr_opps_teams= None # teams that are in opps

        ########################
        #  Storage attributes  #
        ########################
        self.yesterday = None # this is used for initialization in very first inc of data and then as reference to yesterday
        self.today = None # current date
        self.bankroll = None # current bankroll

        ##########################
        #  Essential attributes  #
        ##########################
        # FIXME: The 'opps_Date' column is does not work, since we get multiple the matches with the same ID for several consecutive days <16-11-20, kunzaatko> #
        # FIXME: Also the P_dis that is evaluated by our model can change from day to day so the P_dis, that we have stored is only the last one <16-11-20, kunzaatko> #

        # `self.matches`
        # index    || 'opps_Date'            | 'Sea'  | 'Date'       | 'Open'                      | 'LID'           | 'HID'        | 'AID'
        # match ID || date of opps occurence | season | date of play | date of betting possibility | league ID (str) | home team ID | away team ID
        #           | 'HSC'             | 'ASC'             | 'H'      | 'D'  | 'A'      | 'OddsH'          | 'OddsD'      | 'OddsA'
        #           | home goals scored | away goals scored | home win | draw | away win | odds of home win | odds of draw | odds of away win
        #           | 'P(H)'               | 'P(D)'           | 'P(A)'               | 'BetH'       | 'BetD'   | 'BetA'
        #           | model prob. home win | model prob. draw | model prob. away win | bet home win | bet draw | bet away win

        self.matches = pd.DataFrame(columns=['opps_Date','Sea','Date','Open','LID','HID','AID','HSC','ASC','H','D','A','OddsH','OddsD','OddsA','P(H)','P(D)', 'P(A)','BetH','BetD','BetA']) # All matches played by IDs ﭾ


        #########################
        #  Features attributes  #
        #########################

        # `self.LL_data`
        # LL: life-long
        # index   || 'LID'            | 'LL_Goals_Scored' | 'LL_Goals_Conceded' | 'LL_Wins' | 'LL_Draws' | 'LL_Loses'
        # team ID || league ID (list) | goals scored      | goals conceded      | wins      | draws      | loses
        #          | 'LL_Played'    | 'LL_Accu'
        #          | played matches | model accuracy
        self.LL_data = pd.DataFrame(columns=['LID','LL_Goals_Scored','LL_Goals_Conceded','LL_Wins', 'LL_Draws', 'LL_Loses', 'LL_Played', 'LL_Accu']) # recorded teams

        # `self.SL_data`
        # SL: season-long
        # index (multiindex)|| 'LID'            | 'SL_Goals_Scored' | 'SL_Goals_Conceded' | 'SL_Wins' | 'SL_Draws' | 'SL_Loses'
        # season,team ID    || league ID (list) | goals scored      | goals conceded      | wins      | draws      | loses
        #                    | 'SL_Played'    | 'SL_Accu'
        #                    | played matches | model accuracy
        self.SL_data = pd.DataFrame(columns=['LID','SL_Goals_Scored', 'SL_Goals_Conceded', 'SL_Wins', 'SL_Draws', 'SL_Loses', 'SL_Played', 'SL_Accu']) # data frame for storing all the time characteristics for seasons


        # `self.match_data`
        # index   || 'MatchID' | 'Sea'  | 'Date'       | 'Oppo'      | 'Home'       | 'Away'       | 'M_Goals_Scored' | 'M_Goals_Conceded'
        # team ID || match ID  | season | date of play | opponent id | team is home | team is away | goals scored     | goals conceded
        #          | 'M_Win'   | 'M_Draw'  | 'M_Lose'   | 'M_P(Win)'      | 'M_P(Draw)'      | 'M_P(Lose)'      | 'M_Accu'
        #          | match win | match draw| match lose | model prob. win | model prob. draw | model prob. lose | model accuracy
        self.match_data = pd.DataFrame(columns=['MatchID', 'Date' , 'Oppo', 'Home', 'Away',  'M_Goals_Scored', 'M_Goals_Conceded', 'M_Win','M_Draw', 'M_Lose','M_P(Win)','M_P(Draw)', 'M_P(Lose)','M_Accu'])


    ######################################
    #  UPDATING THE DATA STORED IN SELF  #
    ######################################

    def update_data(self, opps=None ,summary=None, inc=None, P_dis=None, bets=None):
        # {{{
        '''
        Run the iteration update of the data stored.
        ! Summary has to be updated first to get the right date!
        Parameters:
        All the parameters are supplied by the evaluation loop.
        opps(pandas.DataFrame): dataframe that includes the opportunities for betting.
        summary(pandas.DataFrame): includes the `Max_bet`, `Min_bet` and `Bankroll`.
        inc(pandas.DataFrame): includes the played matches with the scores for the model.
        '''
        if summary is not None:
            self._EVAL_summary(summary)

        if inc is not None:
            inc = inc.loc[:,~inc.columns.str.match('Unnamed')] # removing the 'Unnamed: 0' column (memory saning) See: https://stackoverflow.com/questions/36519086/how-to-get-rid-of-unnamed-0-column-in-a-pandas-dataframe
            self._curr_inc_teams = np.unique(np.concatenate((inc['HID'].to_numpy(dtype='int64'),inc['AID'].to_numpy(dtype='int64'))))
            self._EVAL_inc(inc)
            
            
        if opps is not None:
            opps = opps.loc[:,~opps.columns.str.match('Unnamed')] # removing the 'Unnamed: 0' column (memory saning) See: https://stackoverflow.com/questions/36519086/how-to-get-rid-of-unnamed-0-column-in-a-pandas-dataframe
            self._curr_opps_teams = np.unique(np.concatenate((opps['HID'].to_numpy(dtype='int64'),opps['AID'].to_numpy(dtype='int64'))))
            opps['opps_Date'] = self.today
            self._EVAL_opps(opps)

        if P_dis is not None:
            self._EVAL_P_dis(P_dis)

        if bets is not None:
            self._EVAL_bets(bets)

        if self._sort_columns:
            self.matches = self.matches[['opps_Date','Sea','Date','Open','LID','HID','AID','HSC','ASC','H','D','A','OddsH','OddsD','OddsA','P(H)','P(D)', 'P(A)','BetH','BetD','BetA']]
            self.LL_data = self.LL_data[['LID', 'LL_Goals_Scored','LL_Goals_Conceded','LL_Wins', 'LL_Draws', 'LL_Loses', 'LL_Played', 'LL_Accu']]

        # }}}

    def _EVAL_summary(self, summary):
        # {{{
        self.today = summary['Date'][0]
        self.bankroll = summary['Bankroll'][0]
        # }}}

    def _EVAL_inc(self, inc):
        # {{{
        self._eval_teams(inc, self._curr_inc_teams)
        self._eval_matches(inc)
        # }}}

    def _EVAL_opps(self, opps):
        # {{{
        self._eval_teams(opps, self._curr_inc_teams)
        self._eval_matches(opps)
        # }}}

    def _EVAL_P_dis(self, P_dis):
        # {{{
        self._eval_matches(P_dis)
        # }}}

    def _EVAL_bets(self, bets):
        # {{{
        self._eval_matches(bets)
        # }}}

    def _eval_teams(self, data_frame, data_frame_teams):
        # {{{

        if not data_frame.empty:

            ###############
            #  NEW TEAMS  #
            ###############

            # teams that are already stored in the self.LL_data
            index_self_teams = self.LL_data.index.to_numpy(dtype='int64')
            # unique teams that are stored in the data frame
            index_data_frame = data_frame_teams
            # teams in the data_frame that are not stored in the self.LL_data
            index_new_teams = np.setdiff1d(index_data_frame, index_self_teams)

            if not len(index_new_teams) == 0: # if there are any new teams (otherwise invalid indexing)
                # DataFrame of new teams
                new_teams = pd.DataFrame(index=index_new_teams)
                lids_frame = pd.concat((data_frame[['HID','LID']].set_index('HID'),data_frame[['AID','LID']].set_index('AID'))) # TODO: This will not work if there are multiple LIDs for one team in one inc <15-11-20, kunzaatko> # NOTE: This is probably working only because the inc already added some teams.
                lids = lids_frame[~lids_frame.index.duplicated(keep='first')].loc[index_new_teams]
                # Making a list from the 'LID's
                new_teams['LID'] = lids.apply(lambda row: np.array([row.LID]), axis=1) # this is costly but is only run once for each match %timeit dataset['LID'] = dataset.apply(lambda row: [row.LID], axis=1) -> 463 ms ± 13.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
                self.LL_data = pd.concat((self.LL_data, new_teams))
                self.LL_data.fillna(0., inplace=True)

            ##############
            #  NEW LIDS  #
            ##############

            # NOTE: This could be optimised radically but it has shown to be a pain in the ass so this is it. If there will be a 'TLE' (time limit exceeded) error, this is the place to change <15-11-20, kunzaatko> #

            # teams in the data_frame that are stored in the self.LL_data (teams that could have been changed)
            index_old_teams = np.intersect1d(index_self_teams,index_data_frame)
            index_old_teams_HID = np.intersect1d(index_old_teams, data_frame['HID'].to_numpy(dtype='int64'))
            index_old_teams_AID = np.intersect1d(index_old_teams, data_frame['AID'].to_numpy(dtype='int64'))

            for index in index_old_teams_HID:
                if not type(data_frame.set_index('HID').loc[index]) == pd.DataFrame:
                    if not data_frame.set_index('HID').loc[index]['LID'] in self.LL_data.at[index,'LID']:
                        self.LL_data.at[index,'LID'] = np.append(self.LL_data.at[index,'LID'],data_frame.set_index('HID').at[index, 'LID'])
                else:
                    if not data_frame.set_index('HID').loc[index].iloc[0]['LID'] in self.LL_data.at[index,'LID']:
                        self.LL_data.at[index,'LID'] = np.append(self.LL_data.at[index,'LID'],data_frame.set_index('HID').at[index, 'LID'])

            for index in index_old_teams_AID:
                if not type(data_frame.set_index('AID').loc[index]) == pd.DataFrame:
                    if not data_frame.set_index('AID').loc[index]['LID'] in self.LL_data.at[index,'LID']:
                        self.LL_data.at[index,'LID'] = np.append(self.LL_data.at[index,'LID'],data_frame.set_index('AID').at[index, 'LID'])
                else:
                    if not data_frame.set_index('AID').loc[index].iloc[0]['LID'] in self.LL_data.at[index,'LID']:
                        self.LL_data.at[index,'LID'] = np.append(self.LL_data.at[index,'LID'],data_frame.set_index('AID').at[index, 'LID'])

            # see also (https://stackoverflow.com/questions/45062340/check-if-single-element-is-contained-in-numpy-array)}}}

    # TODO: Probably does not work correctly for the bets. The bets should not be combined for the `opps` and the `inc` but only for the `bets` dataframe. <17-11-20, kunzaatko> #
    # TODO: 'opps_Date' is not working. Rows of new matches should be appended rather than merged by index when they do not have the same 'opps_Date' (i.e. when they were not added on the same day) <17-11-20, kunzaatko> # -> the problem with this, though, is that we would have to group by match ID to access a match, because the same match ID would then appear several times in the dataframe -> We should consider adding a new frame with this data (or maybe the bets should be recorded as an associated series of the match). What is your opinion/solution?
    def _eval_matches(self, data_frame):
        # {{{
        self.matches = self.matches.combine_first(data_frame)
        # }}}

        #####################################################################
        #  UPDATE THE FEATURES THAT CAN BE EXTRACTED FROM THE DATA IN SELF  #
        #####################################################################

    def update_features(self):
    # {{{
        '''
        Update the features for the data stored in `self`.
        '''
        self._UPDATE_LL_data_features()
        self._UPDATE_SL_data_features()
        self._UPDATE_match_data_features()
    # }}}

        self.yesterday = self.today # test me to not overwrite

    def _UPDATE_LL_data_features(self):
        '''
        TODO LL features are not cummulative, in very first iteration it is suddenly updated but it needs to be updated \
             already after very first match e.g. Match_ID=1 and this info used for training of model
        Populate all the features from the frame `self.LL_data`
        '''
        # This is needed because some characteristics as score and who won is not present in matches_played at self.today
        matches_played_before = self.matches[self.matches['Date'] < self.today] if self.yesterday is None else \
            self.matches.groupby('Date').get_group(self.yesterday) if self.yesterday in self.matches['Date'].to_numpy() \
            else None

        self._update_LL_Played(matches_played_before)
        self._update_LL_Goals(matches_played_before)
        self._update_LL_Res(matches_played_before)
        self._update_LL_Accu(matches_played_before)

    def _update_LL_Played(self, matches_played):
        '''
        Update 'LL_Played' (games) of the fram self.LL_data
        :param matches_played: pd.Dataframe:
            Contains matches played at self.yesterday
        '''
        if matches_played is not None:
            teams_played = np.unique(np.concatenate((matches_played['HID'].to_numpy(dtype='int64'),
                                                     matches_played['AID'].to_numpy(dtype='int64'))), return_counts=True)
            self.LL_data.loc[teams_played[0], 'LL_Played'] = self.LL_data.loc[teams_played[0], 'LL_Played'] + \
                                                             teams_played[1]

    def _update_LL_Goals(self, matches_played):
        '''
        Update 'LL_Goals_Scored' and 'LL_Goals_Conceded' of the frame `self.LL_data`
        '''
        if matches_played is not None:
            teams_goals_scored = np.concatenate([matches_played[['HID', 'HSC']].to_numpy(dtype='int64'),
                                                 matches_played[['AID', 'ASC']].to_numpy(dtype='int64')])
            teams_goals_conceded = np.concatenate([matches_played[['HID', 'ASC']].to_numpy(dtype='int64'),
                                                   matches_played[['AID', 'HSC']].to_numpy(dtype='int64')])

            scored = fast(teams_goals_scored)
            conceded = fast(teams_goals_conceded)
            self.LL_data.loc[scored[:, 0], 'LL_Goals_Scored'] = \
                self.LL_data.loc[scored[:, 0], 'LL_Goals_Scored'] + scored[:, 1]
            self.LL_data.loc[conceded[:, 0], 'LL_Goals_Conceded'] = \
                self.LL_data.loc[conceded[:, 0], 'LL_Goals_Conceded'] + conceded[:, 1]

    def _update_LL_Res(self, matches_played):
        '''
        Update 'LL_Wins', 'LL_Draws' and 'LL_Loses' of the frame `self.LL_data`
        '''
        if matches_played is not None:
            teams_wins = np.concatenate([matches_played[['HID', 'H']].to_numpy(dtype='int64'),
                                                 matches_played[['AID', 'A']].to_numpy(dtype='int64')])
            teams_loses = np.concatenate([matches_played[['HID', 'A']].to_numpy(dtype='int64'),
                                                   matches_played[['AID', 'H']].to_numpy(dtype='int64')])
            teams_draws = np.concatenate([matches_played[['HID', 'D']].to_numpy(dtype='int64'),
                                                   matches_played[['AID', 'D']].to_numpy(dtype='int64')])

            wins = fast(teams_wins)
            loses = fast(teams_loses)
            draws = fast(teams_draws)

            self.LL_data.loc[wins[:, 0], 'LL_Wins'] = self.LL_data.loc[wins[:, 0], 'LL_Wins'] + wins[:, 1]
            self.LL_data.loc[loses[:, 0], 'LL_Loses'] = self.LL_data.loc[loses[:, 0], 'LL_Loses'] + loses[:, 1]
            self.LL_data.loc[draws[:, 0], 'LL_Draws'] = self.LL_data.loc[draws[:, 0], 'LL_Draws'] + draws[:, 1]

    def _update_LL_Accu(self, matches_played):
        '''
        Update 'LL_Accu' of the frame `self.LL_data`
        '''
        if matches_played is not None:
            pass

    def _UPDATE_SL_data_features(self):
        '''
        Populate all the features of `self.SL_data`
        '''
        # TODO: should be done incrementaly <17-11-20, kunzaatko> #
        # TODO I assume that 'self.SL_data' are updated when new team will be present in 'inc' (Many98)
        # This is needed because some characteristics as score and who won is not present in matches_played at self.today
        matches_played_before = self.matches[self.matches['Date'] < self.today] if self.yesterday is None else \
            self.matches.groupby('Date').get_group(self.yesterday) if self.yesterday in self.matches['Date'].to_numpy() \
            else None

        self._update_SL_Goals(matches_played_before)
        self._update_SL_Res(matches_played_before)
        self._update_SL_Played(matches_played_before)
        self._update_SL_Accu(matches_played_before)

    # TODO: Could be unified with `_update_LL_Goals` as `_update_Goals` but for different frames. <17-11-20, kunzaatko> #
    def _update_SL_Goals(self, matches_played):
        '''
        Update 'SL_Goals_Scored' and 'SL_Goals_Conceded' of the frame `self.SL_data`
        '''
        if matches_played is not None:
            seasons = [season for season in matches_played.groupby('Sea')]
            for sea, season in seasons:
                teams_goals_scored = np.concatenate([season[['HID', 'HSC']].to_numpy(dtype='int64'),
                                                     season[['AID', 'ASC']].to_numpy(dtype='int64')])
                teams_goals_conceded = np.concatenate([season[['HID', 'ASC']].to_numpy(dtype='int64'),
                                                       season[['AID', 'HSC']].to_numpy(dtype='int64')])

                scored = fast(teams_goals_scored)
                conceded = fast(teams_goals_conceded)

                ind_gs = [(sea, team_id) for team_id in scored[:, 0]]
                ind_gc = [(sea, team_id) for team_id in conceded[:, 0]]

                self.SL_data.loc[ind_gs, 'SL_Goals_Scored'] = \
                    self.SL_data.loc[ind_gs, 'SL_Goals_Scored'] + scored[:, 1]
                self.SL_data.loc[ind_gc, 'SL_Goals_Conceded'] = \
                    self.SL_data.loc[ind_gc, 'SL_Goals_Conceded'] + conceded[:, 1]



    # TODO: Could be unified with `_update_LL_Res` as `_update_Res` but for different frames. <17-11-20, kunzaatko> #
    def _update_SL_Res(self, matches_played):
        if matches_played is not None:
            seasons = [season for season in matches_played.groupby('Sea')]
            for sea, season in seasons:
                teams_wins = np.concatenate([matches_played[['HID', 'H']].to_numpy(dtype='int64'),
                                             matches_played[['AID', 'A']].to_numpy(dtype='int64')])
                teams_loses = np.concatenate([matches_played[['HID', 'A']].to_numpy(dtype='int64'),
                                              matches_played[['AID', 'H']].to_numpy(dtype='int64')])
                teams_draws = np.concatenate([matches_played[['HID', 'D']].to_numpy(dtype='int64'),
                                              matches_played[['AID', 'D']].to_numpy(dtype='int64')])

                wins = fast(teams_wins)
                loses = fast(teams_loses)
                draws = fast(teams_draws)

                ind_wins = [(sea, team_id) for team_id in wins[:, 0]]
                ind_loses = [(sea, team_id) for team_id in loses[:, 0]]
                ind_draws = [(sea, team_id) for team_id in draws[:, 0]]

                self.SL_data.loc[ind_wins, 'SL_Wins'] = \
                    self.SL_data.loc[ind_wins, 'SL_Wins'] + wins[:, 1]
                self.SL_data.loc[ind_loses, 'SL_Loses'] = \
                    self.SL_data.loc[ind_loses, 'SL_Loses'] + loses[:, 1]
                self.SL_data.loc[ind_draws, 'SL_Draws'] = \
                    self.SL_data.loc[ind_draws, 'SL_Draws'] + draws[:, 1]

    # TODO: Could be unified with `_update_LL_Played` as `_update_Played` but for different frames. <17-11-20, kunzaatko> #
    def _update_SL_Played(self, matches_played):
        if matches_played is not None:
            seasons = [season for season in matches_played.groupby('Sea')]
            for sea, season in seasons:
                teams_played = np.unique(np.concatenate((season['HID'].to_numpy(dtype='int64'),
                                                         season['AID'].to_numpy(dtype='int64'))), return_counts=True)
                ind_teams = [(sea, team_id) for team_id in teams_played[0]]

                self.SL_data.loc[ind_teams, 'SL_Played'] = self.SL_data.loc[ind_teams, 'SL_Played'] + \
                                                                 teams_played[1]

    # TODO: Could be unified with `_update_LL_Accu` as `_update_Accu` but for different frames. <17-11-20, kunzaatko> #
    def _update_SL_Accu(self, matches_played):
        '''
        Update 'SL_Accu' of the frame `self.LL_data`
        '''
        pass
    # }}}

    def _UPDATE_match_data_features(self):
    # {{{
        '''
        Populate all the features of `self.match_data`
        '''
        if self.today in self.matches['Date'].values:
            # a dataframe of all the todays matches (matches that where played on `self.today`)
            matches_played_today = self.matches.groupby('Date').get_group(self.today)
            self._update_add_matches(matches_played_today)

        # TODO: should be done incrementaly <17-11-20, kunzaatko> #
    # }}}

    # FIXME: does not update the matches that are not gone through at today... The matches in the first inc. <18-11-20, kunzaatko> #
    def _update_add_matches(self, matches_played_today):
    # {{{
        '''
        Add the matches that were played today. The fields 'MatchID', 'Date' == self.today, 'Oppo' == HID/AID, 'Home' & 'Away' (int 1/0), 'M_Goals_Scored' & 'M_Goals_Conceded' (int), 'M_Win' & 'M_Draw' & 'M_Lose' (int 1/0), 'M_P(Win)' & 'M_P(Draw)' & 'M_P(Lose)' (float), 'M_Accu' should be filled.
        '''
        # the matches that played as home
        matches_home = matches_played_today.set_index('HID').drop(labels=['Open','opps_Date'],axis=1)
        renames = {'AID':'Oppo', 'HSC':'M_Goals_Scored', 'ASC':'M_Goals_Conceded', 'H':'M_Win', 'D':'M_Draw', 'A':'M_Lose', 'P(H)':'P(Win)', 'P(D)':'P(Draw)', 'P(A)':'P(Lose)'}
        matches_home.rename(renames, axis=1, inplace=True)
        matches_home['Home'] = 1
        matches_home['Away'] = 0
        matches_home['MatchID'] = matches_played_today.index
        # TODO: Model accuracy <17-11-20, kunzaatko> #

        # the matches that played as away
        matches_away = matches_played_today.set_index('AID').drop(labels=['Open','opps_Date'],axis=1)
        renames = {'HID':'Oppo', 'ASC':'M_Goals_Scored', 'HSC':'M_Goals_Conceded', 'A':'M_Win', 'D':'M_Draw', 'H':'M_Lose', 'P(A)':'P(Win)', 'P(D)':'P(Draw)', 'P(H)':'P(Lose)'}
        matches_away.rename(renames, axis=1, inplace=True)
        matches_away['Home'] = 0
        matches_away['Away'] = 1
        matches_away['MatchID'] = matches_played_today.index
        # TODO: Model accuracy <17-11-20, kunzaatko> #

        # TODO: Do not create a new object but only concat. <17-11-20, kunzaatko> #
        self.match_data = self.match_data.append([matches_away, matches_home])
    # }}}


    # ┌─────────────────────┐
    # │ MATCHES GLOBAL DATA │
    # └─────────────────────┘

    def matches_with(self, ID, oppo_ID):
        '''
        Returns all the matches with a particular opponent.
        Parameters:
            oppo_ID(int): ID of the opponent.
            ID(int): team id
        Returns:
            pd.DataFrame
        '''
        pass

    # ┌───────────────────────────┐
    # │ LIFE-LONG CHARACTERISTICS │
    # └───────────────────────────┘

    def total_scored_goals_to_match(self, ID, number_of_matches):
        '''
        Total life-long score to match ratio.
        Parameters:
            ID(int): team id
            number_of_matches(int): num
        Returns:
            float: scored goals / # matches
        '''
        pass

    def home_win_r(self):
        '''
        Win rate for win when home.
        Parameters:
            ID(int): team index
        Returns:
            float: rate of win, when home
        '''
        pass

    def goals_ratio(self, ID, oppo_ID, matches = 1, vs = False):
        '''
        Returns (goals_scored/(goals_scored  + goals_conceded)) of first team or this vs statistics
        Parametrs:
            oppo_ID(int): ID of the opponent.
            ID(int): team id
            matches(int): numbers of matches to past
            vs(bool): set against each other
        Returns:
            float or 2 floats
        '''

        matches_period =self.matches[self.matches["HID"]==ID].append(self.matches[self.matches["AID"]==ID]).sort_index().tail(matches)
        if vs:
            matches_period =matches_period[matches_period["HID"]==oppo_ID].append(matches_period[matches_period["AID"]==oppo_ID]).sort_index().tail(matches)


        goals_conceded =1+matches_period[matches_period["HID"]==ID]['ASC'].sum()+matches_period[matches_period["AID"]==ID]['HSC'].sum()
        goals_scored =1+matches_period[matches_period["HID"]==ID]['HSC'].sum()+matches_period[matches_period["AID"]==ID]['ASC'].sum()
        goals_ID =goals_scored/(goals_scored + goals_conceded)
        if vs:
            return (goals_ID, (1-goals_ID))
        else:
            return goals_ID

    def wins(self, ID, months = None, matches=None):
        '''
        Returns wins in time or match period
        Parameters:
            ID(int): team id
            months(int) = numbers of months
            matches(int) = numbers of matches to past
        Returns:
            int
        '''
        if months != None:
            months_period =self.matches[self.matches['Date'].isin(pd.date_range(end=self.today, periods=(months*30), freq='D')[::-1])]
            wins = self.matches[self.matches["HID"]==ID]["H"].sum() + self.matches[self.matches["AID"]==ID]["A"].sum()
            return wins

        else:
            matches_period =self.matches[self.matches["HID"]==ID].append(self.matches[self.matches["AID"]==ID]).sort_index().tail(matches)
            wins = matches_period[matches_period["HID"]==ID]['H'].sum()+matches_period[matches_period["AID"]==ID]['A'].sum()
            return wins


@njit
def fast(pairs):
    """
    Sum the values of the second column of `pairs` per team of the first column.
    :param pairs: np.ndarray:
        two-column array; every row holds a team index and a value, e.g.
        [[Team_ID, num_of_scored_goals] X (num_of_played_games * 2)]:
        pairs of team:num_of_scored_goals could be [[5, 2], [8, 3], [3, 4], [10, 4], [10, 3], [3, 8]]
    :return: np.ndarray:
        float array with one row per distinct team, in the sorted order given by
        `np.unique`: [team index, sum of the values of that team]
    """
    keys = pairs[:, 0]
    teams = np.unique(keys)
    out = np.zeros((teams.size, 2))
    for i in range(teams.size):
        # rows of the i-th team, then the sum of their value column
        rows_of_team = pairs[keys == teams[i]]
        out[i, 0] = teams[i]
        out[i, 1] = rows_of_team[:, 1].sum()
    return out

In [78]:

class Environment:
    '''
    Day-by-day betting simulation over a dataset of matches.

    The dataset is expected to provide the columns 'Date', 'Open', the odds
    'OddsH'/'OddsD'/'OddsA', the scores 'HSC'/'ASC' and the results 'H'/'D'/'A'.
    The bet columns 'BetH'/'BetD'/'BetA' are added to it (in place) on construction.
    '''

    def __init__(self, dataset, interactor, init_bankroll=1000, min_bet=5, max_bet=100):
        '''
        Parameters:
            dataset(pd.DataFrame): the matches; gets the three bet columns set to 0.
            interactor: object whose `place_bets(opps, summary, inc)` returns the bets.
            init_bankroll: starting bankroll.
            min_bet, max_bet: bets outside of this range are rejected.
        '''
        self.bet_cols = ['BetH', 'BetD', 'BetA']
        self.odds_cols = ['OddsH', 'OddsD', 'OddsA']
        self.score_cols = ['HSC', 'ASC']
        self.res_cols = ['H', 'D', 'A']
        self.label_cols = self.score_cols + self.res_cols

        # no bets are placed at the start
        for bet_col in self.bet_cols:
            dataset[bet_col] = 0.
        self.dataset = dataset
        self.interactor = interactor

        self.bankroll = init_bankroll
        self.min_bet = min_bet
        self.max_bet = max_bet
        # date of the latest match already handed out as incremental data
        self.last_seen = pd.to_datetime('1900-01-01')

    def get_incremental_data(self, date):
        '''
        Return the matches played after `self.last_seen` and strictly before `date`,
        and move `self.last_seen` to the latest of their dates (if there are any).
        '''
        not_seen_yet = self.dataset.Date > self.last_seen
        already_played = self.dataset.Date < date
        inc = self.dataset.loc[not_seen_yet & already_played]
        if not inc.empty:
            self.last_seen = inc.Date.max()

        return inc

    def get_opps(self, date):
        '''
        Return the matches that can be bet on at `date`: open for betting, not
        played before `date`, and with a positive sum of odds. The score and result
        columns are removed.
        '''
        bettable = (self.dataset.Open <= date) & (self.dataset.Date >= date)
        opps = self.dataset[bettable]
        has_odds = opps[self.odds_cols].sum(axis=1) > 0
        return opps[has_odds].drop(self.label_cols, axis=1)

    def run(self, start=None, end=None):
        '''
        Simulate every day from `start` to `end` (defaults: first 'Open' and last
        'Date' of the dataset) and return the dataset with the placed bets.
        Days without opportunities are skipped entirely.
        '''
        if start is None:
            start = self.dataset.Open.min()
        if end is None:
            end = self.dataset.Date.max()

        print(f"Start: {start}, End: {end}")
        for date in pd.date_range(start, end):

            opps = self.get_opps(date)
            if opps.empty:
                continue

            inc = self.get_incremental_data(date)

            # money currently tied up in the open opportunities
            placed = opps[self.bet_cols].sum().sum()

            # pay out the bets on the matches that have been played since
            self.bankroll += self.evaluate_bets(inc)

            summary = self.generate_summary(date)
            print(f'{date:%Y-%m-%d}: available: {self.bankroll:.2f}, invested {placed:.2f}, total {self.bankroll+placed:.2f}')

            bets = self.get_bets(summary, inc, opps)
            validated_bets = self.validate_bets(bets, opps)
            self.place_bets(validated_bets)

        # pay out whatever is still unsettled after the last day
        day_after_end = end + pd.to_timedelta(1, 'days')
        self.bankroll += self.evaluate_bets(self.get_incremental_data(day_after_end))

        if hasattr(self.interactor, 'writeln'):
            # NOTE(review): `send_updates` is not defined in this class as shown here
            self.send_updates(pd.DataFrame(), pd.DataFrame(), pd.DataFrame())

        return self.dataset

    def validate_bets(self, bets, opps):
        '''
        Keep only the bets on the sent opportunities and on the known bet columns;
        zero the bets below `min_bet` or above `max_bet`; zero everything when the
        total exceeds the bankroll.
        '''
        known_rows = bets.index.intersection(opps.index)
        known_cols = bets.columns.intersection(self.bet_cols)
        validated_bets = bets.loc[known_rows, known_cols]  # allow bets only on the send opportunities

        too_low = validated_bets < self.min_bet
        validated_bets[too_low] = 0.  # reject bets lower than min_bet
        too_high = validated_bets > self.max_bet
        validated_bets[too_high] = 0.  # reject bets higher than max_bet

        total = validated_bets.sum().sum()
        if total > self.bankroll:  # reject bets if there are no sufficient funds left
            validated_bets.loc[:, :] = 0.
        return validated_bets

    def place_bets(self, bets):
        '''
        Add `bets` to the bets already stored in the dataset and take their total
        out of the bankroll.
        '''
        current = self.dataset.loc[bets.index, self.bet_cols]
        self.dataset.loc[bets.index, self.bet_cols] = current.add(bets, fill_value=0)
        self.bankroll -= bets.values.sum()

    def evaluate_bets(self, inc):
        '''
        Return the total winnings (bet * result * odds, summed over all outcomes and
        matches) of the played matches `inc`; 0 when `inc` is empty.
        '''
        if inc.empty:
            return 0
        stakes = inc[self.bet_cols].values
        odds = inc[self.odds_cols].values
        results = inc[self.res_cols].values
        winnings = (stakes * results * odds).sum(axis=1)
        return winnings.sum()

    def generate_summary(self, date):
        '''
        Return a one-row frame with the columns 'Bankroll', 'Date', 'Min_bet' and 'Max_bet'.
        '''
        summary = pd.Series({
            'Bankroll': self.bankroll,
            'Date': date,
            'Min_bet': self.min_bet,
            'Max_bet': self.max_bet,
        })
        return summary.to_frame().T

    def get_bets(self, summary: pd.DataFrame, inc: pd.DataFrame, opps: pd.DataFrame) -> pd.DataFrame:
        '''
        Ask the interactor for its bets; note the argument order (opps, summary, inc).
        '''
        return self.interactor.place_bets(opps, summary, inc)

In [81]:
import numpy as np
import pandas as pd
import sys
sys.path.append('..')

#from hackathon.src.environment import Environment
#from models.feature_extraction.feature_extraction import Data

limits = {'start':'2000-03-20','end':'2011-06-30'}

# NOTE(review): absolute, user-specific path; the cell only runs on the machine that has this file.
dataset = pd.read_csv('/mnt/lustre/helios-home/srameon1/Downloads/fotbal_data.csv', parse_dates=['Date', 'Open'])
env = Environment(dataset, None)
data = Data()

# One shared date range instead of rebuilding it for each of the three replays.
dates = pd.date_range(start=limits['start'], end=limits['end'])
inc = [env.get_incremental_data(date) for date in dates]
opps = [env.get_opps(date) for date in dates]
summary = [env.generate_summary(date) for date in dates]

# Window sizes of the features: last N matches / last N months.
match_windows = (5, 10, 15, 20)
month_windows = (1, 2, 4, 6, 12)

# Feature name -> list of values, one value per match seen in `inc`.
# The names follow the pattern <HID|AID><Wins|Goalrat><N><match|month>.
feature_values = {
    f'{side}{kind}{window}{unit}': []
    for side in ('HID', 'AID')
    for kind in ('Wins', 'Goalrat')
    for unit, windows in (('match', match_windows), ('month', month_windows))
    for window in windows
}

for o, i, s in zip(opps, inc, summary):
    data.update_data(opps=o, inc=i, summary=s)
    for HID, AID in zip(i["HID"], i["AID"]):
        # Home team first, then away team; goals_ratio takes (team, opponent).
        for side, team, opponent in (('HID', HID, AID), ('AID', AID, HID)):
            for window in match_windows:
                feature_values[f'{side}Wins{window}match'].append(data.wins(team, matches=window))
            for window in month_windows:
                feature_values[f'{side}Wins{window}month'].append(data.wins(team, months=window))
            for window in match_windows:
                feature_values[f'{side}Goalrat{window}match'].append(
                    data.goals_ratio(team, opponent, matches=window))
            # NOTE(review): the Goalrat *month* features are declared but never computed,
            # so those frames stay empty. Confirm whether goals_ratio supports `months`.

# Build every frame once from its collected values. Growing a frame with DataFrame.append
# inside the loop copies the whole frame on every call, and the method no longer exists in
# pandas 2.0. The index is 0 on every row, which is what appending one-row frames produced.
features = {
    name: pd.DataFrame(values, index=[0] * len(values)) if values else pd.DataFrame()
    for name, values in feature_values.items()
}

# Later cells refer to the frames by their individual names (HIDWins5match, AIDGoalrat20match,
# ...), so publish them as notebook-level variables as well.
globals().update(features)
               

In [86]:
# Second load of the same CSV that `dataset` was read from, this time without date parsing.
# Later cells copy its HSC, ASC, H, D and A columns into `posel`.
# NOTE(review): absolute, user-specific path.
df = pd.read_csv('/mnt/lustre/helios-home/srameon1/Downloads/fotbal_data.csv')

In [87]:
# Give every computed feature frame a fresh 0..n-1 index; reset_index() keeps the previous
# index as an extra column. Each frame is reset from itself.
HIDWins5match = HIDWins5match.reset_index()
HIDWins10match = HIDWins10match.reset_index()
HIDWins15match = HIDWins15match.reset_index()
HIDWins20match = HIDWins20match.reset_index()
HIDWins1month = HIDWins1month.reset_index()
HIDWins2month = HIDWins2month.reset_index()
# Fixed: this line used to read from HIDWins2month, which replaced the 4-month feature
# with the (already reset) 2-month data.
HIDWins4month = HIDWins4month.reset_index()
HIDWins6month = HIDWins6month.reset_index()
HIDWins12month = HIDWins12month.reset_index()
HIDGoalrat5match = HIDGoalrat5match.reset_index()
HIDGoalrat10match = HIDGoalrat10match.reset_index()
HIDGoalrat15match = HIDGoalrat15match.reset_index()
HIDGoalrat20match = HIDGoalrat20match.reset_index()


AIDWins5match = AIDWins5match.reset_index()
AIDWins10match = AIDWins10match.reset_index()
AIDWins15match = AIDWins15match.reset_index()
AIDWins20match = AIDWins20match.reset_index()
AIDWins1month = AIDWins1month.reset_index()
AIDWins2month = AIDWins2month.reset_index()
AIDWins4month = AIDWins4month.reset_index()
AIDWins6month = AIDWins6month.reset_index()
AIDWins12month = AIDWins12month.reset_index()
AIDGoalrat5match = AIDGoalrat5match.reset_index()
AIDGoalrat10match = AIDGoalrat10match.reset_index()
AIDGoalrat15match = AIDGoalrat15match.reset_index()
AIDGoalrat20match = AIDGoalrat20match.reset_index()


In [67]:
# NOTE(review): `Wins` is not assigned in any cell visible here, so this cell raises NameError
# on a fresh kernel. It looks like a leftover from an earlier session; confirm and remove it
# or define `Wins` first.
Wins = Wins.reset_index()

In [88]:
# Start the combined feature table `posel` from column 0 of AIDWins5match.
# .copy() makes it an independent frame: the following cells add columns to `posel`, and
# doing that on a bare column selection of another frame triggers SettingWithCopyWarning.
# NOTE(review): this first column keeps the label 0 instead of 'AIDWins5match'.
posel = AIDWins5match[[0]].copy()

In [89]:
# Add the remaining away-team (AID) features to `posel`: one column per feature frame,
# filled from column 0 of that frame.
for feature_name, feature_frame in (
    ('AIDWins10match', AIDWins10match),
    ('AIDWins15match', AIDWins15match),
    ('AIDWins20match', AIDWins20match),
    ('AIDWins1month', AIDWins1month),
    ('AIDWins2month', AIDWins2month),
    ('AIDWins4month', AIDWins4month),
    ('AIDWins6month', AIDWins6month),
    ('AIDWins12month', AIDWins12month),
    ('AIDGoalrat5match', AIDGoalrat5match),
    ('AIDGoalrat10match', AIDGoalrat10match),
    ('AIDGoalrat15match', AIDGoalrat15match),
    ('AIDGoalrat20match', AIDGoalrat20match),
):
    posel[feature_name] = feature_frame[[0]]

In [90]:
# Add the home-team (HID) features to `posel`: one column per feature frame,
# filled from column 0 of that frame.
for feature_name, feature_frame in (
    ('HIDWins5match', HIDWins5match),
    ('HIDWins10match', HIDWins10match),
    ('HIDWins15match', HIDWins15match),
    ('HIDWins20match', HIDWins20match),
    ('HIDWins1month', HIDWins1month),
    ('HIDWins2month', HIDWins2month),
    ('HIDWins4month', HIDWins4month),
    ('HIDWins6month', HIDWins6month),
    ('HIDWins12month', HIDWins12month),
    ('HIDGoalrat5match', HIDGoalrat5match),
    ('HIDGoalrat10match', HIDGoalrat10match),
    ('HIDGoalrat15match', HIDGoalrat15match),
    ('HIDGoalrat20match', HIDGoalrat20match),
):
    posel[feature_name] = feature_frame[[0]]

In [96]:
# NOTE(review): the previous cell already assigns this same column from the same frame;
# this cell looks redundant. Confirm and remove.
posel['HIDWins5match']=HIDWins5match[[0]]

In [91]:
# Copy the HSC, ASC, H, D and A columns of the raw table into `posel`.
# NOTE(review): the rows are matched purely by index label, so this presumably relies on `df`
# listing the matches in the same order in which the features were generated. Verify.
for raw_column in ('HSC', 'ASC', 'H', 'D', 'A'):
    posel[raw_column] = df[[raw_column]]

In [97]:
# Pairwise correlation between all columns of `posel` (DataFrame.corr with default settings).
corrmatrix=posel.corr()

In [98]:
# Display the correlation matrix.
corrmatrix

Unnamed: 0,0,AIDWins10match,AIDWins15match,AIDWins20match,AIDWins1month,AIDWins2month,AIDWins4month,AIDWins6month,AIDWins12month,AIDGoalrat5match,...,HIDGoalrat5match,HIDGoalrat10match,HIDGoalrat15match,HIDGoalrat20match,HSC,ASC,H,D,A,HIDWins5match
0,1.0,0.759415,0.651153,0.581891,0.167398,0.167398,0.167398,0.167398,0.167398,0.782318,...,-0.186872,-0.139539,-0.125434,-0.12247,-0.233521,0.308252,-0.296363,-0.132969,0.468413,-0.122207
AIDWins10match,0.759415,1.0,0.879366,0.800366,0.268417,0.268417,0.268417,0.268417,0.268417,0.621801,...,-0.142445,-0.105323,-0.093788,-0.091923,-0.190518,0.248836,-0.241309,-0.075994,0.34887,-0.081132
AIDWins15match,0.651153,0.879366,1.0,0.928537,0.345746,0.345746,0.345746,0.345746,0.345746,0.548383,...,-0.123401,-0.091549,-0.089209,-0.087596,-0.174127,0.222517,-0.218758,-0.052493,0.299738,-0.067027
AIDWins20match,0.581891,0.800366,0.928537,1.0,0.406124,0.406124,0.406124,0.406124,0.406124,0.499989,...,-0.111971,-0.084393,-0.080687,-0.085824,-0.163826,0.201648,-0.204629,-0.03835,0.269541,-0.061324
AIDWins1month,0.167398,0.268417,0.345746,0.406124,1.0,1.0,1.0,1.0,1.0,0.176067,...,-0.036113,-0.026133,-0.022981,-0.02241,-0.069514,0.071126,-0.08362,0.004925,0.089386,-0.039884
AIDWins2month,0.167398,0.268417,0.345746,0.406124,1.0,1.0,1.0,1.0,1.0,0.176067,...,-0.036113,-0.026133,-0.022981,-0.02241,-0.069514,0.071126,-0.08362,0.004925,0.089386,-0.039884
AIDWins4month,0.167398,0.268417,0.345746,0.406124,1.0,1.0,1.0,1.0,1.0,0.176067,...,-0.036113,-0.026133,-0.022981,-0.02241,-0.069514,0.071126,-0.08362,0.004925,0.089386,-0.039884
AIDWins6month,0.167398,0.268417,0.345746,0.406124,1.0,1.0,1.0,1.0,1.0,0.176067,...,-0.036113,-0.026133,-0.022981,-0.02241,-0.069514,0.071126,-0.08362,0.004925,0.089386,-0.039884
AIDWins12month,0.167398,0.268417,0.345746,0.406124,1.0,1.0,1.0,1.0,1.0,0.176067,...,-0.036113,-0.026133,-0.022981,-0.02241,-0.069514,0.071126,-0.08362,0.004925,0.089386,-0.039884
AIDGoalrat5match,0.782318,0.621801,0.548383,0.499989,0.176067,0.176067,0.176067,0.176067,0.176067,1.0,...,-0.246329,-0.185855,-0.16707,-0.160383,-0.373495,0.33029,-0.415085,0.081445,0.386266,-0.209938


In [93]:
# Second display of the correlation matrix. Its execution count (In [93]) is lower than that
# of the cell computing `corrmatrix` above (In [97]), so the output recorded below comes from
# an earlier run.
corrmatrix

Unnamed: 0,0,AIDWins10match,AIDWins15match,AIDWins20match,AIDWins1month,AIDWins2month,AIDWins4month,AIDWins6month,AIDWins12month,AIDGoalrat5match,...,HIDWins12month,HIDGoalrat5match,HIDGoalrat10match,HIDGoalrat15match,HIDGoalrat20match,HSC,ASC,H,D,A
0,1.0,0.759415,0.651153,0.581891,0.167398,0.167398,0.167398,0.167398,0.167398,0.782318,...,-0.024074,-0.186872,-0.139539,-0.125434,-0.12247,-0.233521,0.308252,-0.296363,-0.132969,0.468413
AIDWins10match,0.759415,1.0,0.879366,0.800366,0.268417,0.268417,0.268417,0.268417,0.268417,0.621801,...,0.023919,-0.142445,-0.105323,-0.093788,-0.091923,-0.190518,0.248836,-0.241309,-0.075994,0.34887
AIDWins15match,0.651153,0.879366,1.0,0.928537,0.345746,0.345746,0.345746,0.345746,0.345746,0.548383,...,0.061924,-0.123401,-0.091549,-0.089209,-0.087596,-0.174127,0.222517,-0.218758,-0.052493,0.299738
AIDWins20match,0.581891,0.800366,0.928537,1.0,0.406124,0.406124,0.406124,0.406124,0.406124,0.499989,...,0.09562,-0.111971,-0.084393,-0.080687,-0.085824,-0.163826,0.201648,-0.204629,-0.03835,0.269541
AIDWins1month,0.167398,0.268417,0.345746,0.406124,1.0,1.0,1.0,1.0,1.0,0.176067,...,0.513523,-0.036113,-0.026133,-0.022981,-0.02241,-0.069514,0.071126,-0.08362,0.004925,0.089386
AIDWins2month,0.167398,0.268417,0.345746,0.406124,1.0,1.0,1.0,1.0,1.0,0.176067,...,0.513523,-0.036113,-0.026133,-0.022981,-0.02241,-0.069514,0.071126,-0.08362,0.004925,0.089386
AIDWins4month,0.167398,0.268417,0.345746,0.406124,1.0,1.0,1.0,1.0,1.0,0.176067,...,0.513523,-0.036113,-0.026133,-0.022981,-0.02241,-0.069514,0.071126,-0.08362,0.004925,0.089386
AIDWins6month,0.167398,0.268417,0.345746,0.406124,1.0,1.0,1.0,1.0,1.0,0.176067,...,0.513523,-0.036113,-0.026133,-0.022981,-0.02241,-0.069514,0.071126,-0.08362,0.004925,0.089386
AIDWins12month,0.167398,0.268417,0.345746,0.406124,1.0,1.0,1.0,1.0,1.0,0.176067,...,0.513523,-0.036113,-0.026133,-0.022981,-0.02241,-0.069514,0.071126,-0.08362,0.004925,0.089386
AIDGoalrat5match,0.782318,0.621801,0.548383,0.499989,0.176067,0.176067,0.176067,0.176067,0.176067,1.0,...,-0.032638,-0.246329,-0.185855,-0.16707,-0.160383,-0.373495,0.33029,-0.415085,0.081445,0.386266


In [99]:
# Write the correlation matrix and the combined feature table to CSV files.
# NOTE(review): absolute, user-specific output paths.
corrmatrix.to_csv(r'/mnt/lustre/helios-home/srameon1/Downloads/corrmatrix1.csv')
posel.to_csv(r'/mnt/lustre/helios-home/srameon1/Downloads/goalratwins1.csv')

In [71]:
# NOTE(review): `osel` is not assigned in any cell visible here, so this cell raises NameError
# on a fresh kernel. The recorded output has the columns 0 and 'Goals', so it presumably
# belongs to an earlier experiment. Re-running it would also overwrite `corrmatrix`.
corrmatrix = osel.corr()

In [72]:
# Display the correlation matrix computed from `osel` in the cell above.
corrmatrix

Unnamed: 0,0,Goals
0,1.0,0.345867
Goals,0.345867,1.0


In [None]:
     ########################
        #  Storage attributes  #
        ########################
        self.curr_summary = None # current `summary` ﭾ
        self.curr_bets = None # current `bets` ﭾ
        self.curr_opps = None # current `opps` ﭾ
        self.curr_inc = None # current `inc` ﭾ
        self.curr_P_dis = None # current `P_dis` ﭾ


        ##########################
        #  Essential attributes  #
        ##########################
        self.today = None # current date
        self.bankroll = None # current bankroll
        # self.betting_runs = pd.DataFrame(columns = ['Sea','LID', 'HID','AID','OddsH','OddsD','OddsA','P(H)', 'P(D)', 'P(A)','BetH','BetD','BetA']) # `opps` that was passed with the associated `P_dis`, and the associated `bets` (series). Indexed by the date that it occured in opps.
        self.matches = pd.DataFrame(columns=['opps_Date','Sea','Date','Open','LID','HID','AID','HSC','ASC','H','D','A','OddsH','OddsD','OddsA','BetH','BetD','BetA']) # All matches played by IDs ﭾ


        #########################
        #  Features attributes  #
        #########################
        # 'LID = Leagues' 'SC = score (pd.DataFrame(columns=['TEAM', 'OPPO']))', 'RES = result (pd.DataFrame(columns=['TEAM', 'DRAW', 'OPPO']))', 'PLAYED = #matches_played (int)', 'NEW = new (bool)', 'ACU = accuracy (float)'
        self.team_index = pd.DataFrame(columns=['LID','LL_SC', 'LL_RES', 'LL_PLAYED', 'LL_ACCU']) # recorded teams
        self.time_data = pd.DataFrame(columns=['SL_SC', 'SL_RES', 'SL_PLAYED', 'SL_ACCU']) # data frame for storing all the time characteristics for seasons

        # 'SC = score (TEAM, OPPO)', 'RES = result (pd.DataFrame(columns=['TEAM', 'DRAW', 'OPPO']))', 'DATE = date', 'LM_SIDE = home/away (str)', 'LM_P_DIS = pd.DataFrame(columns=['win_p', 'draw_p', 'lose_p'])'
        self.last_match_data = pd.DataFrame(columns=['MatchID', 'LM_SC (T,O)', 'LM_RES (T,D,O)', 'LM_DATE', 'LM_SIDE (H,A)', 'LM_P_DIS (W,D,L)']) # data frame for storing all the Last-match characteristics
        self.matches_data = pd.DataFrame(columns=['M_DATA_FRAME']) # data frame for moving the data of the last match when a new match is played


In [82]:
# Inspect the 5-match wins feature of the home teams; in the recorded output every row
# has the index label 0.
HIDWins5match

Unnamed: 0,0
0,1.0
0,1.0
0,0.0
0,0.0
0,0.0
...,...
0,1.0
0,1.0
0,0.0
0,3.0


In [55]:
# Inspect `data.match_data`; the recorded output shows only the column header and no rows.
data.match_data

Unnamed: 0,MatchID,Date,Oppo,Home,Away,M_Goals_Scored,M_Goals_Conceded,M_Win,M_Draw,M_Lose,M_P(Win),M_P(Draw),M_P(Lose),M_Accu


In [34]:
# Spot check of goals_ratio for the IDs 519 and 261 over 20 matches with vs=False;
# the recorded result is nan.
data.goals_ratio(519, 261, matches = 20, vs = False)



nan

In [54]:
# Inspect the accumulated `data.matches` table.
data.matches

Unnamed: 0,opps_Date,Sea,Date,Open,LID,HID,AID,HSC,ASC,H,...,A,OddsH,OddsD,OddsA,P(H),P(D),P(A),BetH,BetD,BetA
0,NaT,2000.0,2000-03-19,2000-03-15,E1,593.0,341.0,3.0,0.0,1.0,...,0.0,0.00,0.00,0.00,,,,0.0,0.0,0.0
1,NaT,2000.0,2000-03-19,2000-03-15,E1,528.0,117.0,2.0,0.0,1.0,...,0.0,0.00,0.00,0.00,,,,0.0,0.0,0.0
2,NaT,2000.0,2000-03-19,2000-03-15,E1,486.0,491.0,0.0,0.0,0.0,...,0.0,0.00,0.00,0.00,,,,0.0,0.0,0.0
3,NaT,2000.0,2000-03-19,2000-03-15,E1,251.0,122.0,0.0,0.0,0.0,...,0.0,0.00,0.00,0.00,,,,0.0,0.0,0.0
4,NaT,2000.0,2000-03-19,2000-03-15,E1,483.0,599.0,0.0,1.0,0.0,...,1.0,0.00,0.00,0.00,,,,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37327,2011-06-26,2011.0,2011-06-30,2011-06-26,C1,224.0,474.0,,,,...,,2.26,3.74,3.18,,,,0.0,0.0,0.0
37328,2011-06-26,2011.0,2011-06-30,2011-06-26,C1,184.0,133.0,,,,...,,2.30,3.72,3.10,,,,0.0,0.0,0.0
37329,2011-06-26,2011.0,2011-06-30,2011-06-26,C1,134.0,111.0,,,,...,,1.73,4.17,4.82,,,,0.0,0.0,0.0
37330,2011-06-26,2011.0,2011-06-30,2011-06-26,O1,69.0,398.0,,,,...,,2.09,4.03,3.35,,,,0.0,0.0,0.0
