In [None]:
#| default_exp environment

In [None]:
#| hide

from IPython.core.debugger import set_trace

%load_ext autoreload
%autoreload 2

# OpenAI Gym Env

> Create a custom Gym environment to simulate a trading strategy.

In [None]:
#| export

import json
from collections import namedtuple
import datetime
from pathlib import Path
from typing import Dict, Tuple

import gym
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from fastcore.basics import *

from fastbet.utils.asian_1x2_pnl import *

In [None]:
import plotly.io as pio

In [None]:
# if you use jupyter lab
pio.renderers.default = "iframe"

In [None]:
#| export

# Bet sizes (small, medium, large), each a proportion in the range [0, 1].
SMALL_BET = 0.05
MEDIUM_BET = 0.2
LARGE_BET = 0.7

# Named tuple with one field per action: "no_bets" first, then every
# (selection, bet size) pair, grouped by selection and ordered
# small -> medium -> large inside each group.
Actions = namedtuple(
    "Actions",
    ["no_bets"]
    + [
        f"{size}_bet_on_{selection}"
        for selection in (
            "home_team_1x2",
            "away_team_1x2",
            "draw_1x2",
            "home_team_asian_handicap",
            "away_team_asian_handicap",
        )
        for size in ("small", "medium", "large")
    ],
)

Using the `config.toml` credentials included in the main repository, we must first load the games data stored in our MongoDB cluster before testing our betting environment.

In [None]:
from fastbet.config.localconfig import CONFIG,DB_HOSTS
from fastbet.datastructure.data_extractor import * 

In [None]:
fixtures = data_aggregator(db_hosts=DB_HOSTS, config=CONFIG, db_host="public_atlas")
fixtures.head()

Unnamed: 0,gameId,game_optaId,gameDate,homeTeamId,homeTeam_optaId,awayTeamId,awayTeam_optaId,tgt_gd,tgt_outcome,preGameOdds1,...,homeTeamLineupIds,homeTeamLineupSlots,homeTeamFormation,home_team_lineup_received_at,awayTeamName,awayTeamLineup,awayTeamLineupIds,awayTeamLineupSlots,awayTeamFormation,away_team_lineup_received_at
0,174dba7291174b4dbbfa9ea12dd944bb45bdd8ed905524...,990997,2018-08-22 18:45:00,126905d14981e6b97912ad4fec354035ccef26cb8ec4e1...,7,419088133137a53bfdb1b7e2e682d223d33a6fa075bbfe...,94,0,1.0,2.62,...,"[12150, 59115, 122806, 54764, 49773, 37339, 43...","[4, 6, 8, 9, 11, 7, 5, 2, 3, 1, 10]",4-4-1-1,2018-08-22 18:15:00,Brentford,"{""Daniel Bentley"": ""GK"", ""Ezri Konsa"": ""DCR"", ...","[79602, 199798, 115382, 114275, 176442, 223911...","[1, 5, 9, 8, 3, 6, 7, 10, 4, 11, 2]",4-2-3-1,2018-08-22 18:15:00
1,219ef70c0e8a803ec1efdb793443edfaa32398690c7829...,991003,2018-08-22 18:45:00,aeb2f56fcedbcf4cd5c780179766996c7bf0b308064541...,5,f8daf96ad35eebf1c0a5886c72734ba7dec366d6637052...,108,0,1.0,1.98,...,"[156685, 15398, 154561, 165183, 61602, 167802,...","[11, 9, 1, 3, 4, 5, 6, 2, 10, 8, 7]",4-2-3-1,2018-08-22 18:15:00,Reading,"{""Leandro Bacuna"": ""DMR"", ""Andy Yiadom"": ""DR"",...","[74297, 87408, 154133, 86857, 17506, 112133, 2...","[4, 2, 8, 6, 5, 3, 1, 7, 10, 11, 9]",4-4-2,2018-08-22 18:15:00
2,0655e244d8d596b5572e86426e2a7ca6178044efa59437...,991013,2018-08-25 14:00:00,9ee012a80cade2df55b71580bf5e238bcd6be6f696fdc1...,45,38ca605bcd29a5a37697ca66e533ae817ced71b6bf275c...,2,-3,2.0,2.62,...,"[52980, 84384, 108799, 83428, 57127, 87396, 20...","[2, 6, 8, 5, 7, 10, 1, 11, 3, 9, 4]",4-2-3-1,2018-08-25 13:30:00,Leeds United,"{""Mateusz Klich"": ""MCR"", ""Luke Ayling"": ""DR"", ...","[72222, 66588, 98760, 155405, 220037, 61810, 8...","[8, 2, 9, 4, 1, 5, 10, 7, 3, 11, 6]",4-1-4-1,2018-08-25 13:30:00
3,019c223b4a03917c2f1685beab4d5d278f7bff3913f239...,991018,2018-08-25 14:00:00,eb89c068ca204a72408360450847a990c97c5b5ff0ec9f...,110,bbb63e4ea54b0d60b48a1f8440254d7e656dfbfcbef825...,88,2,0.0,1.917,...,"[85352, 3773, 105666, 91972, 40555, 61858, 394...","[6, 9, 1, 10, 4, 7, 3, 11, 2, 5, 8]",4-4-1-1,2018-08-25 13:30:00,Hull City,"{""Eric Lichaj"": ""DR"", ""Jordy De Wijs"": ""DCL"", ...","[45139, 173549, 15144, 82771, 240499, 28541, 5...","[2, 6, 1, 4, 8, 9, 10, 3, 7, 11, 5]",4-4-1-1,2018-08-25 13:30:00
4,0f9ad12eec9f24277ab491f5f26f610eaa918903a34147...,991014,2018-08-25 16:30:00,04c71986b6503ba5b09a7098ceb79954d20049f21ba45b...,17,95d3bddc19a15d34a7876dcffc1a3e9bc63d809b69308a...,41,0,1.0,2.04,...,"[199796, 56827, 130593, 113564, 16045, 83427, ...","[7, 1, 5, 2, 8, 3, 4, 10, 9, 6, 11]",4-2-3-1,2018-08-25 16:00:00,Birmingham City,"{""Kristian Pedersen"": ""DL"", ""Maxime Colin"": ""D...","[229009, 86132, 77800, 114054, 69842, 85365, 8...","[3, 2, 4, 9, 11, 8, 7, 1, 5, 6, 10]",4-4-2,2018-08-25 16:00:00


## the `Observation` class

We provide here a simple class that stores our environment observations and exposes them in two formats:

+ The first format is a `numpy` array that encodes game information and that will be passed to the computation framework.
+ The second format is a user-friendly output that can be displayed on the screen.

In [None]:
#| export


class Observation:
    """Validated container for the information attached to a single game.

    The constructor checks the type/shape of every argument with `assert`
    statements (an `AssertionError` is raised on the first mismatch) and then
    stores all arguments as attributes of the same name via `store_attr()`.
    """

    def __init__(
        self,
        game_id: int,  # Game Id (Python int or any numpy integer).
        game_date: datetime.datetime,  # Game Date
        lineups: np.ndarray,  # Lineups(playerName:position), shape=(2,).
        lineups_ids: np.ndarray,  # Lineups opta Ids [list(11 home players Ids),list(11 away players Ids)], shape=(2,).
        lineups_slots: np.ndarray,  # Lineups slots [list(11 home positions Ids),list(11 away positions Ids)], shape=(2,).
        lineups_formation: np.ndarray,  # Lineups formations [home team formation, away team formation], shape=(2,).
        teams_names: np.ndarray,  # Team names (homeTeam name, awayteam name), shape=(2,).
        ra_teams_ids: np.ndarray,  # Teams Real-Analytics Ids [homeTeam Id, awayTeam Id], shape=(2,).
        opta_teams_ids: np.ndarray,  # Teams opta Ids [homeTeam Id, awayTeam Id], shape=(2,).
        betting_market: np.ndarray,  # Odds [[1X2 and Asian Handicap]], shape=(1,5).
        ah_line: float,  # Asian handicap line (Python float or any numpy floating).
        shape: tuple,  # Observation shape = (30,).
    ):
        # Checks on objects type and shape compatibilities.

        # Accept every integer flavour: the width of the numpy integer read from
        # a DataFrame is platform dependent (int32 vs int64), and callers may
        # also pass a plain Python int.
        assert isinstance(
            game_id, (int, np.integer)
        ), f"game_id must be an integer. Got {type(game_id)}."

        # Each per-team container holds exactly two entries: [home, away].
        assert lineups.shape == (
            2,
        ), f"Invalid shape for lineups: {lineups.shape}. Expected (2,)."
        assert lineups_ids.shape == (
            2,
        ), f"Invalid shape for lineups_ids: {lineups_ids.shape}. Expected (2,)."
        assert (
            len(lineups_ids[0]) == 11
        ), f"Invalid Home lineups_ids length: {len(lineups_ids[0])}. Expected 11 players."
        assert (
            len(lineups_ids[1]) == 11
        ), f"Invalid Away lineups_ids length: {len(lineups_ids[1])}. Expected 11 players."
        assert lineups_slots.shape == (
            2,
        ), f"Invalid shape for lineups_slots: {lineups_slots.shape}. Expected (2,)."
        assert (
            len(lineups_slots[0]) == 11
        ), f"Invalid Home lineups_slots length: {len(lineups_slots[0])}. Expected 11 players."
        assert (
            len(lineups_slots[1]) == 11
        ), f"Invalid Away lineups_slots length: {len(lineups_slots[1])}. Expected 11 players."

        assert lineups_formation.shape == (
            2,
        ), f"Invalid shape for lineups_formation: {lineups_formation.shape}. Expected (2,)."
        assert teams_names.shape == (
            2,
        ), f"Invalid shape for teams_names: {teams_names.shape}. Expected 2."
        assert ra_teams_ids.shape == (
            2,
        ), f"Invalid shape for ra_teams_ids: {ra_teams_ids.shape}. Expected (2,)."

        assert opta_teams_ids.shape == (
            2,
        ), f"Invalid shape for opta_teams_ids: {opta_teams_ids.shape}. Expected (2,)."

        # A single row holding the five odds values.
        assert betting_market.shape == (
            1,
            5,
        ), f"Invalid shape for betting_market: {betting_market.shape}. Expected (1, 5)."

        # np.float64 already subclasses float; np.floating also lets narrower
        # numpy floats (e.g. float32) through.
        assert isinstance(
            ah_line, (float, np.floating)
        ), f"ah_line must be a float. Got {type(ah_line)}."
        assert shape == (30,), f"Invalid observation_shape: {shape}. Expected (30,)."

        # Store every constructor argument as an attribute of the same name.
        store_attr()

Next, we patch some useful methods to *mimic* the behaviour of a `numpy` array:

In [None]:
#| export


@patch
def __call__(self: Observation) -> Observation:
    """Numpy encoder.

    Builds `self.numerical_observation`, a flat array laid out as
    [game id, home/away opta team ids, home lineup ids, away lineup ids, odds],
    records its dtype in `self.dtype` and returns `self` so calls can be chained.
    """
    # Every chunk is turned into a list first so that "+" always concatenates:
    # if a lineup arrives as an ndarray instead of a list, "+" would otherwise
    # broadcast/add element-wise instead of appending.
    self.numerical_observation = np.array(
        [self.game_id]
        + list(self.opta_teams_ids)
        + list(self.lineups_ids[0])
        + list(self.lineups_ids[1])
        + list(self.betting_market.flatten())
    )
    self.dtype = self.numerical_observation.dtype
    return self


@patch
def reshape(
    self: Observation,
    new_shape: tuple,  # Shape to give to the encoded array.
) -> Observation:
    "Replace `numerical_observation` by its reshaped version and return `self`."
    encoded = self.numerical_observation
    self.numerical_observation = encoded.reshape(new_shape)
    return self


@patch
def astype(
    self: Observation,
    data_type: str,  # Dtype to cast the encoded array to.
) -> Observation:
    "Replace `numerical_observation` by a copy cast to `data_type` and return `self`."
    encoded = self.numerical_observation
    self.numerical_observation = encoded.astype(data_type)
    return self

Finally, we provide a user-friendly formatting method:

In [None]:
#| export


@patch
def pretty(self: Observation) -> pd.DataFrame:
    "User-friendly output: a one-row DataFrame, also kept as a dict in `self.observation`."
    # `betting_market` has a single row; pull it out once and index by column.
    # NOTE(review): the labels below assume the columns are ordered
    # (1, X, 2, AH home, AH away), while this notebook builds the market from
    # (preGameOdds1, preGameOdds2, preGameOddsX, ...) -- confirm which order is
    # intended, otherwise "oddsX" and "odds2" are displayed swapped.
    odds = self.betting_market[0]
    home, away = 0, 1

    self.observation = {
        "gameId": [self.game_id],
        "gameDate": [self.game_date],
        "homeTeam": [self.teams_names[home]],
        "awayTeam": [self.teams_names[away]],
        "homeLineup": self.lineups[home],
        "awayLineup": self.lineups[away],
        "homeFormation": [self.lineups_formation[home]],
        "awayFormation": [self.lineups_formation[away]],
        "odds1": odds[0],
        "oddsX": odds[1],
        "odds2": odds[2],
        "oddsAhHome": odds[3],
        "oddsAhAway": odds[4],
        "ahLine": [self.ah_line],
    }

    return pd.DataFrame(self.observation, index=[0])

In [None]:
# Get 1 example from the extracted data (a one-row DataFrame, so `.values[0]`
# below always picks that single row).
fixture = fixtures.head(1)

# Init an Observation.
observation = Observation(
    game_id=fixture.game_optaId.values[0],  # Game Id.
    game_date=fixture.gameDate.values[0],  # Game date.
    lineups=fixture[["homeTeamLineup", "awayTeamLineup"]].values[
        0
    ],  # Lineup (playerName:position).
    lineups_ids=fixture[["homeTeamLineupIds", "awayTeamLineupIds"]].values[
        0
    ],  # Lineups opta Ids [list(11 home players Ids),list(11 away players Ids)].
    lineups_slots=fixture[["homeTeamLineupSlots", "awayTeamLineupSlots"]].values[
        0
    ],  # Lineups slots [list(11 home positions Ids),list(11 away positions Ids)].
    lineups_formation=fixture[["homeTeamFormation", "awayTeamFormation"]].values[
        0
    ],  # Lineups formations [home team formation, away team formation].
    teams_names=fixture[["homeTeamName", "awayTeamName"]].values[
        0
    ],  # Team names (homeTeam name, awayteam name).
    ra_teams_ids=fixture[["homeTeamId", "awayTeamId"]].values[0],  # Teams Real-Analytics Ids [homeTeam Id, awayTeam Id].
    opta_teams_ids=fixture[["homeTeam_optaId", "awayTeam_optaId"]].values[
        0
    ],  # Teams opta Ids [homeTeam Id, awayTeam Id].
    # NOTE(review): columns are passed in (1, 2, X) order, whereas
    # `Observation.pretty` labels positions 1 and 2 as "oddsX" and "odds2" --
    # confirm the intended order.
    betting_market=fixture[
        [
            "preGameOdds1",
            "preGameOdds2",
            "preGameOddsX",
            "preGameAhHome",
            "preGameAhAway",
        ]
    ].values,  # Odds [[1X2 and Asian Handicap]]; kept 2-D, shape (1, 5).
    ah_line=fixture.LineId.values[0],  # Asian handicap line.
    shape=(30,),  # Observation shape.
)

In [None]:
#| include: false

# Observation format : numpy encoder
num_observation = observation().numerical_observation
num_observation

array([9.90997e+05, 7.00000e+00, 9.40000e+01, 1.21500e+04, 5.91150e+04,
       1.22806e+05, 5.47640e+04, 4.97730e+04, 3.73390e+04, 4.32520e+04,
       1.80804e+05, 1.54050e+04, 9.87700e+04, 1.14283e+05, 7.96020e+04,
       1.99798e+05, 1.15382e+05, 1.14275e+05, 1.76442e+05, 2.23911e+05,
       1.74932e+05, 6.16000e+04, 8.88210e+04, 1.78301e+05, 6.04790e+04,
       2.62000e+00, 2.77000e+00, 3.53000e+00, 1.49000e+00, 2.75000e+00])

In [None]:
pretty_output = observation().pretty()
pretty_output

Unnamed: 0,gameId,gameDate,homeTeam,awayTeam,homeLineup,awayLineup,homeFormation,awayFormation,odds1,oddsX,odds2,oddsAhHome,oddsAhAway,ahLine
0,990997,2018-08-22 18:45:00,Aston Villa,Brentford,"{""Glenn Whelan"": ""DMR"", ""Mile Jedinak"": ""DCL"",...","{""Daniel Bentley"": ""GK"", ""Ezri Konsa"": ""DCR"", ...",4-4-1-1,4-2-3-1,2.62,2.77,3.53,1.49,2.75,0.5


## Betting Environment

### Betting assumptions

Reinforcement Learning is a branch of machine learning (ML) that focuses on the complex and all-encompassing issue of training a system to behave appropriately in a given situation. Only the value of the reward and observations made about the environment are used to drive learning. The generality of this model allows it to be used in a wide range of real-world contexts, from gaming to the improvement of sophisticated industrial procedures.

In this perspective, the environment and the agent are two crucial elements of the Reinforcement Learning (RL) problem. The environment is the Agent's world where it exists and the Agent can engage in interactions with this environment by taking certain actions which cannot change the environment's laws or dynamics.

The goal of this work is to develop an RL environment that simulates a betting strategy. The theory underlying this environment is quite straightforward: 

1. select a *market* and a *selection* (side) based on the available odds. 
2. decide how much to invest (stake-size).
3. compute a profit and loss (PnL) based on the game outcome.

The real-world situation can be quite complex, with multiple markets available and different betting options. To keep things simple, the agent is only allowed to:
- place a *small*, *medium*, or a *large* bet.
- select at most **one** betting opportunity out of *home*, *draw*, or *away* for the *1x2* market, and *home* or *away* on the even Asian handicap line.
- the agent is only allowed to pick one size of bets and a unique selection to bet on (out of the 5 options available).
- games are presented to the agent as soon as the official lineup becomes available.

### `gym` environment

Based on the betting assumptions discussed above, we will implement a custom `OpenAI Gym` environment.
+ an observation is an instance of the `Observation` class with the following information:
    - game-identifier 
    - home and away team lineup 
    - available odds for the `1x2` and the even money Asian handicap markets.
+ the action space is discrete with size 16: one *no bet* action plus 15 betting actions, each of which combines one bet size (1 out of 3) with one of the available selections (1 out of 5).
+ the reward (the investment return), which can be positive or negative, is then calculated and the agent bank updated.
+ an episode terminates if i) no more games are available to bet on ii) the agent goes bankrupt.

#### Initialize

The environment is initialized by extending the `gym.Env` base class:

In [None]:
#| export


class BettingEnv(gym.Env):
    """OpenAI Gym class for football betting environments.

    The environment walks through the rows of `game_info` one game at a time.
    An action selects one row of `actions_list`: a vector of five stakes
    (home, draw, away on 1x2, then home, away on the Asian handicap), each
    expressed as a proportion of `starting_bank`.
    """

    metadata = {"render_modes": ["human"]}

    def __init__(
        self,
        game_info: pd.DataFrame,  # Games with betting odds and other info.
        odds_column_names: list = [
            "preGameOdds1",
            "preGameOdds2",
            "preGameOddsX",
            "preGameAhHome",
            "preGameAhAway",
        ],  # Betting odds column names.
        starting_bank: float = 100.0,  # Starting bank account.
        small_bet: float = SMALL_BET,  # Small bet proportion value.
        medium_bet: float = MEDIUM_BET,  # Medium bet proportion value.
        large_bet: float = LARGE_BET,  # Large bet proportion value.
    ) -> None:
        "Initializes a new environment."

        super().__init__()

        # Games dataframe (private copy: the caller's frame is never modified).
        self._game = game_info.copy()

        # Sort data by date.
        if set(
            ["home_team_lineup_received_at", "away_team_lineup_received_at", "gameDate"]
        ).issubset(set(self._game.columns)):
            # Get max lineup timestamp.
            self._game["lineupReceivedAt"] = self._game[
                ["home_team_lineup_received_at", "away_team_lineup_received_at"]
            ].max(axis=1)
            # Sort.
            self._game = self._game.sort_values(
                by=["lineupReceivedAt", "gameDate"]
            ).reset_index()
            # Get gameDate date part.
            self._game["gameDate"] = pd.to_datetime(self._game["gameDate"]).dt.date
            # Shift the timestamp values by adding an offset based on each row index
            # (row i is shifted by i seconds).
            offset = pd.Timedelta("1 second")
            self._game["lineupReceivedAt"] = (
                self._game["lineupReceivedAt"] + self._game.index.to_series() * offset
            )
        else:
            # `current_step` is taken from the index and then used as a position
            # into the numpy arrays below, so the index must be 0..n-1 here too.
            self._game = self._game.reset_index(drop=True)

        # Games ids.
        self._game_ids = self._game["game_optaId"].values

        # Odds (1X2 and Asian handicap) values.
        # list(...) avoids aliasing the (mutable) default argument.
        # NOTE(review): the default order is (1, 2, X, AH home, AH away) while the
        # stake vectors in `actions_list` put the draw at position 1 and the away
        # team at position 2 -- confirm against `pnl` which order is expected.
        self._odds = self._game[list(odds_column_names)].values

        # Ah lines.
        self._lines = self._game["LineId"].values

        # Teams names.
        self._teams_names = self._game[["homeTeamName", "awayTeamName"]].values

        # Teams RA id.
        self._ra_teams_ids = self._game[["homeTeamId", "awayTeamId"]].values

        # Teams Opta id.
        self._teams_ids = self._game[["homeTeam_optaId", "awayTeam_optaId"]].values

        # Teams lineups (names and positions).
        self._lineups = self._game[["homeTeamLineup", "awayTeamLineup"]].values

        # Teams lineups (players opta ids).
        self._lineups_ids = self._game[
            ["homeTeamLineupIds", "awayTeamLineupIds"]
        ].values

        # Teams lineups slots (players positions ids).
        self._lineups_slots = self._game[
            ["homeTeamLineupSlots", "awayTeamLineupSlots"]
        ].values

        # Teams formation.
        self._lineups_formations = self._game[
            ["homeTeamFormation", "awayTeamFormation"]
        ].values

        # Results (homewin -> 0 , draw -> 1, awaywin -> 2).
        self._results = self._game["tgt_outcome"].values

        # Game goal-difference.
        self._gd = self._game["tgt_gd"].values

        # Env balance.
        self.balance, self.starting_bank = starting_bank, starting_bank

        # Current step (game).
        self.current_step = self._game.index[0]

        # Cummulative reward.
        self.cummulative_profit = [0]

        # Cummulative balance.
        self.cummulative_balance = [self.balance]

        # Cummulative bets.
        self.bets = []

        # Actions: one stake vector per field of `Actions`, in the same order.
        self.actions_list = Actions(
            *np.array(
                [
                    [0, 0, 0, 0, 0],  # No bets.
                    [small_bet, 0, 0, 0, 0],  # Betting on home team (1x2).
                    [medium_bet, 0, 0, 0, 0],  # Betting on home team (1x2).
                    [large_bet, 0, 0, 0, 0],  # Betting on home team (1x2).
                    [0, 0, small_bet, 0, 0],  # Betting on away team (1x2).
                    [0, 0, medium_bet, 0, 0],  # Betting on away team (1x2).
                    [0, 0, large_bet, 0, 0],  # Betting on away team (1x2).
                    [0, small_bet, 0, 0, 0],  # Betting on draw (1x2).
                    [0, medium_bet, 0, 0, 0],  # Betting on draw (1x2).
                    [0, large_bet, 0, 0, 0],  # Betting on draw (1x2).
                    [0, 0, 0, small_bet, 0],  # Betting on home (Asian Handicap).
                    [0, 0, 0, medium_bet, 0],  # Betting on home (Asian Handicap).
                    [0, 0, 0, large_bet, 0],  # Betting on home (Asian Handicap).
                    [0, 0, 0, 0, small_bet],  # Betting on away (Asian Handicap).
                    [0, 0, 0, 0, medium_bet],  # Betting on away (Asian Handicap).
                    [0, 0, 0, 0, large_bet],  # Betting on away (Asian Handicap).
                ]
            )
        )

        # Plotly figure.
        # Init figure.
        self.fig = go.Figure()

        # Set titles.
        self.fig.update_layout(
            title="Cumulative performance over time",
            xaxis_title="Date",
            yaxis_title="Profit & Bank",
            xaxis=dict(type="category", tickangle=50, tickfont=dict(size=12)),
        )

        # Hide x axis grid.
        self.fig.update_xaxes(showgrid=False)

        # Init figure with initial data (trace 0 = profit line, trace 1 = balance bars).
        self.fig.add_scatter(
            x=[self.current_step], y=self.cummulative_profit, name="Profit"
        )
        self.fig.add_bar(
            x=[self.current_step], y=self.cummulative_balance, name="Balance"
        )

        # Gym action space.
        self.action_space = gym.spaces.Discrete(
            len(self.actions_list)
        )  # Betting action

        # Gym observation space.
        self.observation_space = gym.spaces.Box(
            low=-np.inf,
            high=np.inf,
            shape=(
                self._odds.shape[1] + 25,
            ),  # 25 = 22(players Ids) + 2(home and away team ids) + 1(gameId).
            dtype=np.float64,
        )

    def _get_current_index(
        self,
    ) -> int:  # Current step index.
        "Returns the current index of the current game (wrapped to the number of games)."
        return self.current_step % self._game.shape[0]

    def get_odds(
        self,
    ) -> np.ndarray:  # Current step (1X2 and Asian Handicap) odds, shape=(1,5).
        "Returns odds for the current step"
        return self._odds[self.current_step].reshape((1, -1))

    def get_bet(
        self,
        action: int,  # The chosen action (integer value) by the agent.
    ) -> np.ndarray:  # Betting choice list of 5 values (4 are 0 and 1 takes (small/medium/large bet size)).
        "Returns the betting matrix for the provided action, capped by the available balance."
        # Copy: the row stored in `actions_list` must never be modified.
        bet = np.array(self.actions_list[action], dtype=float)
        req_bet_size = bet.max()

        # Stakes are proportions of `starting_bank` (`step` multiplies them by
        # it), so the balance has to be expressed in the same unit before the
        # comparison; otherwise the cap almost never applies and the agent can
        # stake more money than it owns.
        if self.starting_bank > 0:
            affordable_bet_size = max(self.balance, 0.0) / self.starting_bank
        else:
            affordable_bet_size = 0.0

        bet[np.argmax(bet)] = min(req_bet_size, affordable_bet_size)

        return bet

    def create_info(
        self,
        action: int,  # The chosen action by the agent.
    ) -> dict:  # Current step information.
        "Creates the info dictionary for the given action."
        return {
            "current_step": self.current_step,
            "odds": self.get_odds(),
            "bet_action": self.actions_list[action],
            "balance": self.balance,
            "reward": 0,
            "bet_placed": False,
            "gd": None,
            "done": False,
        }

#### Get an `observation`

In [None]:
#| export


@patch
def get_observation(
    self: BettingEnv,
) -> Observation:  # Current Observation instance.
    "Returns the (already encoded) observation of the current step."
    # Current game index (a position into the per-game arrays).
    index = self._get_current_index()

    # Observation. The trailing call runs the numpy encoder so that
    # `numerical_observation` is available on the returned instance.
    return Observation(
        game_id=self._game_ids[index],
        # `.iloc`: `index` is a position, like for every array below; a plain
        # `[index]` would be a label lookup on the DataFrame index.
        game_date=self._game["gameDate"].iloc[index],
        lineups=self._lineups[index],
        lineups_ids=self._lineups_ids[index],
        lineups_slots=self._lineups_slots[index],
        lineups_formation=self._lineups_formations[index],
        teams_names=self._teams_names[index],
        ra_teams_ids=self._ra_teams_ids[index],
        opta_teams_ids=self._teams_ids[index],
        betting_market=self.get_odds(),
        ah_line=self._lines[index],
        shape=self.observation_space.shape,
    )()

#### `reset` the environment

In [None]:
#| export


@patch
def reset(
    self: BettingEnv,
) -> Observation:  # Initial Observation instance.
    "Puts the environment back in its initial state and returns the first observation."

    # Bank back to its starting value, pointer back on the first game.
    self.balance = self.starting_bank
    self.current_step = self._game.index[0]

    # Fresh history: profit, balance and bets.
    self.cummulative_profit, self.cummulative_balance, self.bets = (
        [0],
        [self.balance],
        [],
    )

    # Rebuild the figure from scratch.
    fig = go.Figure()
    fig.update_layout(
        title="Cumulative performance over time",
        xaxis_title="Date",
        yaxis_title="Profit & Bank",
        xaxis={"type": "category", "tickangle": 50, "tickfont": {"size": 12}},
    )
    # No grid on the x axis.
    fig.update_xaxes(showgrid=False)

    # Trace order matters: the profit line first, the balance bars second.
    start = [self.current_step]
    fig.add_scatter(x=start, y=self.cummulative_profit, name="Profit")
    fig.add_bar(x=start, y=self.cummulative_balance, name="Balance")
    self.fig = fig

    # First observation of the new episode.
    return self.get_observation()

#### The `step` method

A step in the environment executes the following:
+ Initialize the info collector
+ Check the balance to decide whether the betting should continue or not
+ If some money is available, the bet is executed (even if the money is shorter than the required amount) and the results computed.
+ The bet outcome (reward) is then computed and the agent's balance and bank are updated.
+ Finally, we check if the episode is done by consulting if there are games left in the buffer. 

In [None]:
#| export

STEP = Tuple[Observation, float, bool, Dict]


@patch
def step(
    self: BettingEnv,
    action: int,  # The chosen action by the agent.
) -> STEP:  # Returns (observation, reward, done, info).
    """Run one timestep of the environment's dynamics. It accepts an action and returns a tuple (observation, reward, done, info)

    The episode is flagged `done` when the balance is not strictly positive
    (no bet is placed in that case) or when the game that was just played is
    the last one. On a `done` step the returned observation is a placeholder
    array of ones with the observation-space shape.
    """

    # Init observation (placeholder, only returned when the episode is over).
    observation = np.ones(shape=self.observation_space.shape)

    # Reward.
    reward = 0.0

    # Finish flag.
    done = False

    # Initialise info.
    info = self.create_info(action)

    # If no more money.
    if self.balance <= 0.0:
        done = True
    else:
        # Reward (positive or negative).
        _obs_gd = np.array(listify(self._gd[self.current_step]))
        _ah_line = np.array(listify(self._lines[self.current_step]))

        # Stakes are proportions: scale them by the starting bank to get money.
        _reward = pnl(
            selection=self.get_bet(action).reshape((1, -1)) * self.starting_bank,
            odds=self.get_odds().reshape((1, -1)),
            obs_gd=_obs_gd,
            ah_line=_ah_line,
        ).squeeze(0)
        reward = _reward[0]

        # Update balance.
        self.balance += reward
        info.update(bet_placed=True)
        # Update info.
        info.update(gd=self._gd[self.current_step])
        info.update(reward=reward)

        # Increment step.
        _next_it = np.where(self._game.index == self.current_step)[0][0] + 1
        if _next_it < self._odds.shape[0]:
            self.current_step = self._game.index[_next_it]
            observation = self.get_observation()
            # Save the action label. `actions_list` follows the field order of
            # `Actions`, so the label is looked up by position; comparing stake
            # vectors instead is ambiguous when two bet sizes are equal.
            self.bets.append(
                [self.actions_list._fields[action].replace("_", " ").capitalize()]
            )
            # save current states (the per-step reward and the running balance).
            self.cummulative_profit.append(round(reward, 2))
            self.cummulative_balance.append(round(self.balance, 2))
            # NOTE(review): the history is only extended when a next game exists,
            # so the bet placed on the very last game never reaches `bets` /
            # `cummulative_*` (and therefore the chart) -- confirm this is intended.

        else:
            done = True

    # Update flag.
    info.update(done=done)
    # Return results.
    return observation, reward, done, info

In [None]:
#| export


@patch
def render(
    self: BettingEnv,
) -> None:
    """Updates the figure with the current step data and displays it.

    Can be called at any time, including right after `reset` when no bet has
    been recorded yet (only the initial point is then drawn).
    """
    # Display Graph.
    # Get current fig data (trace 0 = profit line, trace 1 = balance bars).
    scatter = self.fig.data[0]
    bar = self.fig.data[1]

    if "lineupReceivedAt" in self._game.columns:
        # Fig x-axis is lineups timestamp.
        fig_x_axis = list(
            self._game["lineupReceivedAt"][: self.current_step]
            .dt.strftime("%y-%m-%d %H:%M:%S")
            .values
        )
        # When the bet has not yet begun, provide an empty value for the first initial step.
        fig_x_axis.insert(0, "Inital Step")
    else:
        fig_x_axis = list(range(self.current_step + 1))

    # Update X-axis (0-> current step).
    scatter.x, bar.x = fig_x_axis, fig_x_axis
    # Update Y-axis (profit and current balance).
    scatter.y = self.cummulative_profit
    bar.y = self.cummulative_balance
    # Add hover-text to the fig.
    scatter.text = self.cummulative_balance

    # Recorded bet labels as an explicit (n, 1) column: an empty Python list
    # would be seen as 1-D by `np.hstack` and break the stacking below.
    if self.bets:
        bets = np.array(self.bets, dtype=object)
    else:
        bets = np.empty((0, 1), dtype=object)

    # We want to viz game and bet info (game date, teams, 1X2 and AH odds and the performed action).
    custom_data = np.hstack(
        (
            self._teams_names[: self.current_step],
            self._odds[: self.current_step],
            bets,
            self._game["gameDate"][: self.current_step].values.reshape(-1, 1),
            self._game["tgt_outcome"][: self.current_step]
            .map({0.0: "Home Win", 2.0: "Away Win", 1.0: "Draw"})
            .values.reshape(-1, 1),
        )
    )
    # Add this row to Viz Initial state before starting bet. It is built from
    # the column count rather than from `custom_data[0]`, which does not exist
    # before the first bet.
    initial_step_infos = np.full(
        (1, custom_data.shape[1]), "", dtype=custom_data.dtype
    )
    custom_data = np.concatenate([initial_step_infos, custom_data])
    # Add this info to the figure.
    scatter.customdata = custom_data
    scatter.hovertemplate = "<br><b>Game: </b>%{customdata[0]} VS %{customdata[1]}\
        <br><b>Game Date: </b>%{customdata[8]}\
        <br><b>Game Result: </b>%{customdata[9]}\
        <br><b>1X2 Odds: </b>%{customdata[2]} %{customdata[3]} %{customdata[4]}\
        <br><b>Asian Handicap Odds: </b>%{customdata[5]} %{customdata[6]}\
        <br><b>Bet Action: </b>%{customdata[7]}\
        <br><b>Balance: </b>%{text}\
        <br><b>Profit: </b> %{y}\
        "
    # Display fig.
    self.fig.update_layout(hovermode="x")
    self.fig.show()

# Agent - Env

Here, we'll set up our betting environment and let the computer program play and make decisions at random.

In [None]:
env = BettingEnv(fixtures)
max_steps_limit = fixtures.shape[0]

In [None]:
# Init RL env.
env.reset()

# Seed the action sampler so that this random run gives the same sequence of
# bets (and therefore the same chart) every time the notebook is re-executed.
RANDOM_SEED = 42
env.action_space.seed(RANDOM_SEED)

# Init done Flag to False.
done = False

# Init loop counter.
i = 0

# Stops when it is done or when we have bet on all provided games.
while not done and i < max_steps_limit:
    # Make a step with a randomly sampled action.
    obs, reward, done, info = env.step(env.action_space.sample())
    # Increment counter.
    i = i + 1

# Display graph.
env.render()

In [None]:
#| hide

import nbdev

nbdev.nbdev_export()