In [None]:
#| default_exp datastructure.game_features

In [None]:
#| hide

from IPython.core.debugger import set_trace

%load_ext autoreload
%autoreload 2

# Game Features
> This module defines the data structure required to extract game features.

In [None]:
#| export

import pandas as pd
import mongoengine
import datetime
import logging

## Games

These features are computed at the game level and contain:
+ **metadata**: general information about game date, team identity, competition, ...
+ **stadium** information: coordinates, surface, capacity, ...
+ **condition**: time of day, attendance, weather, ...
+ **dominance**: the head-to-head record between the home and away teams

The document also contains *target* information, i.e. the different prediction targets such as the match outcome, score, goal difference, ... They are identified by the `tgt_` prefix.

In [None]:
#| export


class GameFeatures(mongoengine.Document):
    """Store the game-features for a given game identified by its id.

    Each attribute maps to the MongoDB key given by its `db_field`. Fields
    prefixed with `tgt_` hold prediction targets. Documents are read from the
    `gameFeatures_test` collection through the `features` connection alias.
    """

    # Game-id.
    game_id = mongoengine.StringField(db_field="gameId", required=True)
    game_opta_id = mongoengine.IntField(db_field="game_optaId", required=False)

    # Game-date/time.
    game_date = mongoengine.DateTimeField(db_field="gameDate", required=True)
    season_id = mongoengine.IntField(db_field="seasonId", required=True)

    # Competition.
    competition_id = mongoengine.StringField(db_field="competitionId", required=True)

    # Team-ids.
    # Home.
    home_team_id = mongoengine.StringField(db_field="homeTeamId", required=True)
    ht_opta_id = mongoengine.IntField(db_field="homeTeam_optaId", required=False)

    # Away.
    away_team_id = mongoengine.StringField(db_field="awayTeamId", required=True)
    at_opta_id = mongoengine.IntField(db_field="awayTeam_optaId", required=False)

    # Stadium.
    venue_id = mongoengine.StringField(db_field="venueId", required=True)
    city = mongoengine.StringField(db_field="city", required=False)
    country = mongoengine.StringField(db_field="country", required=False)
    field_surface = mongoengine.StringField(db_field="surface", required=True)
    field_length_m = mongoengine.IntField(db_field="field_length_m", required=False)
    field_width_m = mongoengine.IntField(db_field="field_width_m", required=False)
    venue_capacity = mongoengine.IntField(db_field="capacity", required=False)
    venue_latitude = mongoengine.FloatField(db_field="lat", required=False)
    # NOTE(review): the attribute name lacks the underscore used by
    # `venue_latitude`; kept as-is because renaming it would break callers.
    venuelongitude = mongoengine.FloatField(db_field="long", required=False)

    # Condition.
    is_closed_doors = mongoengine.BooleanField(
        db_field="is_behind_closed_doors", required=False
    )

    # Head to head (both values are constrained to the [0, 1] interval).
    home_dominance = mongoengine.FloatField(
        db_field="homeTeamDominance", min_value=0, max_value=1, required=True
    )
    away_dominance = mongoengine.FloatField(
        db_field="awayTeamDominance", min_value=0, max_value=1, required=True
    )

    # Targets.
    tgt_half_gd = mongoengine.IntField(db_field="tgt_half_gd", required=False)
    tgt_gd = mongoengine.IntField(db_field="tgt_gd", required=False)
    tgt_htg = mongoengine.IntField(db_field="tgt_homeTeamGoals", required=False)
    tgt_atg = mongoengine.IntField(db_field="tgt_awayTeamGoals", required=False)
    tgt_half_htg = mongoengine.IntField(
        db_field="tgt_half_homeTeamGoals", required=False
    )
    tgt_half_atg = mongoengine.IntField(
        db_field="tgt_half_awayTeamGoals", required=False
    )
    tgt_outcome = mongoengine.FloatField(db_field="tgt_outcome", required=False)
    tgt_half_outcome = mongoengine.FloatField(
        db_field="tgt_half_outcome", required=False
    )

    meta = {
        "db_alias": "features",
        "collection": "gameFeatures_test",
        # Stored documents appear to carry keys that are not declared on this
        # class (e.g. `competition_optaId`, `time_period`). Without
        # `strict=False`, building a document from such a record raises
        # `FieldDoesNotExist`, which would break `get_game`.
        "strict": False,
    }

    @classmethod
    def get_all_games(
        cls,
        limit: int = None,  # Maximum number of games to return; `None` returns them all.
    ):  # QuerySet of `GameFeatures`, sorted by ascending game date.
        "Extract all games, oldest first, optionally capped at `limit`."
        # Sort on the Python field name, consistent with the `game_id` filter in
        # `get_game`; mongoengine translates it to the `gameDate` db key.
        games = cls.objects().order_by("game_date")
        # Only apply a cap when one was requested: `None` is outside the
        # documented contract of `QuerySet.limit`.
        if limit is None:
            return games
        return games.limit(limit)

    @classmethod
    def get_game(
        cls,
        ra_game_id: str,  # Real-analytics game identifier.
    ):  # Matching `GameFeatures` document, or `None` when no game matches.
        "Extract a single game by its gameId."
        return cls.objects(game_id=ra_game_id).first()

In [None]:
from fastbet.config.mongo import mongo_init

In [None]:
# Initialise connections.
mongo_init(db_host="public_atlas")

# Fetch the five earliest games (the queryset is sorted by game date).
fixtures_qs = GameFeatures.get_all_games(limit=5)

# Convert the raw MongoDB records into a DataFrame for display. The queryset
# gets its own name so `fixtures` is only ever bound to the DataFrame.
fixtures = pd.DataFrame(fixtures_qs.as_pymongo())
fixtures.head()

Unnamed: 0,_id,game_optaId,gameDate,seasonId,competition_optaId,time_period,tgt_half_gd,tgt_gd,tgt_homeTeamGoals,tgt_awayTeamGoals,...,homeTeamManagerDays,awayTeamManagerId,awayTeamManagerName,awayTeamManagerDays,homeTeamDominance,awayTeamDominance,competitionId,gameId,homeTeamId,awayTeamId
0,c0c48eee0b1a42e0d84cb0a947fe2c64f9e1aa7015922f...,990998,2018-08-21 18:45:00,2018,10,2018_34,0,2,2,0,...,51,37257695f892331fe6751da4b1ad9963fb3c7687386bd7...,Paul Hurst,51,0.5,0.5,2938f6103c8ba81a5c9a2822113eab2b2bcd4f175d655f...,c0c48eee0b1a42e0d84cb0a947fe2c64f9e1aa7015922f...,bc9d5de208258f2f95282c59e9551310be9d319ebc6e4e...,4a625f945d8f58984be0aa7b2ac6409a23ed9cf48e4260...
1,3a604f5616b39eb17fc8d1eed07d5248e387bf400294b2...,991000,2018-08-21 18:45:00,2018,10,2018_34,-1,-3,0,3,...,51,4eb3ad93d4e0f3bbd1839b367ec794ad3d3ec904da107b...,Lee Johnson,927,0.444444,0.444444,2938f6103c8ba81a5c9a2822113eab2b2bcd4f175d655f...,3a604f5616b39eb17fc8d1eed07d5248e387bf400294b2...,e2bfbb5453a7853e049b9434db74d4d06b8c5560ff7cf9...,d6fe4a4ffbf1e1a0ae9d4bbed16e94042d9bf01e57eb55...
2,58b1242154c8055252582229abfc4680460278834c4433...,991001,2018-08-21 18:45:00,2018,10,2018_34,-1,-1,2,3,...,630,7138490bd99d55d01e6969fa9ef3b13abd25e01ad89b86...,Nigel Adkins,257,0.5,0.5,2938f6103c8ba81a5c9a2822113eab2b2bcd4f175d655f...,58b1242154c8055252582229abfc4680460278834c4433...,58301066042bbdf19de8fe7d41afc53626b5aa79034712...,bbb63e4ea54b0d60b48a1f8440254d7e656dfbfcbef825...
3,d0cc49c3230e300b529b270951b3b70b3224481add8354...,991007,2018-08-21 18:45:00,2018,10,2018_34,0,0,2,2,...,51,e7b70dea7986e57aabb62e50045a5063d659aaa536957b...,Marcelo Bielsa,67,0.5,0.5,2938f6103c8ba81a5c9a2822113eab2b2bcd4f175d655f...,d0cc49c3230e300b529b270951b3b70b3224481add8354...,9ca1f9a87934693b07890de4b4528b0f3ae4065a67ec38...,38ca605bcd29a5a37697ca66e533ae817ced71b6bf275c...
4,174dba7291174b4dbbfa9ea12dd944bb45bdd8ed905524...,990997,2018-08-22 18:45:00,2018,10,2018_34,0,0,2,2,...,679,4d2e946dcfa0beaf190f55a472e83a3579ce6700a933e7...,Dean Smith,995,0.166667,0.666667,2938f6103c8ba81a5c9a2822113eab2b2bcd4f175d655f...,174dba7291174b4dbbfa9ea12dd944bb45bdd8ed905524...,126905d14981e6b97912ad4fec354035ccef26cb8ec4e1...,419088133137a53bfdb1b7e2e682d223d33a6fa075bbfe...


In [None]:
#| hide

# Run nbdev's export step so the library modules are regenerated from the notebooks.
from nbdev import nbdev_export

nbdev_export()