In [None]:
#| default_exp datastructure.game_features

In [None]:
#| hide

from IPython.core.debugger import set_trace

# Enable IPython's autoreload extension in mode 2 for this development session.
%load_ext autoreload
%autoreload 2

#  Games Features
> In this module, we define the data structure required to extract game features.

In [None]:
#| export

import pandas as pd
import mongoengine
import datetime
import logging

## Games

These features are computed at the game level and contain:
+ **metadata**: general information about game date, team identity, competition, ...
+ **stadium** information: coordinates, surface, capacity, ...
+ **condition**: day time, attendance, weather, ...
+ **dominance**: the head-to-head record between the home and away teams

The document also contains *target* information, i.e. the different prediction targets such as the match outcome, score, goal difference, ... They are identified by the `tgt_` prefix.

In [None]:
#| export


class GameFeatures(mongoengine.Document):
    """Store the game-features for a given game identified by its id.

    Every attribute is mapped onto the database key given by its `db_field`.
    Documents are stored in the `gameFeatures_test` collection of the
    connection registered under the `features` alias (see `meta`).
    Prediction targets are the attributes prefixed with `tgt_`.
    """

    # Game-id.
    game_id = mongoengine.StringField(db_field="gameId", required=True)
    game_opta_id = mongoengine.IntField(db_field="game_optaId", required=False)

    # Game-date/time.
    game_date = mongoengine.DateTimeField(db_field="gameDate", required=True)
    season_id = mongoengine.IntField(db_field="seasonId", required=True)

    # Competition.
    competition_id = mongoengine.StringField(db_field="competitionId", required=True)

    # Team-ids.
    # Home.
    home_team_id = mongoengine.StringField(db_field="homeTeamId", required=True)
    ht_opta_id = mongoengine.IntField(db_field="homeTeam_optaId", required=False)

    # Away.
    away_team_id = mongoengine.StringField(db_field="awayTeamId", required=True)
    at_opta_id = mongoengine.IntField(db_field="awayTeam_optaId", required=False)

    # Stadium.
    venue_id = mongoengine.StringField(db_field="venueId", required=True)
    city = mongoengine.StringField(db_field="city", required=False)
    country = mongoengine.StringField(db_field="country", required=False)
    field_surface = mongoengine.StringField(db_field="surface", required=True)
    field_length_m = mongoengine.IntField(db_field="field_length_m", required=False)
    field_width_m = mongoengine.IntField(db_field="field_width_m", required=False)
    venue_capacity = mongoengine.IntField(db_field="capacity", required=False)
    venue_latitude = mongoengine.FloatField(db_field="lat", required=False)
    # NOTE(review): this attribute lacks the underscore used by `venue_latitude`.
    # It is kept as-is because renaming it would break existing callers/queries.
    venuelongitude = mongoengine.FloatField(db_field="long", required=False)

    # Condition.
    is_closed_doors = mongoengine.BooleanField(
        db_field="is_behind_closed_doors", required=False
    )

    # Head to head.
    # Both dominance values are mandatory and validated to lie within [0, 1].
    home_dominance = mongoengine.FloatField(
        db_field="homeTeamDominance", min_value=0, max_value=1, required=True
    )
    away_dominance = mongoengine.FloatField(
        db_field="awayTeamDominance", min_value=0, max_value=1, required=True
    )

    # Targets.
    # All targets are optional. The `half` variants presumably describe the
    # half-time state and the others the full-time state -- TODO confirm.
    # NOTE(review): presumably `gd` is home goals minus away goals and `outcome`
    # is 1.0 / 0.5 / 0.0 for home win / draw / away win -- TODO confirm.
    tgt_half_gd = mongoengine.IntField(db_field="tgt_half_gd", required=False)
    tgt_gd = mongoengine.IntField(db_field="tgt_gd", required=False)
    tgt_htg = mongoengine.IntField(db_field="tgt_homeTeamGoals", required=False)
    tgt_atg = mongoengine.IntField(db_field="tgt_awayTeamGoals", required=False)
    tgt_half_htg = mongoengine.IntField(
        db_field="tgt_half_homeTeamGoals", required=False
    )
    tgt_half_atg = mongoengine.IntField(
        db_field="tgt_half_awayTeamGoals", required=False
    )
    tgt_outcome = mongoengine.FloatField(db_field="tgt_outcome", required=False)
    tgt_half_outcome = mongoengine.FloatField(
        db_field="tgt_half_outcome", required=False
    )

    # Connection alias and collection that hold these documents.
    meta = {
        "db_alias": "features",
        "collection": "gameFeatures_test",
    }

    @classmethod
    def get_all_games(
        cls,
        limit: int = None,  # Maximum number of rows; `None` applies no cap.
    ):  # Queryset of `GameFeatures` documents.
        "Extract all games, ordered by ascending game date."
        # Sort through the declared attribute name, as `get_game` does for its
        # filter; mongoengine translates it to the `gameDate` database key.
        games = cls.objects().order_by("game_date")
        # Only cap the queryset when a limit was actually requested.
        return games if limit is None else games.limit(limit)

    @classmethod
    def get_game(
        cls,
        ra_game_id: str,  # Real-analytics game identifier.
    ):  # First matching `GameFeatures` document, or `None` when there is none.
        "Extract a single game by its gameId."
        return cls.objects(game_id=ra_game_id).first()

In [None]:
from fastbet.config.mongo import mongo_init
from fastbet.config.localconfig import CONFIG, DB_HOSTS

In [None]:
# Initialise connections.
mongo_init(db_hosts=DB_HOSTS, config=CONFIG, db_host="public_atlas")

# Get all fixtures as a queryset (kept under its own name so that `fixtures`
# only ever refers to the DataFrame built below).
fixtures_qs = GameFeatures.get_all_games()

# Convert the raw documents into a DataFrame; its columns are the database
# keys (e.g. `gameId`), not the Python attribute names.
fixtures = pd.DataFrame(fixtures_qs.as_pymongo())
fixtures.head()

Unnamed: 0,_id,gameId,game_optaId,gameDate,seasonId,competitionId,homeTeamId,homeTeam_optaId,awayTeamId,awayTeam_optaId,...,homeTeamDominance,awayTeamDominance,tgt_half_gd,tgt_gd,tgt_homeTeamGoals,tgt_awayTeamGoals,tgt_half_homeTeamGoals,tgt_half_awayTeamGoals,tgt_outcome,tgt_half_outcome
0,63ff6730078c55548211e113,174dba7291174b4dbbfa9ea12dd944bb45bdd8ed905524...,990997,2018-08-22 18:45:00,2018,2938f6103c8ba81a5c9a2822113eab2b2bcd4f175d655f...,126905d14981e6b97912ad4fec354035ccef26cb8ec4e1...,7,419088133137a53bfdb1b7e2e682d223d33a6fa075bbfe...,94,...,0.166667,0.666667,0,0,2,2,1,1,0.5,0.5
1,63ff6730078c55548211e114,219ef70c0e8a803ec1efdb793443edfaa32398690c7829...,991003,2018-08-22 18:45:00,2018,2938f6103c8ba81a5c9a2822113eab2b2bcd4f175d655f...,aeb2f56fcedbcf4cd5c780179766996c7bf0b308064541...,5,f8daf96ad35eebf1c0a5886c72734ba7dec366d6637052...,108,...,0.25,0.75,-2,0,2,2,0,2,0.5,0.0
2,63ff6730078c55548211e115,0655e244d8d596b5572e86426e2a7ca6178044efa59437...,991013,2018-08-25 14:00:00,2018,2938f6103c8ba81a5c9a2822113eab2b2bcd4f175d655f...,9ee012a80cade2df55b71580bf5e238bcd6be6f696fdc1...,45,38ca605bcd29a5a37697ca66e533ae817ced71b6bf275c...,2,...,0.333333,0.583333,-2,-3,0,3,0,2,0.0,0.0
3,63ff6730078c55548211e116,019c223b4a03917c2f1685beab4d5d278f7bff3913f239...,991018,2018-08-25 14:00:00,2018,2938f6103c8ba81a5c9a2822113eab2b2bcd4f175d655f...,eb89c068ca204a72408360450847a990c97c5b5ff0ec9f...,110,bbb63e4ea54b0d60b48a1f8440254d7e656dfbfcbef825...,88,...,0.555556,0.222222,1,2,2,0,1,0,1.0,1.0
4,63ff6730078c55548211e117,0f9ad12eec9f24277ab491f5f26f610eaa918903a34147...,991014,2018-08-25 16:30:00,2018,2938f6103c8ba81a5c9a2822113eab2b2bcd4f175d655f...,04c71986b6503ba5b09a7098ceb79954d20049f21ba45b...,17,95d3bddc19a15d34a7876dcffc1a3e9bc63d809b69308a...,41,...,0.611111,0.277778,-1,0,2,2,0,1,0.5,0.0


In [None]:
#| hide

import nbdev

# Run nbdev's export step so the cells marked `#| export` are written to the library module.
nbdev.nbdev_export()