In [None]:
# default_exp model.learner

In [None]:
%load_ext autoreload
%autoreload 2
from IPython.core.debugger import set_trace
from IPython.utils import traitlets as _traitlets

<h1><center> Learner </center></h1>

In this module, we define the model architecture that we will use in our learner. It will be based on [`tsai`](https://github.com/timeseriesAI/tsai) models with a simple tweak: ability to handle categorical features.

The strategy to build our model is fairly simple:
+ apply a specific `Embedding` layer to each categorical feature
+ concatenate the outcome of the embedding layers with the continuous features
+ pass the resulting tensor to a `tsai` existing model
The first 2 steps can be handled by the *head* of the network which can be seen as a layer.

Once the architecture is defined, we can define our learner in the usual way and benefit from the `fastai` training loop.

In [None]:
# export
import functools
import glob
import operator
import os
import pickle
import tempfile
import warnings
from pathlib import Path
from random import sample
from typing import List, Tuple

import numpy as np
import pandas as pd
from fastai.layers import trunc_normal_
from fastai.tabular.all import *
from fastai.text.all import *
from fastai.vision.all import *
from fastcore.basics import *
from fastcore.xtras import load_pickle, save_pickle
from progressbar import progressbar
from tsai.all import *

from footSeq.datastruct.core import *
from footSeq.datastruct.possessions import *

In [None]:
# Default torch device; reused below to move the demo layers/models onto it.
device = default_device()
# NOTE(review): `computer_setup` comes from one of the star imports (presumably tsai)
# and appears to only report the environment -- confirm it has no other side effects.
computer_setup()

In [None]:
from footSeq.config.mongo import mongo_init

# Open the MongoDB connection needed by the `PossessionMetadata` query below.
# NOTE(review): "prod_atlas" looks like a production alias -- confirm before re-running.
mongo_init("prod_atlas")

# NOTE(review): "/sequences" is an absolute, machine-specific location.
data_path = Path("/sequences")
metadata_path = Path("./data")
files_list_path = metadata_path / "file_list.pkl"

# The list of usable files is cached in a pickle so that the database and the
# disk are only scanned when the cache does not exist yet.
if os.path.isfile(files_list_path):
    all_files = load_pickle(files_list_path)
else:
    # One expected csv path per `file_id` known to the database ...
    files_db = L(
        Path(os.path.join(data_path, obj["file_id"] + ".csv"))
        for obj in progressbar(PossessionMetadata.objects.only("file_id"))
    )
    # ... intersected with the csv files actually present on disk.
    disk_files = data_path.ls(file_exts=".csv")
    all_files = list(set(files_db) & set(disk_files))
    save_pickle(files_list_path, all_files)

# NOTE(review): the shuffle is not seeded, so the order differs between runs.
all_files = L(all_files).shuffle()

In [None]:
# Upper bound on the number of files kept from the shuffled list.
total_files = 1000000
# Slicing past the end is harmless: at most `len(all_files)` items are returned.
# NOTE: `files` is reassigned later, once `files_info` has been built.
files = all_files[:total_files]

In [None]:
# Number of no-goal sequences sampled per goal sequence (used as a multiplier below).
no_goal_prop = 2
# Validation fraction handed to `goal_splitter`.
valid_pct = 0.3
# Seed handed to `goal_splitter`.
seed = 44

In [None]:
# Candidate files as returned by the project helper: goal sequences for
# training, goal sequences for testing and the pool of no-goal sequences.
# NOTE(review): the `no_goal_prop=0.5` passed here is unrelated to the
# notebook-level `no_goal_prop` multiplier used below -- confirm this is intended.
train_goals, test_goals, no_goals = pick_files_from_db(
    total_n_files=len(files), path=data_path, goal_prop=0.95, no_goal_prop=0.5
)

## training files: `no_goal_prop` no-goal sequences per goal sequence.
## Every `.sample()` is seeded with `seed` so that the train/test composition
## is reproducible after a kernel restart.
n_no_goals_train = int(train_goals.shape[0] * no_goal_prop)
files_info = pd.concat(
    [train_goals, no_goals.sample(n_no_goals_train, random_state=seed)], axis=0
).sample(frac=1, ignore_index=True, random_state=seed)

## test files: same ratio, drawn only from no-goal files not used for training
n_no_goals_test = int(test_goals.shape[0] * no_goal_prop)
no_goals_left = no_goals[~no_goals.file.isin(files_info.file)].sample(
    n_no_goals_test, random_state=seed
)
test_files_info = pd.concat([test_goals, no_goals_left], axis=0).sample(
    frac=1, ignore_index=True, random_state=seed
)

# Sorted unique target values, used as the label vocabulary.
labels = L(files_info.target.tolist()).unique(sort=True)

We only want to keep files with at least 2 playing steps:

In [None]:
# Keep only the sequences with at least 2 steps.
# NOTE(review): `reset_index()` without `drop=True` keeps the previous index as
# an extra `index` column and makes this cell non-idempotent when re-run --
# confirm whether anything downstream relies on that column.
files_info = files_info[files_info.nSteps >= 2].reset_index()
files_info

In order to test our different examples, let's prepare a batch of data:

In [None]:
# Categorical columns fed to the embedding layers (order matters: it is the
# order used when indexing the categorical tensor).
cat_names = [
    "standart_name",
    "possession_name",
    "attack_status_name",
    "attack_type_name",
    "under_pressure",
    "high_speed",
    "result_name",
    "action_subtype_name",
    "generic_action_type_name",
    "body_name",
    "is_poss_team",
    "is_att_team",
    "touches",
    "shot_type",
    "shot_handling",
]
# Continuous columns concatenated after the embeddings.
cont_names = [
    "start_x",
    "start_y",
    "end_x",
    "end_y",
    "time_seconds",
    "seconds_since_poss",
]

# Maximum sequence length. It is used both by the to-tensor transform and by
# the length lists driving the sorted dataloaders, so it lives in one place to
# keep the two in sync.
max_seq_len = 10

files = L(files_info.file.tolist())


## splits
splits_files = goal_splitter(
    files_info_df=files_info, no_goal_prop=no_goal_prop, valid_pct=valid_pct, seed=seed
)

procs = [FillMissing, Categorify, Normalize]
foot_tfm = FootSeqTransform(
    files=files,
    splits=splits_files,
    labels=labels,
    procs=procs,
    cat_names=cat_names,
    cont_names=cont_names,
)


## to-tensor transform
to_tsr = FootSeqToTensor(
    cat_names=cat_names,
    cont_names=cont_names,
    labels=labels,
    base_path=data_path,
    max_len=max_seq_len,
)

## tfmdlist
tls = TfmdLists(files, [foot_tfm, to_tsr], splits=splits_files)

## params for dataloaders: sequence lengths capped at `max_seq_len`
train_seq_lens = L(
    min(get_sequence_len(file), max_seq_len) for file in files[splits_files[0]]
)
valid_seq_lens = L(
    min(get_sequence_len(file), max_seq_len) for file in files[splits_files[1]]
)


## pass the training dataset sequence lengths to SortedDL
srtd_dl = partial(SortedDL, res=train_seq_lens)

## Pass the validation dataset seq lengths
dl_kwargs = [{}, {"val_res": valid_seq_lens}]

## re-initialise dataloaders
srtd_dls = tls.dataloaders(
    bs=3, before_batch=pad_seq, dl_type=srtd_dl, dl_kwargs=dl_kwargs
)
srtd_batch = srtd_dls.one_batch()

srtd_batch

# Model

## Embedding

In [None]:
# export


@delegates()
class Embedding(nn.Embedding):
    """
    Embedding layer compatible with full pytorch API and truncated normal initialization

    Parameters
    ----------
    ni, nf: int
        input and output size of Embedding layer. It is the same
        as `num_embeddings` and `embedding_dim` in `torch.nn.Embedding()` module
    std: float
        standard deviation applied in the truncated normal
    kwargs: dict
        Any argument accepted by `torch.nn.Embedding()` module
        apart from `num_embeddings` and `embedding_dim`

    Notes
    -----
    When `padding_idx` is given, the corresponding row of the weight matrix is
    reset to zeros after the truncated normal initialization, so padded
    positions are embedded as all-zero vectors.

    """

    def __init__(self, ni, nf, std=0.01, **kwargs):
        kwargs["num_embeddings"], kwargs["embedding_dim"] = ni, nf
        super().__init__(**kwargs)
        trunc_normal_(self.weight.data, std=std)
        # `nn.Embedding` zeroes the padding row during its own initialisation,
        # but the re-initialisation above overwrites the whole weight matrix:
        # restore the zero row so that padding stays a null vector.
        if self.padding_idx is not None:
            self.weight.data[self.padding_idx].zero_()


class MultiEmbedding(Module):
    """
    Multi-dimension Embedding layer

    Attributes
    ----------
    n_cont: int
        number of continuous features expected in `forward`
    emb_drop: torch.nn.Dropout
        dropout applied to the concatenated embeddings
    cat_embed: torch.nn.ModuleList
        list of Embedding modules in the order in which categorical data appear

    """

    def __init__(
        self,
        n_embeds: List[int],
        embed_dims: List[int] = None,
        embed_p: float = 0.0,
        n_cont: int = 0,
        std: float = 0.01,
        **kwargs
    ):
        """
        Initialise the various embedding sizes

        Parameters
        ----------
        n_embeds: List[int]
            length of the vocabulary of each categorical feature in the same order as passed in the tensor

        embed_dims: List[int]
            required size of each categorical feature embedding in the same order as passed in the tensor.
            A single value is broadcast to every feature; when omitted, `emb_sz_rule` is applied
            to each vocabulary size.

        embed_p: float
            if non zero, applies a dropout layer to the categorical features after embedding.

        n_cont: int, optional
            number of continuous features

        std: float
            standard deviation applied in the truncated normal
        kwargs: dict
            extra parameters passed to the embedding layer. Should be
            compatible with `torch.nn.Embedding()`

        """
        assert n_cont >= 0, "number of continuous features should be positive"
        self.n_cont = n_cont
        ## verify embedding size
        if embed_dims is None:
            embed_dims = [emb_sz_rule(s) for s in n_embeds]
        else:
            embed_dims = listify(embed_dims)
            if len(embed_dims) == 1:
                embed_dims = embed_dims * len(n_embeds)
            assert len(embed_dims) == len(
                n_embeds
            ), "`embed_dims` should have one entry per categorical feature"

        self.emb_drop = nn.Dropout(embed_p)
        self.cat_embed = nn.ModuleList(
            [
                Embedding(ni=n, nf=d, std=std, **kwargs)
                for n, d in zip(n_embeds, embed_dims)
            ]
        )

    def forward(self, x_cat, x_cont=None):
        """
        Embed the categorical features and append the continuous ones.

        `x_cat` may also be a tuple `(x_cat, x_cont)` or `(meta, x_cat, x_cont)`;
        in the latter case the first element is ignored.

        Raises
        ------
        ValueError
            if a tuple of unsupported size is passed, or if the layer was built
            with `n_cont != 0` and no continuous tensor is provided.
        """
        if isinstance(x_cat, tuple):
            if len(x_cat) == 2:
                x_cat, x_cont = x_cat
            elif len(x_cat) == 3:
                _, x_cat, x_cont = x_cat
            else:
                raise ValueError("x_cat is a tuple of unknown size")

        # fail early with a readable message instead of an obscure `torch.cat` error
        if self.n_cont != 0 and x_cont is None:
            raise ValueError(
                f"x_cont is required: the layer expects {self.n_cont} continuous features"
            )

        # the i-th embedding reads the i-th slice of the last dimension
        x_cat = torch.cat([e(x_cat[..., i]) for i, e in enumerate(self.cat_embed)], -1)
        x_cat = self.emb_drop(x_cat)
        if self.n_cont != 0:
            x_cat = torch.cat([x_cat, x_cont], -1)
        return x_cat

In order to test this layer, we need to find the vocabulary size of each categorical variable and pass it in `n_embeds`:

In [None]:
# Vocabulary size of each categorical variable, in the order of `tls.to.cat_names`.
n_embeds = [len(tls.to.classes[n]) for n in tls.to.cat_names]
(tls.to.cat_names, n_embeds)

Now let's initialize the layer and check that it works as expected:

In [None]:
# The last dimension of the continuous tensor gives the number of continuous features.
n_cont = srtd_batch[2].shape[-1]
multi_em = MultiEmbedding(n_embeds=n_embeds, n_cont=n_cont).to(device)
# Elements 1 and 2 of the batch are passed as categorical and continuous inputs.
tsr_em = multi_em(srtd_batch[1], srtd_batch[2])
# The output width must be the sum of all embedding widths plus `n_cont`.
test_eq(
    tsr_em.shape[-1],
    L(w.weight.shape[-1] for w in multi_em.cat_embed).sum() + n_cont,
)

Now let's investigate how we can use the `padding_idx` option. This can be very useful to avoid training useless weights corresponding to padding values. Let's first create a batch with some padded values:

In [None]:
# Regular (non length-sorted) dataloaders, so that `pad_seq` is applied per batch.
reg_dls = tls.dataloaders(bs=5, before_batch=pad_seq)
padded_batch = reg_dls.one_batch()
# Categorical tensor of the second item of the batch.
padded_batch[1][1]

In [None]:
# `padding_idx=0` is forwarded through `**kwargs` to every `Embedding`.
multi_em_pad = MultiEmbedding(n_embeds, padding_idx=0, n_cont=n_cont).to(device)
tsr_em = multi_em_pad(padded_batch[1], padded_batch[2])
# Embedded version of the first item of the batch.
tsr_em[0]

Notice how the positions that are all zeros in the input (zero being the padding index) are also mapped to all-zero vectors in the resulting tensor.

## Full Architecture

Now we are ready to plug in the embedding to any `tsai` learner. Our architecture is fairly straightforward:
+ `head` is the head of the network and runs the data through the `MultiEmbedding` layer
+ `body` takes the output of `head` and runs it through the desired architecture selected by the user in `arch`

In [None]:
# export


@delegates(build_ts_model)
class MixedSeqModel(Module):
    "Sequence model with an embedding head."

    def __init__(
        self,
        arch: Module,
        n_cont: int,
        c_out: int,
        embded_config: dict = None,
        **kwargs
    ):
        """
        Initialise the model architecture

        Parameters
        ----------
        arch: Module
            one of tsai Model architectures accepted by `build_ts_model()`
        n_cont: int
            number of continuous features
        c_out: int
            number of output layers
        embded_config: dict
            all parameters accepted by the `MultiEmbedding` layer (at least
            `n_embeds`). The dictionary is copied, never modified in place.
        kwargs:
            Extra parameters accepted by `build_ts_model()`

        Raises
        ------
        ValueError
            if `embded_config` is not provided
        """
        if embded_config is None:
            raise ValueError(
                "`embded_config` is required and must at least contain `n_embeds`"
            )
        ## head of the network
        # work on a copy so that the caller's dictionary is left untouched
        embded_config = {**embded_config, "n_cont": n_cont}
        self.head = MultiEmbedding(**embded_config)

        ## initialise the body
        self.arch, self.c_out, self.n_cont = arch, c_out, n_cont
        # the body input width is the total embedding width plus the continuous features
        self.c_in = L(w.weight.shape[-1] for w in self.head.cat_embed).sum() + n_cont
        kwargs["arch"], kwargs["c_in"], kwargs["c_out"] = (
            self.arch,
            self.c_in,
            self.c_out,
        )

        self.body = build_ts_model(**kwargs)

    def forward(self, x_meta, x_cat=None, x_cont=None):
        """
        Run the embedding head followed by the time-series body.

        The inputs can be given as three positional tensors, or packed in a
        single tuple `(x_cat, x_cont)` or `(x_meta, x_cat, x_cont)` passed as
        first argument. `x_meta` itself is not used by the model.
        """
        if isinstance(x_meta, tuple):
            if len(x_meta) == 2:
                x_cat, x_cont = x_meta
            elif len(x_meta) == 3:
                _, x_cat, x_cont = x_meta
            else:
                raise ValueError("x_meta is a tuple of unknown size")
        x = self.head(x_cat, x_cont)

        # swap dimensions 1 and 2 of the head output before handing it to the body
        return self.body(x.transpose(2, 1))

In [None]:
from tsai.models.RNN_FCN import *

# Number of continuous features, read from the last dimension of the continuous tensor.
n_cont = padded_batch[2].shape[-1]

## select the architecture
ts_model = LSTM_FCN
# Extra keyword arguments forwarded to `build_ts_model()` through `MixedSeqModel`.
ts_args = {"bidirectional": True, "rnn_layers": 2, "shuffle": False}

## LSTM (alternative configuration, currently disabled)
##ts_model = LSTM
##ts_args = {"n_layers":2, "bidirectional":True}

model = MixedSeqModel(
    arch=ts_model,
    n_cont=n_cont,
    c_out=2,
    embded_config={"n_embeds": n_embeds, "embed_p": 0.1},
    **ts_args
).to(device)
model

In [None]:
# Forward pass on one padded batch: (meta, categorical, continuous) inputs.
model(padded_batch[0], padded_batch[1], padded_batch[2])

# Learner

Defining a learner at this stage is straightforward, we just need to decide on the appropriate loss function to use, pass the `dataloaders` and the metrics we want to track. Moreover, the `tsai` `ts_learner` function provides a great interface that we could extend to meet our purposes: 

## Specific class for the `Learner`
We define a specific class `MixedSeqLearner` that knows how to predict the sequence and how to show results:

In [None]:
# export


class MixedSeqLearner(Learner):
    "`Learner` for mixed sequence data"

    def predict(self, files: List[Path]):
        """
        Predict the outcome of sequences of play read from `files`.

        Returns a tuple `(probs, clss)`: a `DataFrame` of predictions with one
        column per target label, and the list of decoded labels.
        """
        with self.no_bar():
            test_dl = self.dls.test_dl(listify(files))
            raw_preds, _, decoded = self.get_preds(dl=test_dl, with_decoded=True)
            # label vocabulary as exposed by the dataloader transforms
            vocab = test_dl.tfms.target_vocab[0]
            probs = pd.DataFrame(raw_preds.detach().numpy(), columns=vocab)
            clss = [vocab[idx] for idx in decoded]

        return probs, clss


@delegates(build_ts_model)
def mixed_seq_learner(
    arch: Module,
    n_cont: int,
    c_out: int,
    embded_config: dict = None,
    # learner args
    dls=None,
    splitter=trainable_params,
    loss_func=None,
    opt_func=Adam,
    lr=defaults.lr,
    cbs=None,
    metrics=None,
    path=None,
    model_dir="models",
    wd=None,
    wd_bn_bias=False,
    train_bn=True,
    moms=(0.95, 0.85, 0.95),
    # other model args
    **kwargs
):
    """
    Interface to create a `Learner` for sequences with continuous and categorical features

    Parameters
    ----------
    arch: Module
        one of tsai Model architectures accepted by `build_ts_model()`.
        Falls back to `LSTM` when `None`.
    n_cont: int
        number of continuous features
    c_out: int
        number of output layers
    embded_config: dict
        all parameters accepted by the `MultiEmbedding` layer
    dls, splitter, loss_func, opt_func, lr, cbs, metrics, path, model_dir,
    wd, wd_bn_bias, train_bn, moms:
        forwarded to the `MixedSeqLearner` constructor. `splitter` is replaced
        by `ts_splitter` when the model body is subscriptable; when `loss_func`
        is `None`, it is looked up on `dls`.
    kwargs:
        extra parameters forwarded to `MixedSeqModel`

    Returns
    -------
    fastai.Learner
        Learner object with the `MixedSeqModel` model architecture

    """
    if arch is None:
        arch = LSTM

    model = MixedSeqModel(
        arch=arch, n_cont=n_cont, c_out=c_out, embded_config=embded_config, **kwargs
    )
    # Probe whether the body can be indexed (at least two children).
    # `Exception` rather than a bare `except` so that KeyboardInterrupt and
    # SystemExit are not swallowed.
    try:
        model.body[0], model.body[1]
        subscriptable = True
    except Exception:
        subscriptable = False
    if subscriptable:
        splitter = ts_splitter
    if loss_func is None:
        if hasattr(dls, "loss_func"):
            loss_func = dls.loss_func
        elif hasattr(dls, "train_ds") and hasattr(dls.train_ds, "loss_func"):
            loss_func = dls.train_ds.loss_func
        elif hasattr(dls, "cat") and not dls.cat:
            loss_func = MSELossFlat()

    learn = MixedSeqLearner(
        dls=dls,
        model=model,
        loss_func=loss_func,
        opt_func=opt_func,
        lr=lr,
        cbs=cbs,
        metrics=metrics,
        path=path,
        splitter=splitter,
        model_dir=model_dir,
        wd=wd,
        wd_bn_bias=wd_bn_bias,
        train_bn=train_bn,
        moms=moms,
    )

    # keep track of args for loggers
    store_attr("arch", self=learn)

    return learn

## Useful Learner methods

### Predict the full possession

We are particularly interested in predicting how the probability of scoring changes as the sequence progresses. In order to do that, we need to sort the events of the sequence in ascending time order and produce the probabilities progressively:

In [None]:
# export


@patch
def predict_poss(
    self: MixedSeqLearner, seq_df: pd.DataFrame, verbose: bool = False
) -> pd.DataFrame:
    """
    Predict possession outcome probability in a sequential way.

    For every distinct `time_seconds` value of `seq_df`, the rows up to and
    including that time are written to a temporary csv file and passed to
    `predict()`.

    Parameters
    ----------
    seq_df: pd.DataFrame
        one possession; the columns `time_seconds` and `_id` are read here
    verbose: bool
        show a progress bar over the time steps

    Returns
    -------
    pd.DataFrame
        `seq_df` left-merged on `time_seconds` with the columns `proba_goal`
        and `proba_no-goal`
    """

    time_seconds = L(seq_df["time_seconds"].sort_values().tolist()).unique()

    def _get_probs(time):
        # sub-sequence made of every event up to `time`
        _df = seq_df[seq_df["time_seconds"] <= time]
        # the context manager removes the directory as soon as the prediction
        # is done instead of leaving the cleanup to the garbage collector
        with tempfile.TemporaryDirectory() as _tmp_dir:
            file_name = Path(_tmp_dir) / (_df["_id"].values[0] + ".csv")
            _df.to_csv(file_name)
            with self.no_bar():
                _proba = self.predict([file_name])[0]

        return pd.DataFrame(
            {
                "time_seconds": time,
                "proba_goal": _proba["goal"],
                "proba_no-goal": 1.0 - _proba["goal"],
            },
            index=[0],
        )

    if verbose:
        _time_probs = [_get_probs(_time) for _time in progressbar(time_seconds)]
    else:
        _time_probs = [_get_probs(_time) for _time in time_seconds]
    time_probs = (
        pd.concat(_time_probs, axis=0)
        .sort_values(["time_seconds"])
        .reset_index(drop=True)
    )

    return seq_df.merge(time_probs, on="time_seconds", how="left")

#### Predict an entire game

The next step is to be able to predict an entire game. The strategy is the following:
+ extract the data from the database
+ predict the possessions one by one
+ concatenate the data by row

In [None]:
# export


@patch
def predict_game(
    self: MixedSeqLearner,
    game_id: int,
    match_df: pd.DataFrame = None,
    save: bool = False,
) -> pd.DataFrame:
    """
    Predict (all the possessions in) a game.

    Every possession is expanded into one temporary csv file per distinct
    `time_seconds` value (events up to that time), all the files are predicted
    in a single `predict()` call and the predictions are merged back on the
    rows of the corresponding time stamp.

    Parameters
    ----------
    game_id: int
        game identifier
    match_df: pd.DataFrame, optional
        possessions of the game; loaded with `Possession.get_all_game_poss()`
        when omitted. The columns `possessionNumber` and `sequence` are read here.
    save: bool
        when True, the existing `ActionValues` documents of the game are
        deleted and replaced by one document per returned row

    Returns
    -------
    pd.DataFrame
        one row per event with the columns `proba_goal` / `proba_none`,
        `game_id`, `minutes` and `sec` added
    """
    if match_df is None:
        match_df = Possession.get_all_game_poss(game_id)

    poss_info = []

    # The temporary files are only needed until `predict()` has run: the
    # context manager guarantees that they are removed afterwards.
    with tempfile.TemporaryDirectory() as _tmp:
        tmp_dir = Path(_tmp)

        def _save_poss_files(poss_nbr, sep=self.dls.tfms.sep):
            poss_df = (
                match_df[match_df.possessionNumber == poss_nbr]
                .sequence.tolist()[0]
                .sort_values(["time_seconds"])
            )
            time_seconds = L(poss_df["time_seconds"].sort_values().tolist()).unique()
            # NOTE(review): label-based access, assumes the index label 0 exists
            # in the possession dataframe -- confirm.
            # The possession id is split into its components; distinct local
            # names avoid shadowing the `game_id` argument of `predict_game`.
            (
                poss_game_id,
                poss_number,
                _start_id,
                _end_id,
                target,
            ) = poss_df.possession_id[0].split(sep)

            def _save_one_file(time):
                ## extract training data
                _df = poss_df[poss_df["time_seconds"] <= time].copy()
                ## create file name
                _id = sep.join(
                    [
                        str(poss_game_id),
                        str(poss_number),
                        str(_df.event_id.values[0]),
                        str(_df.event_id.values[-1]),
                        target,
                    ]
                )
                _df["target"] = target
                _df["_id"] = _id
                _file_path = tmp_dir / (_id + ".csv")

                ## add last time stamp
                poss_info.append(_df[_df.time_seconds == time].copy())

                ## save to temporary file
                _df.to_csv(_file_path, index=False)
                return _file_path

            return time_seconds.map(_save_one_file)

        # flatten the per-possession lists of files into a single list
        files = L(
            functools.reduce(
                operator.iconcat,
                L(match_df.possessionNumber.tolist()).map(_save_poss_files),
                [],
            )
        )
        poss = pd.concat(poss_info, ignore_index=False).reset_index(drop=True)

        ## predict and adjust probabilities computed
        probas = self.predict(files)[0]

    probas["_id"] = files.map(lambda x: x.stem)
    probas = (
        poss.merge(probas, on="_id", how="left")
        .rename(columns={"no-goal": "proba_none", "goal": "proba_goal"})
        .drop(["_id"], axis="columns")
    )

    probas["game_id"] = game_id
    ## add minutes and sec (vectorised form of the former row-wise `apply`)
    probas["minutes"] = (
        (probas["period_id"] - 1) * 45 + probas["time_seconds"] // 60
    ).astype(int)
    probas["sec"] = probas["time_seconds"].values % 60
    probas = probas.reset_index(drop=True).sort_values(
        ["possession_number", "time_seconds"]
    )

    if save:
        ## remove all existing documents
        ActionValues.objects(game_id=game_id).delete()
        ## create an actionValue object for each row
        lsave = L(ActionValues(**row.to_dict()) for _, row in probas.iterrows())
        ActionValues.objects.insert(lsave)

    return probas

## Save and Load
Finally, we provide a method to save a learner (`save_all()`) and a function to load a previously saved one (`load_all()`):

In [None]:
# export


@patch
def save_all(
    self: MixedSeqLearner,
    path="export",
    dls_fname="dls",
    model_fname="model",
    learner_fname="learner",
    do_save_dls=True,
    verbose=False,
):
    """
    Save the dataloaders (optional), the model with its optimizer state and
    the exported learner under `path`.

    Side effects: `self.model_dir` is set to `path`; when `do_save_dls` is
    True, `self.dls_type` and `self.n_loaders` are set as well.
    """
    path = Path(path)
    if not os.path.exists(path):
        os.makedirs(path)

    if do_save_dls:
        self.dls_type = self.dls.__class__.__name__
        self.n_loaders = len(self.dls.loaders)
        dls_fnames = []
        # one file per dataloader, each re-created with a single worker
        for idx, loader in enumerate(self.dls):
            loader_fname = f"{dls_fname}_{idx}.pth"
            torch.save(loader.new(num_workers=1), path / loader_fname)
            dls_fnames.append(loader_fname)

    # Saves the model along with optimizer
    self.model_dir = path
    self.save(f"{model_fname}", with_opt=True)

    # Export learn without the items and the optimizer state for inference
    self.export(path / f"{learner_fname}.pkl")

    # summary, printed through `pv` according to `verbose`
    summary = [f"Learner saved:", f"path          = '{path}'"]
    if do_save_dls:
        summary.append(f"dls_fname     = '{dls_fnames}'")
    summary.append(f"model_fname   = '{model_fname}.pth'")
    summary.append(f"learner_fname = '{learner_fname}.pkl'")
    for line in summary:
        pv(line, verbose)


def load_all(
    path="export",
    dls_fname="dls",
    model_fname="model",
    learner_fname="learner",
    device=None,
    pickle_module=pickle,
    do_load_dls=False,
    verbose=False,
):
    """
    Load a learner previously saved with `save_all()`.

    Parameters
    ----------
    path: str or Path
        directory containing the saved files
    dls_fname, model_fname, learner_fname: str
        base names used when saving
    device: int, str, torch.device, optional
        an int is interpreted as a cuda device index; `None` means
        `default_device()`
    pickle_module:
        module used to unpickle the learner and the dataloaders
    do_load_dls: bool
        also reload the dataloaders (they must have been saved)
    verbose: bool
        print a summary of what was loaded

    Returns
    -------
    Learner
        the loaded learner

    Notes
    -----
    Loading relies on pickle: only load files coming from a trusted source.
    """
    if isinstance(device, int):
        device = torch.device("cuda", device)
    elif device is None:
        device = default_device()
    # `str()` makes the test work for both "cpu" and `torch.device("cpu")`;
    # comparing a `torch.device` directly with a string is never True.
    if str(device) == "cpu":
        cpu = True
    else:
        cpu = None

    path = Path(path)
    learn = load_learner(
        path / f"{learner_fname}.pkl", cpu=cpu, pickle_module=pickle_module
    )
    learn.load(f"{model_fname}", with_opt=True, device=device)

    if do_load_dls:
        # `n_loaders` is only stored by `save_all(do_save_dls=True)`
        if not hasattr(learn, "n_loaders"):
            raise AttributeError(
                "the learner has no `n_loaders` attribute: "
                "it was probably saved with `do_save_dls=False`"
            )
        loaders = []
        dls_fnames = []
        for i in range(learn.n_loaders):
            dl = torch.load(
                path / f"{dls_fname}_{i}.pth",
                map_location=device,
                pickle_module=pickle_module,
            )
            dl = dl.new(num_workers=0)
            dl.to(device)
            # draw a first batch from the re-created dataloader
            first(dl)
            loaders.append(dl)
            dls_fnames.append(f"{dls_fname}_{i}.pth")
        learn.dls = type(learn.dls)(*loaders, path=learn.dls.path, device=device)

    pv(f"Learner loaded:", verbose)
    pv(f"path          = '{path}'", verbose)
    if do_load_dls:
        pv(f"dls_fname     = '{dls_fnames}'", verbose)
    pv(f"model_fname   = '{model_fname}.pth'", verbose)
    pv(f"learner_fname = '{learner_fname}.pkl'", verbose)

    return learn

## Define and Train a learner

### From scratch

Let's define a learner now with the `LSTM_FCN` architecture and find an appropriate learning rate:

In [None]:
from fastai.metrics import *

# Number of continuous features, read from the last dimension of the continuous tensor.
n_cont = padded_batch[2].shape[-1]

## prepare dataloaders (same sorted setup as before, with a batch size of 64)
srtd_dls = tls.dataloaders(
    bs=64, before_batch=pad_seq, dl_type=srtd_dl, dl_kwargs=dl_kwargs
)

## select the architecture
ts_model = LSTM_FCN
ts_args = {"bidirectional": True, "rnn_layers": 2, "shuffle": False}
# The model name encodes the architecture and the main settings; it is used
# both as the model directory name and as the checkpoint file name.
model_name = "_".join(
    [
        ts_model.__name__,
        f'bidir-{ts_args.get("bidirectional",False)}',
        f'layers-{ts_args.get("rnn_layers", 1)}',
        f"no_goal_prop-{no_goal_prop}",
        "full_data",
    ]
)

## create directory if it does not exist
if not os.path.exists(Path("./models") / model_name):
    os.makedirs(Path("./models") / model_name)

# `ts_args` is forwarded to the model builder through `**kwargs`.
learn = mixed_seq_learner(
    arch=ts_model,
    n_cont=n_cont,
    c_out=2,
    embded_config={"n_embeds": n_embeds, "embed_p": 0.1},
    dls=srtd_dls,
    loss_func=CrossEntropyLossFlat(),
    metrics=[accuracy],
    path=Path("."),
    model_dir=Path("./models") / model_name,
    **ts_args,
)

In [None]:
# Learning-rate finder; its first returned value is used as `lr_max` for training.
lr_ = learn.lr_find()

We can train the learner for a number of cycles:

In [None]:
# Number of epochs passed to `fit_one_cycle`.
n_cycles = 10
# NOTE(review): which suggestion `lr_[0]` refers to depends on the fastai
# version of `lr_find()` -- confirm it is the intended one.
# `SaveModelCallback` writes its checkpoint under the name `model_name`.
learn.fit_one_cycle(n_cycles, lr_max=lr_[0], cbs=SaveModelCallback(fname=model_name))

Finally, we save the best learner using the `save_all()` method:

In [None]:
# Persist the dataloaders, the model with its optimizer state and the exported
# learner in the model directory.
learn.save_all(
    path=Path("./models") / model_name,
    dls_fname="dls",
    model_fname="model",
    learner_fname="learner",
    verbose=True,
)

## Predict Unseen Sequences

### Predict from files

We can now compute some predictions on some files. We will select `n_ex` sequences ending with a goal and the same number ending in no-goal:

In [None]:
# Number of examples drawn for each outcome.
n_ex = 3

# `n_ex` goal files from the test pool, plus `n_ex` no-goal files that are not
# part of `files_info`.
# NOTE(review): the samples are not seeded, so the selection changes on every run.
test_files = (
    test_goals.sample(n_ex)["file"].to_list()
    + no_goals[~no_goals.file.isin(files_info.file)].sample(n_ex)["file"].to_list()
)

In [None]:
# Returns the predictions dataframe and the list of predicted labels.
learn.predict(test_files)

### Visualize results

Looking at raw numbers is fine but it is better to visualize the actions on a pitch. In order to do that, we provide a proper `show_results()` method that knows how to display the sequence together with the predictions:

In [None]:
# export


@typedispatch
def show_results(
    x: tuple,
    y,
    samples,
    outs,
    ctxs=None,
    max_n=6,
    nrows=None,
    ncols=1,
    figsize=None,
    **kwargs,
):
    """
    Show up to `max_n` samples, each with a title giving the actual outcome
    and a prediction.

    Parameters
    ----------
    x, y:
        batch inputs and second batch element, as dispatched by `Learner.show_results`
    samples:
        decoded samples; each one must provide a `show(ctx=, fig=, title=)` method
    outs:
        unused here
    ctxs:
        axes to draw on; a new grid is created with `get_grid` when omitted
    max_n, nrows, ncols, figsize:
        grid layout options; `max_n` is capped to the number of samples
    kwargs:
        when a `learner` entry is present, it is used to compute predictions
        on `x` and to show the predicted label with its value
    """
    n_elems = len(samples)
    if max_n > n_elems:
        max_n = n_elems
    if figsize is None:
        figsize = (10, 10 * 2.7)
    if ctxs is None:
        fig, ctxs = get_grid(
            max_n, nrows=nrows, ncols=ncols, figsize=figsize, return_fig=True
        )
    else:
        # user-provided axes: take the figure from the first one when it
        # exposes it, so that `fig` is always defined below
        fig = getattr(ctxs[0], "figure", None) if len(ctxs) > 0 else None

    # never draw more than `max_n` samples, even if more axes are available
    indexed_ctxs = list(zip(range(max_n), ctxs))

    ## collect learner if available
    if "learner" in kwargs:
        learner = kwargs["learner"]
        labels = learner.dls.tfms.target_vocab[0]
        with learner.no_bar():
            probs, _, pred_cls = learner.get_preds(dl=[x], with_decoded=True)
        probs = pd.DataFrame(probs.detach().numpy(), columns=labels)
        for i, ctx in indexed_ctxs:
            pred_class = labels[pred_cls[i].item()]
            proba = probs[pred_class].values[i]
            title = f"Actual: {samples[i][1]} \n Prediction: {pred_class} ({proba:.3f})"
            samples[i].show(ctx=ctx, fig=fig, title=title)
    else:
        # NOTE(review): the "Prediction" shown here is derived from `y`, not
        # from `outs`, with a hard-coded label list -- confirm this is intended.
        for i, ctx in indexed_ctxs:
            title = f'Actual: {samples[i][1]} \n Prediction: {["goal","no_goal"][y[i].item()]}'
            samples[i].show(ctx=ctx, fig=fig, title=title)

In [None]:
# Base width of the figure; the height is scaled by 2.7.
base_fig_size = 12

test_dl = learn.dls.test_dl(test_files)
# `learner=learn` is forwarded through `**kwargs` so that `show_results` can
# compute the predictions displayed in the titles.
learn.show_results(
    dl=test_dl,
    figsize=(base_fig_size, base_fig_size * 2.7),
    max_n=6,
    ncols=1,
    learner=learn,
)

### Predict full possession

We can predict a full possession in the same way:

In [None]:
# Pick at random one goal sequence made of exactly 5 steps.
# NOTE(review): the sample is not seeded, so a different file is used on every run.
goal_file_path = (
    files_info[(files_info.target == "goal") & (files_info.nSteps == 5)]
    .sample(1)["file"]
    .values[0]
)
seq_df = pd.read_csv(goal_file_path)
# Probabilities computed progressively along the possession.
x2 = learn.predict_poss(seq_df)
# Show a subset of the event columns next to the two probability columns.
x2[
    [
        "type_name",
        "attack_type_name",
        "time_seconds",
        "player_name",
        "start_x",
        "start_y",
        "end_x",
        "end_y",
        "is_poss_team",
        "is_att_team",
        "proba_goal",
        "proba_no-goal",
    ]
]

### Predict a full game

In [None]:
from footSeq.config.mongo import mongo_init

mongo_init("prod_atlas")

game_id = 2162755
game_probs = learn.predict_game(game_id, save=True)
game_probs.tail()