# Tabular Model: sequential data, with context

[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/mostly-ai/mostlyai-engine/blob/main/examples/sequential.ipynb)

In [None]:
import pandas as pd
import numpy as np
from mostlyai.engine import TabularARGN

# load original data: a context table (players) and a target table (batting)
url = "https://github.com/mostly-ai/public-demo-data/raw/refs/heads/dev/baseball"
ctx_csv = f"{url}/players.csv.gz"
tgt_csv = f"{url}/batting.csv.gz"
trn_ctx_df = pd.read_csv(ctx_csv)  # context data
trn_tgt_df = pd.read_csv(tgt_csv)  # target data

# model settings, collected in one place so they are easy to tweak
# (presumably target rows reference context rows via players_id -> id;
# confirm against the TabularARGN documentation)
argn_settings = {
    "tgt_context_key": "players_id",
    "ctx_primary_key": "id",
    "ctx_data": trn_ctx_df,
    "max_training_time": 2,  # limit training to 2 minutes for demo purposes
    "verbose": 1,
}

# create and fit the model with context data
argn = TabularARGN(**argn_settings)
argn.fit(trn_tgt_df)

# generate synthetic samples; the request is sized by the number of target rows
# NOTE(review): for a sequential model, confirm whether n_samples counts rows
# or sequences (one per context record) before relying on this size
n_samples = len(trn_tgt_df)
syn_tgt_df = argn.sample(n_samples=n_samples)

In [None]:
# preview the first five synthetic rows
syn_tgt_df.iloc[:5]

### QUALITY ASSURANCE

#### sequence lengths

In [None]:
# quantile levels 0.0, 0.1, ..., 1.0 used for both summaries
deciles = np.arange(0, 1.1, 0.1)

# sequence length = number of target rows per players_id
trn_seq_lens = trn_tgt_df.groupby("players_id").size()
syn_seq_lens = syn_tgt_df.groupby("players_id").size()

# print original and synthetic deciles one above the other for easy comparison
for label, seq_lens in (("tgt: ", trn_seq_lens), ("syn: ", syn_seq_lens)):
    print(label, np.quantile(seq_lens, deciles, method="inverted_cdf"))

#### coherence (distinct teams per player)

In [None]:
# number of distinct teams appearing in each player's sequence
syn_teams_per_player = syn_tgt_df.groupby("players_id")["team"].nunique()
trn_teams_per_player = trn_tgt_df.groupby("players_id")["team"].nunique()

# average over players, rounded to one decimal
syn_avg_teams_per_player = syn_teams_per_player.mean().round(1)
trn_avg_teams_per_player = trn_teams_per_player.mean().round(1)

# display synthetic vs. original side by side
(syn_avg_teams_per_player, trn_avg_teams_per_player)