In [None]:
import pandas as pd
from mostlyai.sdk import MostlyAI

# Load the original time-series data, keeping only the columns used below.
# The file URL is built with exactly one '/' between the base and the file
# name: `repo_url` already ends with '/', so appending '/purchases.csv.gz'
# verbatim would produce an empty path segment ('.../cdnow//purchases.csv.gz').
repo_url = 'https://github.com/mostly-ai/public-demo-data/raw/refs/heads/dev/cdnow/'
purchases_url = f"{repo_url.rstrip('/')}/purchases.csv.gz"
df_original_purchases = (
    pd.read_csv(purchases_url)[['users_id', 'date', 'cds', 'amt']]
    # ensure correct data type for DATE column; done inside the chain so the
    # frame is built in one expression instead of being modified afterwards
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)

# Extract the subject table from the time-series data: one row per distinct
# `users_id` value that occurs in the purchases.
df_original_users = df_original_purchases[['users_id']].drop_duplicates()

# create the SDK client
mostly = MostlyAI()

# table spec for the users table: the de-duplicated `users_id` values,
# with `users_id` declared as its primary key
users_table = {
    'name': 'users',
    'data': df_original_users,
    'primary_key': 'users_id',
}

# table spec for the purchases table: its `users_id` column references the
# `users` table and is flagged as the context relation
purchases_table = {
    'name': 'purchases',
    'data': df_original_purchases,
    'foreign_keys': [
        {'column': 'users_id', 'referenced_table': 'users', 'is_context': True},
    ],
    'tabular_model_configuration': {
        # cap runtime for demo; set None for max accuracy
        'max_training_time': 2,
    },
}

# train a generator named 'CDNOW' on both tables
g = mostly.train(config={
    'name': 'CDNOW',
    'tables': [users_table, purchases_table],
})

# display the Model QA reports of the trained generator
g.reports(display=True)

In [None]:
# probe the generator `g` for synthetic data (size=1000)
syn = mostly.probe(g, size=1000)

# show the synthetic purchases ordered by user, then by date
# (kept as the cell's last expression so the notebook renders it)
syn['purchases'].sort_values(by=['users_id', 'date'])