In [None]:
import os
# Set before neuralforecast is imported (in a later cell).
# NOTE(review): presumably makes the Nixtla outputs carry `unique_id` as a regular
# column instead of the index — confirm against the installed library version.
os.environ['NIXTLA_ID_AS_COL'] = '1'

import sys
# Put the directory two levels up on the import path so that `src` (imported below) resolves.
sys.path.append('../../')

import numpy as np
import pandas as pd

from tqdm.notebook import tqdm
# Registers tqdm's progress-aware helpers (e.g. `progress_apply`) on pandas objects.
tqdm.pandas()

from src.utils import metrics_scores

# Textual outcome -> binary target used by the models.
label2id = dict(COMPLETED=1, FAILED=0)
# Reverse lookup: binary target -> textual outcome.
id2label = dict(zip(label2id.values(), label2id.keys()))

In [None]:
# Load the raw export, keep the modelling columns and rename them to the
# `unique_id` / `ds` / `y` layout passed to NeuralForecast further down.
# Built as a single non-mutating chain: the original renamed and assigned in place
# on a column subset of the loaded frame, which is what triggers pandas'
# SettingWithCopyWarning.
df = (
    pd.read_parquet('../../out/parquet/raw.parquet')
    .loc[:, ['playerId', 'startTime', 'state', 'counterName', 'target', 'periodTarget']]
    .rename(columns={
        'playerId': 'unique_id',
        'startTime': 'ds',
        'state': 'y',
    })
    # States that are not keys of `label2id` become NaN.
    .assign(y=lambda frame: frame['y'].map(label2id))
)

# One-hot encode `counterName`; the dummy column names are reused later as
# future exogenous features, so `counterName_onehot` stays a module-level name.
counterName_onehot = pd.get_dummies(df['counterName'], prefix='counterName', dtype=np.int8)
# `join` aligns on the row index, which both frames share.
df = df.drop(columns=['counterName']).join(counterName_onehot)

df

In [None]:
# Largest number of rows belonging to a single `unique_id`.
df['unique_id'].value_counts().max()

In [None]:
def get_static_df(df):
    """Build a one-hot static-feature table with one row per distinct `unique_id`.

    Args:
        df: DataFrame containing a `unique_id` column.

    Returns:
        A tuple `(static_df, stat_exog_list)`:
        * `static_df` holds one `unique_id_<value>` indicator column (int) per
          distinct id, followed by a `unique_id` column with the id itself. Rows
          follow the order of first appearance of each id in `df`.
        * `stat_exog_list` is the list of indicator column names (without
          `unique_id`).
    """
    unique_ids = df['unique_id'].unique()
    one_hot = pd.get_dummies(unique_ids, prefix='unique_id', dtype=int)
    # Capture the indicator names before the id column is appended.
    indicator_columns = list(one_hot.columns)
    return one_hot.assign(unique_id=unique_ids), indicator_columns

In [None]:
from neuralforecast import NeuralForecast 
from neuralforecast.models import MLP, TFT, GRU, NHITS, LSTM

from neuralforecast.losses.pytorch import DistributionLoss 

In [None]:
# One-hot `unique_id` indicators serve as static exogenous features.
static_df, stat_exog_list = get_static_df(df)

# Settings shared by the window-based models (MLP, NHITS, TFT).
args = dict(
    h=2,
    input_size=6,
    loss=DistributionLoss('Bernoulli'),
    max_steps=500,
    scaler_type='robust',
    # Future exogenous features: the counterName dummies plus the two target columns.
    futr_exog_list=[*counterName_onehot.columns.to_list(), 'target', 'periodTarget'],
    stat_exog_list=stat_exog_list,
    start_padding_enabled=True,
)

# The recurrent models (GRU, LSTM) reuse a subset of the shared settings, take
# `inference_input_size` in addition, and are not given `start_padding_enabled`.
rnn_args = {
    key: args[key]
    for key in ('h', 'input_size', 'max_steps', 'scaler_type', 'futr_exog_list', 'stat_exog_list')
}
rnn_args['inference_input_size'] = args['input_size']

models = [
    MLP(hidden_size=64, **args),
    NHITS(**args),
    TFT(**args),
    # GRU and LSTM are fitted with a Normal likelihood instead of Bernoulli;
    # each gets its own loss instance.
    GRU(loss=DistributionLoss('Normal'), **rnn_args),
    LSTM(loss=DistributionLoss('Normal'), **rnn_args),
]

nf = NeuralForecast(models=models, freq='W')
Y_hat_df = nf.cross_validation(
    df=df,
    static_df=static_df,
    n_windows=10,
    step_size=2,
    refit=1,
)

In [None]:
# Score every model per forecast timestamp (`ds`) and persist the results.
#
# Binarised forecasts are written to a copy so that `Y_hat_df` keeps the raw model
# outputs. The original overwrote those columns in place, which discarded the raw
# forecasts and made a re-run of this cell recompute the mean threshold on
# already-binarised values.
Y_pred_df = Y_hat_df.copy()
per_model_scores = []

for m in nf.models:
    m_name = str(m)
    loss_dist = m.loss.distribution
    # Bernoulli models use a fixed 0.5 cut-off (the column is presumably a
    # probability — TODO confirm); any other distribution is cut at the mean raw
    # forecast over all cross-validation windows.
    threshold = Y_hat_df[m_name].mean() if loss_dist != 'Bernoulli' else 0.5
    Y_pred_df[m_name] = (Y_hat_df[m_name] > threshold).astype(int)

    # `metrics_scores(...)` is merged with a dict below, so it is expected to
    # return a dict; the model name is attached to each per-`ds` record.
    per_model_scores.append(
        Y_pred_df.groupby('ds').apply(
            lambda group: metrics_scores(group['y'], group[m_name], 1) | {'model': m_name},
            include_groups=False,
        )
    )

# Series of dicts indexed by `ds` -> tidy DataFrame with one row per (ds, model).
scores = pd.concat(per_model_scores)
metrics = pd.DataFrame(
    scores.tolist(),
    columns=['accuracy', 'precision', 'recall', 'f1', 'model'],
    index=scores.index,
).reset_index()

# Create the output directory if needed so that a fresh checkout can run the cell.
os.makedirs('../../out/csv', exist_ok=True)
metrics.to_csv('../../out/csv/baseline_metrics.csv', index=False)