In [None]:
import sys
import time
from contextlib import contextmanager


sys.path.append('../')

from src.parser import *
from src.train import *
from src.config import Config
from src.dataset_helper import *

@contextmanager
def timer(name):
    """Context manager that reports how long its ``with`` body took.

    Args:
        name: Label printed inside square brackets at the start of the
            report line.

    Yields:
        None. The manager exists only for its timing side effect.

    On exit a line of the form ``[name] 1.234s`` is printed to stdout. The
    line is printed even when the body raises; the exception then continues
    to propagate unchanged.
    """
    # perf_counter is monotonic, so the measurement cannot go negative or
    # jump if the system clock is adjusted while the body is running.
    start = time.perf_counter()
    try:
        yield
    finally:
        # Report in `finally` so failed runs still show how long they took.
        elapsed = time.perf_counter() - start
        print(f"[{name}] {elapsed:.3f}s")
    
# ---------------------------------------------------------------------------
# Experiment configuration
# ---------------------------------------------------------------------------

# The four target column names: target1 .. target4.
TARGET_COLS = [f'target{idx}' for idx in range(1, 5)]

# Input / output locations and run switches.
DATA_DIR = '../input/mlb-player-digital-engagement-forecasting'
EXP_NAME = ''
ARTIFACT_DIR = 'artifacts'
OUTPUT_DIR = f'{ARTIFACT_DIR}/gbdt'
USE_UPDATED = True

# Feature ids handed to Config(features=...). The order below is the original
# order and is kept on purpose.
# Currently switched off: f000, f003, f004, f006, f007, f017.
FEATURES = [
    'f022', 'f001', 'f002', 'f024',
    'f023', 'f005', 'f021', 'f014', 'f037',
    'f100', 'f102', 'f103', 'f105',
    'f110', 'f111', 'f120', 'f121', 'f131',
    'f300',
    'f400', 'f401', 'f402', 'f403', 'f404', 'f408', 'f410',
    'f020', 'f303',
]

# Additional feature ids keyed by target name; handed to
# Config(features_per_target=...).
FEATURES_EXTRA = dict(
    target1=['f028', 'f138', 'f301', 'f029', 'f707', 'f418'],
    target2=['f007', 'f026', 'f029', 'f418'],
    target3=['f028', 'f138', 'f136'],
    target4=['f301'],
)

# Additional feature ids keyed by lag value; handed to
# Config(features_per_lag=...). Each lag gets its own list object.
FEATURES_PER_LAG = {lag: ['f150', 'f151'] for lag in (0, 3, 7)}

#wandb.login()

In [None]:
# NOTE(review): `pd` and `os` are not imported explicitly in the cells shown
# here; presumably they arrive through the star-imports from src.* — confirm.

# Build `store` from the files under DATA_DIR; only this step is timed.
with timer('load'):
    store = Store.train(
        DATA_DIR,
        use_updated=USE_UPDATED,
    )

# Season table shipped alongside the main data.
seasons_csv_path = os.path.join(DATA_DIR, 'seasons.csv')
season_df = pd.read_csv(seasons_csv_path)

# Base training frame derived from the 'nextDayPlayerEngagement' sub-data.
df_train = make_df_base_from_train_engagement(
    load_subdata(DATA_DIR, 'nextDayPlayerEngagement', USE_UPDATED)
)

In [None]:
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')


# Four hyper-parameter sets passed to train(). They are identical except for
# `num_leaves`, so they are generated from a single template; each entry is
# its own dict object with the original key order.
# NOTE(review): presumably one set per target, in target order — confirm
# against train().
params = [
    {
        'objective': 'mae',
        'metrics': 'mae',
        'max_depth': 12,
        'n_estimators': 2000,
        'colsample_bytree': 0.7,
        'num_leaves': leaf_count,
        'min_child_samples': 40,
        'bagging_fraction': 0.7,
        'bagging_freq': 1
    }
    for leaf_count in (512, 164, 384, 512)
]


# Run-mode switch: True builds the submission configuration, False builds the
# single-lag local-validation configuration. Flip this flag instead of editing
# the `if` condition below.
IS_SUBMISSION_RUN = True

if IS_SUBMISSION_RUN:
    # submit
    config = Config(
        lags=[0, 3, 7, 14, 21, 28, 35, 45],
        features=FEATURES,
        features_per_lag=FEATURES_PER_LAG,
        features_per_target=FEATURES_EXTRA,
        train_full=True,
        upload=True,
        extra_df_on=['target1', 'target2', 'target3', 'target4'],
        second_order_features=True
    )
else:
    # local validation
    # NOTE(review): `second_order_features` is only passed in the submit
    # branch, so this branch uses whatever Config's default is — confirm that
    # validating without it is intended.
    config = Config(
        lags=[28],
        features=FEATURES,
        features_per_lag=FEATURES_PER_LAG,
        features_per_target=FEATURES_EXTRA,
        train_full=False,
        upload=False,
        extra_df_on=['target1', 'target2', 'target3', 'target4']
    )

# Extra frame read from the artifacts directory and forwarded to train().
# NOTE(review): the file name suggests out-of-fold event predictions for the
# four targets — confirm.
extra_df_path = os.path.join(ARTIFACT_DIR, 'events_oof_asof_4tgt_3.f')
extra_df = pd.read_feather(extra_df_path)

# Launch training with the parameter sets, base frame, store and config
# prepared above; outputs go to OUTPUT_DIR and uploads use ARTIFACT_DIR.
train(
    params,
    df_train,
    store,
    config,
    season_df=season_df,
    extra_df=extra_df,
    output_dir=OUTPUT_DIR,
    upload_dir=ARTIFACT_DIR,
    num_seeds=3,
)