In [None]:
import pandas as pd
import numpy as np

from datetime import timedelta
from tensorflow.keras.models import load_model

In [None]:
# Load the per-player daily engagement table, discard the two columns that are
# not used downstream, and convert `date` to a datetime column in a single chain.
player_engagement = (
    pd.read_csv('../input/mlb-forecasting-ann/player_engagement.csv')
    .drop(columns=['engagementMetricsDate', 'year'])
    .assign(date=lambda frame: pd.to_datetime(frame['date']))
)
player_engagement.head()

In [None]:
# Load the yearly mean-engagement table and cast `year` to an annual
# (December-ending) period column.
mean_df = (
    pd.read_csv('../input/mlb-forecasting-ann/player_engagement_mean_yearly.csv')
    .assign(year=lambda frame: frame['year'].astype('period[A-DEC]'))
)
mean_df.head()

In [None]:
# Number of previous days of engagement history that `prediction` joins onto
# each row (one merge per day, stepping the date back by one day each time).
lag = 17

# Number of saved fold models whose predictions `prediction` averages.
n_fold = 5

# Cache of already-loaded fold models, keyed by 1-based fold number.
_FOLD_MODELS = {}


def _load_fold_models():
    """Return the ``n_fold`` saved Keras models, reading each file only once.

    ``prediction`` is called once per test date; deserialising every ``.h5``
    file on each call is loop-invariant work, so loaded models are kept in
    ``_FOLD_MODELS`` and reused on later calls.
    """
    for fold in range(1, n_fold + 1):
        if fold not in _FOLD_MODELS:
            _FOLD_MODELS[fold] = load_model(
                f'../input/mlb-forecasting-ann/best_model_fold{fold}.h5'
            )
    return [_FOLD_MODELS[fold] for fold in range(1, n_fold + 1)]


def prediction(df):
    """Build lag features for one batch of rows and average the fold models' output.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame that, after ``reset_index()``, has a ``date`` column parseable
        with the ``%Y%m%d`` format and a ``date_playerId`` column whose second
        underscore-separated token is the integer player id. The caller's
        frame is not modified (``reset_index`` returns a new frame).

    Returns
    -------
    numpy.ndarray
        Mean of the fold models' predictions, with the same shape as the
        slice of columns 1-4 of the assembled frame.

    Notes
    -----
    Column selection is positional: after the helper columns are dropped,
    column 0 is skipped, columns 1-4 only determine the output shape, and
    every column from index 5 onward is fed to the models in order.
    NOTE(review): this assumes the incoming frame is laid out as
    ``date_playerId`` followed by exactly four target columns -- confirm
    against the caller.
    """
    df = df.reset_index()
    df['date'] = pd.to_datetime(df['date'], format='%Y%m%d')
    df['playerId'] = df['date_playerId'].str.split('_').str[1].astype(int)
    df['year'] = df['date'].dt.to_period('Y')

    # Step the join key back one day per iteration so that merge number k
    # attaches the engagement row from k days earlier, suffixed `_k` whenever
    # a column name collides with one already present.
    for days_back in range(1, lag + 1):
        df['date'] = df['date'] - timedelta(days=1)
        df = df.merge(
            player_engagement,
            how='left',
            on=['date', 'playerId'],
            suffixes=('', f'_{days_back}'),
        )
        # Player/day pairs with no engagement row become zeros.
        df = df.fillna(0.)

    df = df.merge(mean_df, how='left', on=['playerId', 'year'])
    df = df.fillna(0.)
    df = df.drop(columns=['date', 'playerId', 'year'])

    feature_columns = list(df.columns[5:])
    target_columns = list(df.columns[1:5])

    # The feature matrix is identical for every fold, so build it once.
    features = df[feature_columns].to_numpy()

    pred = np.zeros(df[target_columns].shape)
    for model in _load_fold_models():
        pred += model.predict(features)

    return pred / n_fold

In [None]:
import mlb

# Create the competition environment, then the iterator that hands back one
# test date at a time.
env = mlb.make_env()
iter_test = env.iter_test()

target_columns = ['target1', 'target2', 'target3', 'target4']

for test_df, sample_prediction_df in iter_test:
    targets = prediction(sample_prediction_df)
    # Clamp the averaged predictions into [0, 100] before writing them back.
    sample_prediction_df[target_columns] = np.clip(targets, a_min=0, a_max=100)
    # Any value that is still missing is submitted as zero.
    sample_prediction_df = sample_prediction_df.fillna(0.)
    env.predict(sample_prediction_df)