Credit to @columbia2131 - I started with his notebook and then added an external data set with descriptive statistics of the targets for each player.

## About Dataset

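The `*_train.pkl` files read later in this notebook were generated by the code below (the cell is kept disabled inside a string literal).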

In [None]:
%%capture
# NOTE: everything below is wrapped in a triple-quoted string, so this cell
# executes nothing; it only records how the per-column training files were made.
# The recorded script reads train.csv and, for every column except 'date',
# evaluates each non-null cell, turns it into a DataFrame tagged with the
# originating row index and date, concatenates the pieces and writes them to
# '{col}_train.csv' and '{col}_train.pkl'.
# The null/true/false aliases exist so that eval() accepts JSON-style literals.
# NOTE(review): eval() runs arbitrary code - only acceptable on trusted data.
"""
!pip install pandarallel 

import gc

import numpy as np
import pandas as pd
from pathlib import Path

from pandarallel import pandarallel
pandarallel.initialize()

BASE_DIR = Path('../input/mlb-player-digital-engagement-forecasting')
train = pd.read_csv(BASE_DIR / 'train.csv')

null = np.nan
true = True
false = False

for col in train.columns:

    if col == 'date': continue

    _index = train[col].notnull()
    train.loc[_index, col] = train.loc[_index, col].parallel_apply(lambda x: eval(x))

    outputs = []
    for index, date, record in train.loc[_index, ['date', col]].itertuples():
        _df = pd.DataFrame(record)
        _df['index'] = index
        _df['date'] = date
        outputs.append(_df)

    outputs = pd.concat(outputs).reset_index(drop=True)

    outputs.to_csv(f'{col}_train.csv', index=False)
    outputs.to_pickle(f'{col}_train.pkl')

    del outputs
    del train[col]
    gc.collect()
"""

## DATA PREPARATION

In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.metrics import mean_absolute_error
from datetime import timedelta
from functools import reduce
from tqdm import tqdm
import lightgbm as lgbm
import mlb
import gc

In [None]:
# Input directories: BASE_DIR holds the competition files (players.csv is read
# from it), TRAIN_DIR holds the per-table '*_train.pkl' files.
BASE_DIR = Path('../input') / 'mlb-player-digital-engagement-forecasting'
TRAIN_DIR = Path('../input') / 'mlb-pdef-train-dataset'

In [None]:
# Player table from the competition directory.
players = pd.read_csv(BASE_DIR / 'players.csv')

# Pickled training tables: next-day engagement targets and rosters.
targets = pd.read_pickle(TRAIN_DIR / 'nextDayPlayerEngagement_train.pkl')
rosters = pd.read_pickle(TRAIN_DIR / 'rosters_train.pkl')

# Box scores are summed so that every (playerId, date) pair ends up as one row.
scores = (
    pd.read_pickle(TRAIN_DIR / 'playerBoxScores_train.pkl')
    .groupby(['playerId', 'date'])
    .sum()
    .reset_index()
)

In [None]:
# --- key and target columns -------------------------------------------------
targets_id = ['playerId', 'date']
TGTCOLS = ['target1', 'target2', 'target3', 'target4']

# --- roster columns ---------------------------------------------------------
rosters_id = ['playerId']
rosters_cat = ['statusCode', 'status']

# --- player box-score columns (gamePk is deliberately not part of the id) ---
pscores_id = ['playerId', 'teamId']
pscores_cat = ['jerseyNum', 'positionCode', 'positionName', 'positionType']
pscores_num = [
    'home',
    # batting
    'gamesPlayedBatting', 'flyOuts', 'groundOuts', 'runsScored',
    'doubles', 'triples', 'homeRuns', 'strikeOuts', 'baseOnBalls',
    'intentionalWalks', 'hits', 'hitByPitch', 'atBats', 'caughtStealing',
    'stolenBases', 'groundIntoDoublePlay', 'groundIntoTriplePlay',
    'plateAppearances', 'totalBases', 'rbi', 'leftOnBase', 'sacBunts',
    'sacFlies', 'catchersInterference', 'pickoffs',
    # pitching
    'gamesPlayedPitching', 'gamesStartedPitching', 'completeGamesPitching',
    'shutoutsPitching', 'winsPitching', 'lossesPitching', 'flyOutsPitching',
    'airOutsPitching', 'groundOutsPitching', 'runsPitching',
    'doublesPitching', 'triplesPitching', 'homeRunsPitching',
    'strikeOutsPitching', 'baseOnBallsPitching', 'intentionalWalksPitching',
    'hitsPitching', 'hitByPitchPitching', 'atBatsPitching',
    'caughtStealingPitching', 'stolenBasesPitching', 'inningsPitched',
    'saveOpportunities', 'earnedRuns', 'battersFaced', 'outsPitching',
    'pitchesThrown', 'balls', 'strikes', 'hitBatsmen', 'balks',
    'wildPitches', 'pickoffsPitching', 'rbiPitching',
    'gamesFinishedPitching', 'inheritedRunners', 'inheritedRunnersScored',
    'catchersInterferencePitching', 'sacBuntsPitching', 'sacFliesPitching',
    'saves', 'holds', 'blownSaves',
    # fielding
    'assists', 'putOuts', 'errors', 'chances',
]

# --- per-player target statistics: '<target>_<stat>' for every target --------
stats_cols = [
    f'{target}_{stat}'
    for target in TGTCOLS
    for stat in ('mean', 'median', 'std', 'min', 'max', 'prob')
]

# Model inputs: numeric box-score columns followed by the target statistics.
feature_col = pscores_num + stats_cols

In [None]:
# External table of per-player target statistics, plus its column names.
player_target_stats = pd.read_csv(
    "../input/player-target-stats/player_target_stats.csv"
)
data_names = player_target_stats.columns.tolist()

In [None]:
# creat dataset
train = targets[targets_id + TGTCOLS]
train = train.merge(rosters[rosters_id + rosters_cat + ["date"]], 
                    on=['playerId', 'date'], how='left')
train = train.merge(scores[pscores_id  + pscores_num  + ["date"]], 
                    on=['playerId', 'date'], how='left')
train = train.merge(player_target_stats, how='inner', left_on=["playerId"],right_on=["playerId"])

In [None]:
%%time
# Target matrix (one column per entry of TGTCOLS) and feature matrix; missing
# feature values are replaced by 0 before conversion to a NumPy array.
y = train[TGTCOLS].to_numpy()
X = train[feature_col].fillna(0.).to_numpy()

## MODEL

In [None]:
import tensorflow as tf
import tensorflow.keras.layers as L
import tensorflow.keras.models as M
from sklearn.metrics import mean_absolute_error, mean_squared_error
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

In [None]:
def make_model(n_in):
    """Build and compile the feed-forward regression network.

    The network is: input of width ``n_in`` -> two 50-unit ReLU dense layers
    ("d1", "d2") -> a 4-unit linear output layer ("preds").  It is compiled
    with the Adam optimizer and mean-absolute-error loss.

    Args:
        n_in: number of input features per sample.

    Returns:
        The compiled Keras model, named "ANN".
    """
    hidden_units = 50

    inputs = L.Input(shape=(n_in,), name="inputs")

    # Stack the hidden layers; names are kept stable ("d1", "d2").
    hidden = inputs
    for layer_name in ("d1", "d2"):
        hidden = L.Dense(hidden_units, activation="relu", name=layer_name)(hidden)

    outputs = L.Dense(4, activation="linear", name="preds")(hidden)

    ann = M.Model(inputs, outputs, name="ANN")
    ann.compile(optimizer="adam", loss="mean_absolute_error")
    return ann

In [None]:
# Build the network with one input per feature column of X.
net = make_model(X.shape[1])
# Model.summary() prints the layer table itself and returns None, so wrapping
# it in print() only added a stray "None" line to the output.
net.summary()

In [None]:
# Fit on the full training matrices (no validation data is passed), then run
# the garbage collector.
net.fit(x=X, y=y, epochs=5, batch_size=30_000)
gc.collect()

## Inference

In [None]:
# Aliases that let eval() parse the JSON-style literals (null / true / false)
# found in the nested columns of the test frame.
null = np.nan
true = True
false = False

env = mlb.make_env() # initialize the environment
iter_test = env.iter_test() # iterator which loops over each date in test set

for (test_df, sample_prediction_df) in iter_test:

    sample_prediction_df = sample_prediction_df.reset_index(drop=True)

    # create dataset: the player id is the second '_'-separated field of
    # 'date_playerId'
    sample_prediction_df['playerId'] = sample_prediction_df['date_playerId']\
                                        .map(lambda x: int(x.split('_')[1]))

    # Dealing with missing values: a nested column may be NaN for a given day.
    # In that case build an all-NaN frame with the training-time columns so the
    # merges below still work.
    # NOTE(security): eval() executes arbitrary code; it is tolerated here only
    # because the input comes from the competition environment. Do not reuse
    # this on untrusted data.
    raw_rosters = test_df['rosters'].iloc[0]
    if pd.notna(raw_rosters):
        test_rosters = pd.DataFrame(eval(raw_rosters))
    else:
        test_rosters = pd.DataFrame({'playerId': sample_prediction_df['playerId']})
        for col in rosters.columns:
            if col == 'playerId': continue
            test_rosters[col] = np.nan

    raw_scores = test_df['playerBoxScores'].iloc[0]
    if pd.notna(raw_scores):
        test_scores = pd.DataFrame(eval(raw_scores))
    else:
        test_scores = pd.DataFrame({'playerId': sample_prediction_df['playerId']})
        for col in scores.columns:
            if col == 'playerId': continue
            test_scores[col] = np.nan
    # One row per player, mirroring the per-(playerId, date) sum used in training.
    test_scores = test_scores.groupby('playerId').sum().reset_index()

    test = sample_prediction_df[['playerId']].copy()
    test = test.merge(test_rosters[rosters_id + rosters_cat], on='playerId', how='left')
    test = test.merge(test_scores[pscores_id  + pscores_num], on='playerId', how='left')
    # Left join (not inner): a player missing from player_target_stats must
    # still get a row, otherwise the prediction matrix has fewer rows than
    # sample_prediction_df and the column assignment below fails. Missing
    # statistics become 0 through the fillna that follows.
    test = test.merge(player_target_stats, how='left', left_on=["playerId"],right_on=["playerId"])

    # predict, then clamp predictions to the [0, 100] range
    Xe = test[feature_col].fillna(0.).values
    pe = net.predict(Xe)
    pe = np.clip(pe, 0, 100)

    # merge submission
    sample_prediction_df[TGTCOLS] = pe
    sample_prediction_df = sample_prediction_df.fillna(0.)
    del sample_prediction_df['playerId']

    env.predict(sample_prediction_df)

In [None]:
# Show the first rows of the prediction frame left over from the last loop iteration.
sample_prediction_df.head()

In [None]:
# Column-wise maxima of the last prediction frame (quick sanity check of the value range).
sample_prediction_df.max()