In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
from google.colab import drive
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
!pip install keras-tuner
!pip install tensorflow_addons
import tensorflow_addons as tfa
#from tensorflow.keras import mixed_precision
import keras_tuner as kt
#policy = mixed_precision.Policy('mixed_float16')
#mixed_precision.set_global_policy(policy)
# Fix the global seed for reproducibility. Per the Keras documentation this
# single call seeds Python's `random`, NumPy and TensorFlow together.
keras.utils.set_random_seed(9)
# Mount Google Drive into the Colab runtime; every data path used later in
# this notebook lives under /content/drive/MyDrive.
drive.mount('/content/drive')

Collecting keras-tuner
  Downloading keras_tuner-1.1.2-py3-none-any.whl (133 kB)
[?25l[K     |██▌                             | 10 kB 34.1 MB/s eta 0:00:01[K     |█████                           | 20 kB 8.8 MB/s eta 0:00:01[K     |███████▍                        | 30 kB 7.8 MB/s eta 0:00:01[K     |█████████▉                      | 40 kB 3.5 MB/s eta 0:00:01[K     |████████████▎                   | 51 kB 3.6 MB/s eta 0:00:01[K     |██████████████▊                 | 61 kB 4.3 MB/s eta 0:00:01[K     |█████████████████▏              | 71 kB 4.5 MB/s eta 0:00:01[K     |███████████████████▋            | 81 kB 4.7 MB/s eta 0:00:01[K     |██████████████████████          | 92 kB 5.2 MB/s eta 0:00:01[K     |████████████████████████▌       | 102 kB 4.1 MB/s eta 0:00:01[K     |███████████████████████████     | 112 kB 4.1 MB/s eta 0:00:01[K     |█████████████████████████████▍  | 122 kB 4.1 MB/s eta 0:00:01[K     |███████████████████████████████▉| 133 kB 4.1 MB/s eta 0:00:

In [None]:
# Load the three input tables from Google Drive.
# NOTE(review): later cells slice all three tables with the same row numbers,
# so they are presumably row-aligned (one row per sample) — confirm.
injury_history = pd.read_csv('/content/drive/MyDrive/data_490/injury_data/injury_history')
bio_data = pd.read_csv('/content/drive/MyDrive/data_490/processed_data/bio_data.csv')
# NOTE: unpickling can execute arbitrary code; only load this file from a
# trusted source.
ts_data = pd.read_pickle('/content/drive/MyDrive/data_490/processed_data/thirty_day_timestep_df')

In [None]:
# Reduce every `player_age` entry to its first space-separated token.
age_parts = bio_data['player_age'].str.split(' ')
bio_data['player_age'] = age_parts.str[0]

In [None]:
# Shuffle the row positions of `ts_data` and carve them into three disjoint
# sets: the first 80% is train+validation (its first 20% becomes validation,
# the rest training) and the remaining 20% is the test set.
indexes = np.random.permutation(len(ts_data))
train_size = int(len(indexes)*.8)
valid_size = int(train_size*.2)
valid_indexes = indexes[:valid_size]
train_indexes = indexes[valid_size:train_size]
test_indexes = indexes[train_size:]

# Persist the split to Drive. fmt="%d" writes the indices as plain integers;
# np.savetxt's default format ("%.18e") would store them as floats in
# scientific notation, which cannot be used directly as row indices on reload.
np.savetxt("/content/drive/MyDrive/data_490/processed_data/train_indexes.csv", train_indexes, delimiter=",", fmt="%d")
np.savetxt("/content/drive/MyDrive/data_490/processed_data/valid_indexes.csv", valid_indexes, delimiter=",", fmt="%d")
np.savetxt("/content/drive/MyDrive/data_490/processed_data/test_indexes.csv", test_indexes, delimiter=",", fmt="%d")

In [None]:
# Replace missing values in both tabular inputs with the sentinel -1.
bio_data = bio_data.fillna(-1)
injury_history = injury_history.fillna(-1)

In [None]:
# Index arrays in the fixed order train / valid / test; every group of
# variables below is unpacked in this same order.
_split_indexes = (train_indexes, valid_indexes, test_indexes)

# Time-series input: stack the per-row arrays into one 3-D float16 array and
# drop the last entry along the final axis.
# NOTE(review): `ts_data` is sliced by label (.loc) while `bio_data` and
# `injury_history` are sliced by position (.iloc) with the same index arrays.
# These agree only if `ts_data` has a default 0..n-1 index — confirm.
ts_train, ts_valid, ts_test = [
    np.stack(ts_data.loc[idx, 'time_series'].values).astype(np.float16)[:, :, :-1]
    for idx in _split_indexes
]

# Static tabular inputs: every column from the third onward, cast to float16.
bio_train, bio_valid, bio_test = [
    bio_data.iloc[idx, 2:].astype(np.float16) for idx in _split_indexes
]
injury_train, injury_valid, injury_test = [
    injury_history.iloc[idx, 2:].astype(np.float16) for idx in _split_indexes
]

# Binary target taken from the `injured` column of `ts_data`.
train_target, valid_target, test_target = [
    ts_data.loc[idx, 'injured'].astype(np.float16) for idx in _split_indexes
]

In [None]:
class PositionalEncoding(tf.keras.layers.Layer):
    """Add a fixed sinusoidal position table to a sequence, then apply dropout.

    The table has shape ``(time_steps, features)`` and is computed once at
    construction time; it holds no trainable weights. Even-numbered feature
    columns use ``sin`` and odd-numbered columns use ``cos``.

    Args:
        dropout: Dropout rate applied after the table is added.
        time_steps: Number of rows (sequence positions) in the table.
        features: Number of columns (feature channels) in the table.
        **kwargs: Standard ``Layer`` keyword arguments (e.g. ``name``),
            forwarded to the base class so the layer can be rebuilt from its
            config.
    """

    def __init__(self, dropout=0, time_steps=30, features=85, **kwargs):
        super().__init__(**kwargs)
        # Keep the constructor arguments so get_config() can report them.
        self.dropout_rate = dropout
        self.time_steps = time_steps
        self.features = features
        self.dropout = layers.Dropout(dropout)

        columns = np.arange(features)
        rows = np.arange(time_steps)
        # NOTE(review): base 1000 with a per-column exponent differs from the
        # commonly used 10000 ** (2 * (i // 2) / d) form — confirm intentional.
        p = rows.reshape(-1, 1) / 1000 ** (columns * 2 / features)
        p[:, 0::2] = np.sin(p[:, 0::2])
        p[:, 1::2] = np.cos(p[:, 1::2])
        # Store as float32 rather than NumPy's default float64.
        self.p = p.astype(np.float32)

    def call(self, x, **kwargs):
        # Cast explicitly so the addition works whatever dtype `x` carries.
        x = x + tf.cast(self.p, x.dtype)
        return self.dropout(x)

    def get_config(self):
        """Return the constructor arguments so Keras can re-create the layer."""
        config = super().get_config()
        config.update({
            "dropout": self.dropout_rate,
            "time_steps": self.time_steps,
            "features": self.features,
        })
        return config

In [None]:
from re import X
def transformer_encoder(inputs, head_size, num_heads, conv_filters, kernel_size, dropout=0):
    """Apply one pre-norm transformer encoder block to `inputs`.

    The block is self-attention followed by a two-layer Conv1D feed-forward
    part, each wrapped in a residual connection.

    Args:
        inputs: Input tensor; its last dimension sets the number of output
            channels so the residual additions line up.
        head_size: `key_dim` passed to `MultiHeadAttention`.
        num_heads: Number of attention heads.
        conv_filters: Number of filters of the first (ReLU) Conv1D layer.
        kernel_size: Kernel size of the first Conv1D layer.
        dropout: Dropout rate used inside the attention layer, after it, and
            between the two Conv1D layers.

    Returns:
        A tensor with the same last dimension as `inputs`.
    """
    # Attention part: normalise, self-attend, drop out, add the residual.
    x = layers.LayerNormalization(epsilon=1e-6)(inputs)
    x = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(x, x)
    x = layers.Dropout(dropout)(x)
    res = x + inputs

    # Feed-forward part.
    x = layers.LayerNormalization(epsilon=1e-6)(res)
    x = layers.Conv1D(filters=conv_filters, kernel_size=kernel_size, activation="relu", padding='same')(x)
    # The dropout output must be assigned back to `x`; binding it to any other
    # name silently removes the layer from the graph.
    x = layers.Dropout(dropout)(x)
    # Project back to the input channel count so the residual add is valid.
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    return x + res

In [None]:
def build_mixed_model(
    ts_input_shape,
    injury_input_shape,
    bio_input_shape,
    head_size=256,
    num_heads=8,
    conv_filters=64,
    kernel_size=3,
    num_transformer_blocks=4,
    mlp_units=128,
    mlp_layers=2,
    encoder_dropout=0,
    mlp_dropout=0,
    pe_dropout=0,
    embedding_dropout=0,
    pe=True,
    embedding=True,
):
    """Build a three-input binary classifier.

    The time-series input goes through an optional dense embedding, an
    optional positional encoding and a stack of transformer encoder blocks.
    Its pooled output is concatenated with the injury-history and bio inputs
    and passed through an MLP ending in one sigmoid unit.

    Args:
        ts_input_shape: Per-sample shape of the time-series input; the last
            two entries are used as (time_steps, features).
        injury_input_shape: Per-sample shape of the injury-history input.
        bio_input_shape: Per-sample shape of the bio input.
        head_size: `key_dim` of every attention layer.
        num_heads: Number of attention heads per encoder block.
        conv_filters: Filters of the first Conv1D in each encoder block.
        kernel_size: Kernel size of the first Conv1D in each encoder block.
        num_transformer_blocks: Number of stacked encoder blocks.
        mlp_units: Width of each hidden dense layer in the head.
        mlp_layers: Number of hidden dense layers in the head.
        encoder_dropout: Dropout rate inside the encoder blocks.
        mlp_dropout: Dropout rate after each hidden dense layer.
        pe_dropout: Dropout rate applied by the positional-encoding layer.
        embedding_dropout: Dropout rate applied after the dense embedding.
        pe: Whether to add the positional encoding.
        embedding: Whether to apply the dense embedding.

    Returns:
        An uncompiled `keras.Model` taking `[ts, injury, bio]` inputs.
    """
    time_steps, features = ts_input_shape[-2], ts_input_shape[-1]

    ts_input = keras.Input(shape=ts_input_shape)
    x = ts_input
    # NOTE(review): none of the Normalization layers in this function is
    # adapted to data in the visible code, so they presumably run with their
    # default statistics — confirm whether `adapt()` should be called.
    x = keras.layers.Normalization(axis=1)(x)

    # Embedding: a dense tanh layer over the flattened sequence, reshaped back
    # to the original (time_steps, features) layout.
    if embedding:
        x = layers.Flatten()(x)
        x = layers.Dense(features * time_steps, activation='tanh')(x)
        x = layers.Dropout(embedding_dropout)(x)
        x = layers.Reshape(ts_input_shape)(x)

    # Positional encoding, sized from the actual input shape and using the
    # caller's dropout rate instead of the layer's built-in defaults.
    if pe:
        x = PositionalEncoding(
            dropout=pe_dropout, time_steps=time_steps, features=features
        )(x)
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, conv_filters, kernel_size, encoder_dropout)

    bio_input = keras.Input(shape=bio_input_shape)
    bio = layers.Normalization()(bio_input)

    injury_input = keras.Input(shape=injury_input_shape)
    injury = layers.Normalization()(injury_input)

    # Per the Keras docs, data_format="channels_first" makes the pooling
    # average over the last axis, leaving one value per time step here.
    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    x = tf.keras.layers.Concatenate()([x, injury, bio])

    # Classification head.
    for _ in range(mlp_layers):
        x = layers.Dense(mlp_units, activation="relu")(x)
        x = layers.Dropout(mlp_dropout)(x)
    inputs = [ts_input, injury_input, bio_input]
    outputs = layers.Dense(1, activation="sigmoid")(x)
    return keras.Model(inputs, outputs)

In [None]:
# Per-sample input shapes: the full array/frame shape minus the leading
# (sample) axis.
ts_shape = ts_train.shape[1:]
injury_shape = injury_train.shape[1:]
bio_shape = bio_train.shape[1:]

# Hyperparameters for this run, collected in one place.
model_hparams = dict(
    head_size=256,
    num_heads=32,
    conv_filters=32,
    kernel_size=3,
    num_transformer_blocks=5,
    mlp_units=128,
    mlp_layers=4,
    encoder_dropout=.28,
    mlp_dropout=.15,
    pe_dropout=.25,
    embedding_dropout=.07,
    pe=True,
    embedding=False,
)

model = build_mixed_model(ts_shape, injury_shape, bio_shape, **model_hparams)

In [None]:
# Base optimizer.
# NOTE(review): a LearningRateScheduler callback is attached at fit time in a
# later cell, which presumably overrides this initial learning rate every
# epoch — confirm.
opt = keras.optimizers.Adam(learning_rate=9e-5)
# Wrap Adam in stochastic weight averaging.
# NOTE(review): per the TensorFlow Addons docs, `start_averaging` and
# `average_period` count optimizer steps, and the averaged weights only reach
# the model once `assign_average_vars` is called. No such call is visible in
# this notebook — confirm whether the averages are meant to be used.
opt = tfa.optimizers.SWA(opt, start_averaging=20, average_period=1)

# Binary cross-entropy loss with AUC as the tracked metric.
# `steps_per_execution=64` runs 64 batches per compiled call, and
# `jit_compile=True` turns on XLA compilation.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=opt,
    metrics=[keras.metrics.AUC()],
    steps_per_execution=64,
    jit_compile=True
)

In [None]:
def scheduler(epoch, lr, warmup_epochs=20, decay_epochs=100, initial_lr=1e-10, base_lr=1e-4, min_lr=1e-8):
    """Return the learning rate for `epoch`: linear warm-up, then linear decay.

    Args:
        epoch: Zero-based epoch index.
        lr: Current learning rate. Unused; present because
            `LearningRateScheduler` calls the schedule with `(epoch, lr)`.
        warmup_epochs: Epochs over which the rate rises linearly from
            `initial_lr` to `base_lr`. Zero disables warm-up.
        decay_epochs: Epochs over which the rate then falls linearly from
            `base_lr` to `min_lr`.
        initial_lr: Learning rate at epoch 0 when warm-up is enabled.
        base_lr: Peak learning rate, reached at the end of warm-up.
        min_lr: Learning rate returned once the decay phase is over.

    Returns:
        The learning rate to use for `epoch`, as a float.
    """
    # Warm-up phase. The `warmup_epochs > 0` guard avoids a division by zero
    # when warm-up is disabled.
    if warmup_epochs > 0 and epoch <= warmup_epochs:
        pct = epoch / warmup_epochs
        return ((base_lr - initial_lr) * pct) + initial_lr

    # Decay phase. With `decay_epochs == 0` this condition is never true, so
    # the division below is not reached.
    if epoch < warmup_epochs + decay_epochs:
        pct = 1 - ((epoch - warmup_epochs) / decay_epochs)
        return ((base_lr - min_lr) * pct) + min_lr

    return min_lr

In [None]:
# Stop once validation AUC has not improved for 10 epochs, and set the
# learning rate from `scheduler` at the start of every epoch.
# monitor='val_auc' assumes the compiled AUC metric carries the default name
# 'auc' — TODO confirm (Keras appends a numeric suffix when several AUC
# metrics are created in one session).
# restore_best_weights=False means the model keeps its last-epoch weights,
# not those of the best epoch.
callbacks = [
    keras.callbacks.EarlyStopping(patience=10, monitor='val_auc', mode='max', restore_best_weights=False),
    tf.keras.callbacks.LearningRateScheduler(scheduler),
]

# `validation_data` is passed as an (inputs, targets) tuple, the form
# documented for Model.fit.
history = model.fit(
    [ts_train, injury_train, bio_train],
    train_target,
    validation_data=([ts_valid, injury_valid, bio_valid], valid_target),
    epochs=500,
    callbacks=callbacks,
    batch_size=16,
)

In [None]:
# Persist the trained model to Google Drive.
# NOTE(review): the model contains the custom `PositionalEncoding` layer;
# reloading it presumably requires that class to be available — confirm.
model.save('/content/drive/MyDrive/data_490/final_model')