In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
from google.colab import drive
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf
!pip install tensorflow_addons
import tensorflow_addons as tfa
keras.utils.set_random_seed(9)
drive.mount('/content/drive')

Collecting tensorflow_addons
  Downloading tensorflow_addons-0.16.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (1.1 MB)
[K     |████████████████████████████████| 1.1 MB 7.3 MB/s 
Installing collected packages: tensorflow-addons
Successfully installed tensorflow-addons-0.16.1
Mounted at /content/drive


In [None]:
# Root of the project data on the mounted Drive; change this one constant to
# point the notebook at a different copy of the data.
DATA_ROOT = '/content/drive/MyDrive/data_490'

injury_history = pd.read_csv(f'{DATA_ROOT}/injury_data/injury_history')
bio_data = pd.read_csv(f'{DATA_ROOT}/processed_data/bio_data.csv')
# WARNING: unpickling can execute arbitrary code; only load this file from a
# trusted source.
ts_data = pd.read_pickle(f'{DATA_ROOT}/processed_data/thirty_day_timestep_df')

In [None]:
# Keep only the first space-separated token of each `player_age` entry.
age_tokens = bio_data['player_age'].str.split(' ')
bio_data['player_age'] = age_tokens.str.get(0)

In [None]:
# Shuffle row positions once, then cut the permutation into three consecutive
# pieces: validation | train | test.
indexes = np.random.permutation(len(ts_data))
train_size = int(0.8 * len(indexes))   # first 80% is train + validation
valid_size = int(0.2 * train_size)     # validation is 20% of that portion
valid_indexes, train_indexes, test_indexes = np.split(indexes, [valid_size, train_size])

In [None]:
# Replace every missing value in both static tables with the sentinel -1.
bio_data, injury_history = (
    frame.fillna(value=-1) for frame in (bio_data, injury_history)
)

In [None]:
# `train_indexes` / `valid_indexes` / `test_indexes` are row POSITIONS (a
# permutation of range(len(ts_data))), so every table is indexed with .iloc.
# Indexing ts_data by label (.loc) only lines up with the .iloc selections on
# bio_data / injury_history when ts_data happens to carry a default RangeIndex.
# NOTE(review): this assumes the three tables are row-aligned -- confirm upstream.

def _stacked_windows(row_positions):
    """Stack the per-row `time_series` arrays at `row_positions` into one float16
    array and drop the last slice along the third axis."""
    windows = np.stack(ts_data['time_series'].iloc[row_positions].values)
    return windows.astype(np.float16)[:, :, :-1]


ts_train = _stacked_windows(train_indexes)
ts_valid = _stacked_windows(valid_indexes)
ts_test = _stacked_windows(test_indexes)

# Static tables: the first two columns are skipped (presumably identifiers --
# TODO confirm).
bio_train = bio_data.iloc[train_indexes, 2:].astype(np.float16)
bio_valid = bio_data.iloc[valid_indexes, 2:].astype(np.float16)
bio_test = bio_data.iloc[test_indexes, 2:].astype(np.float16)
injury_train = injury_history.iloc[train_indexes, 2:].astype(np.float16)
injury_valid = injury_history.iloc[valid_indexes, 2:].astype(np.float16)
injury_test = injury_history.iloc[test_indexes, 2:].astype(np.float16)

# Binary target taken from the `injured` column of the time-series table.
train_target = ts_data['injured'].iloc[train_indexes].astype(np.float16)
valid_target = ts_data['injured'].iloc[valid_indexes].astype(np.float16)
test_target = ts_data['injured'].iloc[test_indexes].astype(np.float16)

In [None]:
class PositionalEncoding(tf.keras.layers.Layer):
    """Add a fixed sin/cos positional table to the input, then apply dropout.

    The table has shape (time_steps, features). Entry (t, c) starts as
    ``t / 1000 ** (2 * c / features)``; even columns are then passed through
    ``sin`` and odd columns through ``cos``. The table is not trainable.

    NOTE(review): the base (1000) and the per-column exponent differ from the
    commonly used 10000 ** (2i / d) formulation; kept as-is so the layer's
    output is unchanged -- confirm this is intentional.

    Args:
        dropout: Rate passed to the internal ``Dropout`` layer.
        time_steps: Number of rows in the positional table.
        features: Number of columns in the positional table.
        **kwargs: Forwarded to ``tf.keras.layers.Layer`` (e.g. ``name``).
    """
    def __init__(self, dropout=0, time_steps=30, features=85, **kwargs):
        super().__init__(**kwargs)
        # Kept so get_config() can rebuild the layer.
        self.dropout_rate = dropout
        self.time_steps = time_steps
        self.features = features

        self.dropout = layers.Dropout(dropout)
        columns = np.arange(features)
        rows = np.arange(time_steps)
        # Broadcasting (time_steps, 1) / (features,) -> (time_steps, features).
        p = rows.reshape(-1, 1) / 1000 ** (columns * 2 / features)
        p[:, 0::2] = np.sin(p[:, 0::2])
        p[:, 1::2] = np.cos(p[:, 1::2])
        # Stored as float32 instead of NumPy's default float64.
        self.p = p.astype(np.float32)

    def call(self, x, training=None, **kwargs):
        # Cast explicitly so the addition never relies on implicit dtype
        # conversion of a NumPy array.
        x = x + tf.cast(self.p, x.dtype)
        return self.dropout(x, training=training)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            'dropout': self.dropout_rate,
            'time_steps': self.time_steps,
            'features': self.features,
        })
        return config

In [None]:
from re import X
def transformer_encoder(inputs, head_size, num_heads, conv_filters, kernel_size, dropout=0):
    """Apply one encoder block (self-attention + convolutional feed-forward).

    Both sub-blocks are pre-normalised and wrapped in a residual connection.

    Args:
        inputs: Input tensor; its last dimension sets the block's output width.
        head_size: ``key_dim`` of the multi-head attention layer.
        num_heads: Number of attention heads.
        conv_filters: Filters of the first (hidden) ``Conv1D`` layer.
        kernel_size: Kernel size of the first ``Conv1D`` layer.
        dropout: Rate used inside attention and by both ``Dropout`` layers.

    Returns:
        A tensor whose last dimension equals ``inputs.shape[-1]``.
    """
    # Normalization and Attention
    # NOTE(review): this Normalization layer is never adapt()-ed anywhere in the
    # notebook -- confirm it has the intended effect.
    x = keras.layers.Normalization()(inputs)
    x = layers.LayerNormalization(epsilon=1e-6)(x)
    x = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(x, x)
    x = layers.Dropout(dropout)(x)
    res = x + inputs

    # Feed Forward Part
    x = layers.LayerNormalization(epsilon=1e-6)(res)
    x = layers.Conv1D(filters=conv_filters, kernel_size=kernel_size, activation="relu", padding='same')(x)
    # Previously assigned to a capital `X`, so this dropout was silently
    # discarded and never applied to the feed-forward path.
    x = layers.Dropout(dropout)(x)
    # 1x1 convolution projects back to the input width so the residual add works.
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    return x + res

In [None]:
def build_mixed_model(
    ts_input_shape,
    injury_input_shape,
    bio_input_shape,
    head_size,
    num_heads,
    conv_filters,
    kernel_size,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0,
):
    """Build the three-input classifier (time series + injury history + bio).

    The time-series branch is embedded with a dense layer, given positional
    encodings, passed through ``num_transformer_blocks`` encoder blocks and
    pooled. The pooled vector is concatenated with the two static inputs and
    fed to an MLP ending in a single sigmoid unit.

    Args:
        ts_input_shape: Per-sample shape of the time-series input; its last two
            entries are used as (time_steps, features).
        injury_input_shape: Per-sample shape of the injury-history input.
        bio_input_shape: Per-sample shape of the bio input.
        head_size, num_heads, conv_filters, kernel_size, dropout: Forwarded to
            ``transformer_encoder`` for every block.
        num_transformer_blocks: Number of stacked encoder blocks.
        mlp_units: Iterable of hidden-layer widths for the MLP head.
        mlp_dropout: Dropout rate applied after every MLP hidden layer.

    Returns:
        An uncompiled ``keras.Model`` whose inputs are ordered
        ``[ts_input, injury_input, bio_input]``.
    """
    time_steps, features = ts_input_shape[-2], ts_input_shape[-1]

    ts_input = keras.Input(shape=ts_input_shape)
    x = ts_input

    #Embedding
    x = layers.Flatten()(x)
    x = layers.Dense(features * time_steps, activation='tanh')(x)
    x = layers.Reshape(ts_input_shape)(x)

    #Positional Encoding
    # Size the table from the actual input instead of the layer's 30x85
    # defaults, so the model also builds for other window lengths / widths.
    x = PositionalEncoding(time_steps=time_steps, features=features)(x)
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, conv_filters, kernel_size, dropout)

    # NOTE(review): neither Normalization layer below is adapt()-ed in this
    # notebook -- confirm they have the intended effect.
    bio_input = keras.Input(shape=bio_input_shape)
    bio = layers.Normalization()(bio_input)

    injury_input = keras.Input(shape=injury_input_shape)
    injury = layers.Normalization()(injury_input)

    # NOTE(review): with data_format="channels_first" the pooling presumably
    # averages over the last (feature) axis, leaving one value per time step --
    # confirm this is the intended reduction.
    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    x = tf.keras.layers.Concatenate()([x, injury, bio])

    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu")(x)
        x = layers.Dropout(mlp_dropout)(x)
    inputs = [ts_input, injury_input, bio_input]
    outputs = layers.Dense(1, activation="sigmoid")(x)
    return keras.Model(inputs, outputs)

In [None]:
# Per-sample input shapes (leading batch dimension stripped).
ts_shape = ts_train.shape[1:]
injury_shape = injury_train.shape[1:]
bio_shape = bio_train.shape[1:]

mixed_model = build_mixed_model(
    ts_input_shape=ts_shape,
    injury_input_shape=injury_shape,
    bio_input_shape=bio_shape,
    head_size=256,
    num_heads=8,
    conv_filters=64,
    kernel_size=3,
    num_transformer_blocks=4,
    mlp_units=[64, 64, 64, 64],
    dropout=0.2,
    mlp_dropout=0.2,
)

# Adam wrapped in stochastic weight averaging.
# NOTE(review): no cell in this notebook calls `assign_average_vars`, so confirm
# the averaged weights are actually used.
opt = tfa.optimizers.SWA(keras.optimizers.Adam(learning_rate=1e-6))

mixed_model.compile(
    optimizer=opt,
    loss=keras.losses.BinaryCrossentropy(),
    metrics=[keras.metrics.AUC()],
    jit_compile=True,
    steps_per_execution=32,
)

In [None]:
def scheduler(epoch, lr, warmup_epochs=20, decay_epochs=100, initial_lr=1e-8, base_lr=1e-4, min_lr=1e-7):
    """Linear warm-up followed by linear decay, for ``LearningRateScheduler``.

    Args:
        epoch: Zero-based epoch index.
        lr: Current learning rate; unused, present only because the callback
            passes it.
        warmup_epochs: Epochs spent ramping from ``initial_lr`` to ``base_lr``.
            ``0`` disables the warm-up.
        decay_epochs: Epochs spent ramping from ``base_lr`` down to ``min_lr``.
        initial_lr: Learning rate at epoch 0 of the warm-up.
        base_lr: Peak learning rate, reached at ``epoch == warmup_epochs``.
        min_lr: Learning rate once the decay has finished.

    Returns:
        The learning rate to use for ``epoch`` as a float.
    """
    # Guard against warmup_epochs == 0, which previously divided by zero at
    # epoch 0.
    if warmup_epochs > 0 and epoch <= warmup_epochs:
        pct = epoch / warmup_epochs
        return ((base_lr - initial_lr) * pct) + initial_lr

    decay_elapsed = epoch - warmup_epochs
    # decay_epochs == 0 never enters this branch, so there is no division by zero.
    if 0 <= decay_elapsed < decay_epochs:
        pct = 1 - (decay_elapsed / decay_epochs)
        return ((base_lr - min_lr) * pct) + min_lr

    return min_lr

In [None]:
#tf.keras.utils.plot_model(mixed_model)

In [None]:
#mixed_model.summary()

# Stop once the monitored validation metric has not improved for 50 epochs
# (restoring the best weights) and drive the learning rate from `scheduler`,
# which overrides the rate the optimizer was constructed with.
callbacks = [
    keras.callbacks.EarlyStopping(patience=50, restore_best_weights=True),
    tf.keras.callbacks.LearningRateScheduler(scheduler),
]

mixed_model.fit(
    [ts_train, injury_train, bio_train],
    train_target,
    # Keras documents validation_data as a tuple (x_val, y_val).
    validation_data=([ts_valid, injury_valid, bio_valid], valid_target),
    epochs=300,
    batch_size=128,
    callbacks=callbacks
)

#mixed_model.evaluate([ts_test, injury_test, bio_test], test_target, verbose=1)

Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300
Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78

<keras.callbacks.History at 0x7fe8098da3d0>

In [None]:
# Predicted injury probability for every held-out test row.
pred = mixed_model.predict(x=[ts_test, injury_test, bio_test])

In [None]:
# Compute each cut-off once instead of on every comparison.
risk_p40, risk_p95 = np.percentile(pred, [40, 95])

# Buckets partition the predictions: low < p40 <= normal <= p95 < high.
# The normal bucket is inclusive on both ends so a prediction that lands exactly
# on a cut-off is no longer dropped from every bucket.
high_risk = np.where(pred > risk_p95)
normal_risk = np.where((pred >= risk_p40) & (pred <= risk_p95))
low_risk = np.where(pred < risk_p40)

In [None]:
# Observed injury rate in the normal-risk bucket. The target is stored as
# float16, so widen it first; averaging in float16 limits the result to about
# three significant digits.
test_target.iloc[normal_risk[0]].astype(np.float64).mean()

0.0202

In [None]:
# Observed injury rate in the low-risk bucket. The target is stored as float16,
# so widen it first; averaging in float16 limits the result to about three
# significant digits.
test_target.iloc[low_risk[0]].astype(np.float64).mean()

0.00898

In [None]:
# Observed injury rate in the high-risk bucket. The target is stored as float16,
# so widen it first; averaging in float16 limits the result to about three
# significant digits.
test_target.iloc[high_risk[0]].astype(np.float64).mean()

0.0461

In [None]:
# Median predicted probability over the test set.
np.quantile(pred, 0.5)

In [None]:
# Largest predicted probability over the test set.
pred.max()

In [None]:
# Row positions (within the test split) of injured and healthy samples; each is
# a one-element tuple of index arrays, hence the `[0]` at the use sites.
test_labels = test_target.to_numpy()
injured = np.nonzero(test_labels == 1)
healthy = np.nonzero(test_labels == 0)

In [None]:
# Mean predicted probability over the whole test set.
pred.mean()

In [None]:
# Mean predicted probability for test rows that were actually injured.
pred[injured[0]].mean()

In [None]:
# Mean predicted probability for test rows that were not injured.
pred[healthy[0]].mean()