In [191]:
import polars as pl
from polars import col as c
from tqdm import tqdm
import logging
import tensorflow as tf
# Raise TensorFlow's Python logger threshold to ERROR so lower-severity
# messages (info/warnings) do not clutter the cell outputs.
tf.get_logger().setLevel(logging.ERROR)

In [192]:
import numpy as np
# Parse the raw data file into a label vector (`targets`) and a zero-padded
# design tensor (`design`).
# File layout as relied on by this code: the first line is a header; after it,
# a line holding a single token is a class label that opens a new sample, and
# every following multi-token line is one timestep of that sample.
targets = []

with open("data/rocket_league_skillshots.data") as file:
    lines = [line.strip(" \n") for line in file.readlines()]
headers = lines[0]
data = lines[1:]
for line in data:
    fields = line.split()
    if len(fields) == 1:
        targets.append(float(fields[0]))

targets = np.array(targets)
# Relabel so the class ids are zero-based: -1 is moved to 4, then every label
# is shifted down by one (presumably 4 is otherwise unused - confirm).
targets[targets == -1] = 4
targets -= 1

# Running timestep counter per sample; only its maximum is needed, which is
# the length of the longest sequence and therefore the padded time dimension.
stepcounts = []
counter = 0

for line in data:
    if len(line.split()) == 1:
        counter = 0
    else:
        counter += 1
        stepcounts.append(counter)

n = len(targets)
# Shape: (samples, longest sequence, features per timestep). The feature count
# is read off data[2], which is a timestep row of the first sample.
design = np.zeros((n, max(stepcounts), len(data[2].split())))

samplenum = 0
curmat = []
# data[0] is the first sample's label, so start after it. Each later label
# line marks the end of the sample collected so far.
for line in data[1:]:
    fields = line.split()
    if len(fields) > 1:
        curmat.append([float(num) for num in fields])
    elif len(fields) == 1:
        if curmat:
            design[samplenum, 0:len(curmat), :] = np.array(curmat).astype("float32")
        curmat = []
        samplenum += 1

# The final sample has no label line after it, so it must be written
# explicitly; otherwise its row in `design` would stay all zeros.
if curmat:
    design[samplenum, 0:len(curmat), :] = np.array(curmat).astype("float32")
    curmat = []
    samplenum += 1
# np.save("data/unscaled_design_tensor", design)
# np.save("data/targets", targets)
print(design.shape, targets.shape, design.dtype, targets.dtype)

(298, 64, 18) (298,) float64 float64


In [193]:
from sklearn.model_selection import train_test_split as tts
# Stratified 75/25 hold-out split with a fixed seed.
x_train, x_test, y_train, y_test = tts(
    design,
    targets,
    test_size = 0.25,
    stratify = targets,
    random_state = 100,
)
print(x_test.shape)

# Standardise only the first 7 feature columns; the remaining columns are left
# untouched (presumably indicator-style inputs - confirm). The statistics are
# taken over the sample axis of the training split only, giving one mean and
# one standard deviation per (timestep, feature) position.
means = x_train[:, :, :7].mean(axis = 0)
sds = x_train[:, :, :7].std(axis = 0)
sds[sds == 0] = 1  # constant positions: avoid dividing by zero

# Apply the training statistics in place to both splits.
for split_array in (x_train, x_test):
    split_array[:, :, :7] -= means
    split_array[:, :, :7] /= sds

(75, 64, 18)


In [194]:
from sklearn.model_selection import StratifiedKFold
from tensorflow.keras.utils import to_categorical

skf = StratifiedKFold(n_splits = 5,
                      random_state = 100,
                      shuffle = True)

# Work from 1-D class ids. If this cell has already been run, y_train / y_test
# are one-hot matrices, so recover the ids with argmax; this keeps the cell
# safe to re-run instead of stratifying on (and re-encoding) one-hot labels.
y_train_labels = y_train.argmax(axis = 1) if y_train.ndim == 2 else y_train
y_test_labels = y_test.argmax(axis = 1) if y_test.ndim == 2 else y_test

splits = skf.split(x_train, y_train_labels)
splits = list(splits) # all my homies hate generator objects

# convert to softmax vectors after stratify
# Use one shared width for both splits: without num_classes, to_categorical
# infers the width from the largest label present in each array separately.
n_classes = int(max(y_train_labels.max(), y_test_labels.max())) + 1
y_train = to_categorical(y_train_labels, num_classes = n_classes)
y_test = to_categorical(y_test_labels, num_classes = n_classes)

In [195]:
from tensorflow import keras
from tensorflow.keras import layers
import plotly.express as px
import polars as pl
from polars import col as c

def instantiate_model():
    """Build and compile a fresh, untrained sequence classifier.

    The input shape (timesteps, features) is read from the notebook-level
    ``design`` tensor. The network is two stacked LSTM layers (64 units
    returning the full sequence, then 32 units) followed by a 7-way softmax.

    Returns:
        A compiled ``keras.Sequential`` model using RMSprop, categorical
        cross-entropy, and accuracy / AUC metrics.
    """
    timesteps, n_features = design.shape[1], design.shape[2]

    model = keras.Sequential()
    model.add(layers.Input((timesteps, n_features)))
    model.add(layers.LSTM(64, return_sequences = True))
    model.add(layers.LSTM(32))
    model.add(layers.Dense(7, activation = "softmax"))

    model.compile(
        optimizer = "rmsprop",
        loss = "categorical_crossentropy",
        metrics = ["accuracy", "auc"],
    )
    return model

# Train a fresh model on every fold and keep the per-epoch training and
# validation curves. Each entry holds one list (one curve) per fold.
fold_curves = {
    "accuracy": [],
    "loss": [],
    "val_accuracy": [],
    "val_loss": [],
}

for train_idx, val_idx in splits:
    model = instantiate_model()
    history = model.fit(
        x_train[train_idx], y_train[train_idx],
        validation_data = (x_train[val_idx], y_train[val_idx]),
        epochs = 200,
        batch_size = 64,
        verbose = False
    )
    for metric_name, curves in fold_curves.items():
        curves.append(history.history[metric_name])

# One (folds, epochs) array per metric.
accs = np.array(fold_curves["accuracy"])
losses = np.array(fold_curves["loss"])
val_accs = np.array(fold_curves["val_accuracy"])
val_losses = np.array(fold_curves["val_loss"])

# Stack the four (folds, epochs) arrays depth-wise into (folds, epochs, 4),
# then average over the fold axis to get one row per epoch.
metric_tensor = np.dstack([accs, losses, val_accs, val_losses])
# Deliberately not called `means`: that name holds the standardisation means
# computed when the train/test split was scaled, and must not be overwritten.
metric_means = metric_tensor.mean(axis = 0)
# orient = "row" states explicitly that each array row is one epoch, and the
# row index (starting at 1) is named "Epoch" directly.
mean_metrics = pl.DataFrame(metric_means,
                            schema = ["Mean Accuracy", "Mean Loss",
                                      "Mean Validation Accuracy", "Mean Validation Loss"],
                            orient = "row") \
    .with_row_index("Epoch", offset = 1) \
    .unpivot(index = "Epoch", value_name = "Metric Value", variable_name = "Metric")
fig = px.line(mean_metrics.to_pandas(),
              x = "Epoch", y = "Metric Value", color = "Metric",
              title = "Training Metrics for Cross-Validated Simple Model")\
    .update_layout(hovermode = "x unified")
fig.show()

In [196]:
import tensorflow as tf

# Train 20 independently initialised models on the full training split and
# keep the one with the highest test accuracy.
# NOTE(review): the winner is chosen using the test set and then reported on
# that same test set, so the figures below are optimistically biased. Consider
# selecting on a validation split instead.
# NOTE(review): the dict is keyed by accuracy, so runs with identical accuracy
# overwrite one another (the latest such run is kept).
models = {}

for i in range(20):
    model = instantiate_model()
    history = model.fit(
        tf.Variable(x_train), tf.Variable(y_train), # get rid of the retrace warning
        epochs = 170,
        batch_size = 64,
        verbose = False
    )
    # [loss, accuracy, auc], in the order given to model.compile. Named
    # test_metrics rather than `eval` so the Python builtin is not shadowed.
    test_metrics = model.evaluate(x_test, y_test, verbose = False)
    models[test_metrics[1]] = model
bestmod = models[max(models)]
best_eval = bestmod.evaluate(x_test, y_test, verbose = False)
pl.DataFrame({
    "Metric": ["Loss", "Accuracy", "AUC"],
    "Value": best_eval
}).to_pandas()

Metric,Value
str,f64
"""Loss""",0.779691
"""Accuracy""",0.773333
"""AUC""",0.951911
