In [72]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization


In [73]:
# Load the datasets
player_positions = pd.read_csv('data/train_locs.csv')
game_outcomes = pd.read_csv('data/train_pbp.csv')

player_positions = player_positions.to_numpy()

In [74]:

# Parameters
court_length, court_width = 47, 50
grid_size = 1  # 1 foot grid size

# Convert court dimensions to a grid
n_rows, n_cols = int(court_length / grid_size), int(court_width / grid_size)

def positions_to_grid(players):
    # Initialize an empty grid
    grid = np.zeros((court_length, court_width, 1))  # Single channel for player positions
    
    # if shooter is on the right side of the court, flip the coordinates
    if players[5, 1] > 47:
        for i in range(10):
            players[i, 1] = 94 - players[i, 1]
            players[i, 2] = 50 - players[i, 2]

    # Map player positions to grid
    for row in players:
        x, y, code = min(int(row[1] // grid_size),  n_rows - 1), min(int(row[2] // grid_size), n_cols - 1), row[3]
        if 'd' in code:  # Defense
            grid[x, y, 0] = 1
        elif 't' in code:  # Offense excluding shooter
            grid[x, y, 0] = 2
        elif 's' in code:  # Shooter
            grid[x, y, 0] = 3
    return grid

X = np.array([positions_to_grid(player_positions[i*10:(i+1)*10, :]) for i in range(int(player_positions.shape[0]/10))])

y = game_outcomes["is_oreb"].to_numpy()

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)




In [75]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((24644, 47, 50, 1), (6161, 47, 50, 1), (24644,), (6161,))

In [76]:

# Separate the dataset into offensive and defensive rebounds
offensive_indices = np.where(y_train == 1)[0]
defensive_indices = np.where(y_train == 0)[0]


# Now, let's make sure we sample an equal number of examples for both classes
# Determine the size of the smaller class to balance the dataset
min_size = min(len(offensive_indices), len(defensive_indices))

# Randomly sample from the larger class to match the size of the smaller class
offensive_indices_balanced = np.random.choice(offensive_indices, min_size, replace=False)
defensive_indices_balanced = np.random.choice(defensive_indices, min_size, replace=False)

# Combine the indices and then split
balanced_indices = np.concatenate([offensive_indices_balanced, defensive_indices_balanced])

# Extract the balanced dataset
X_balanced = X_train[balanced_indices]
y_balanced = y_train[balanced_indices]

# Now X_train, X_test have a 50/50 split of offensive and defensive rebounds

In [77]:
X_balanced.shape, y_balanced.shape

((13888, 47, 50, 1), (13888,))

In [130]:
from tensorflow.keras.layers import Masking, Input, Conv2D, MaxPooling2D, Flatten, Dense, Multiply, GlobalAveragePooling2D, Dropout, BatchNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras import regularizers

# Define the model
model = Sequential()
model.add(Masking(mask_value=0, input_shape=(court_length, court_width, 1)))
model.add(Conv2D(16, (45, 45), activation='relu',kernel_regularizer = regularizers.L1L2(l1=1e-4, l2=1e-3)))

# add final layers
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))
opt = SGD(learning_rate=1)
model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

model.summary()


Model: "sequential_10"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 masking_10 (Masking)        (None, 47, 50, 1)         0         
                                                                 
 conv2d_11 (Conv2D)          (None, 3, 6, 16)          32416     
                                                                 
 flatten_10 (Flatten)        (None, 288)               0         


                                                                 
 dense_10 (Dense)            (None, 1)                 289       
                                                                 
Total params: 32,705
Trainable params: 32,705
Non-trainable params: 0
_________________________________________________________________


In [114]:
from keras.utils import plot_model

# Assuming `model` is your Keras model
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.


In [131]:
history = model.fit(X_train, y_train,
                    batch_size=128,
                    epochs=20,  # Adjust based on convergence and computational resources
                    validation_data=(X_test, y_test))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [129]:
example = np.zeros((47,50,1))


for i in range(10):
    if i<5:
        r = np.random.randint(12,25)
        theta = np.random.rand()*np.pi
        x = min(r*np.cos(theta) + 25,46)
        y = min(r*np.sin(theta) + 4, 49)
        example[int(x),int(y),0] = 1
    if i == 5: 
        r = np.random.randint(0,12)
        theta = np.random.rand()*np.pi
        x = min(r*np.cos(theta) + 25,46)
        y = min(r*np.sin(theta) + 4, 49)
        example[int(x),int(y),0] = 3
    if i > 5:
        r = np.random.randint(0,12)
        theta = np.random.rand()*np.pi
        x = min(r*np.cos(theta) + 25,46)
        y = min(r*np.sin(theta) + 4, 49)
        example[int(x),int(y),0] = 2

model.predict(np.array([example]))



array([[0.26389772]], dtype=float32)