# Importing Libraries

In [50]:
import numpy as np
import pandas as pd
import seaborn as sb
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm, trange

import tensorflow as tf
from tensorflow import keras
from keras import layers
import gc

import warnings
warnings.filterwarnings('ignore')

In [51]:
# Report the installed framework versions and whether TensorFlow can see a GPU.
# `gpu` is reused by the next cell to decide how to configure the device.
print(f"Tensor Flow Version: {tf.__version__}")
print(f"Keras Version: {tf.keras.__version__}")
print()
gpu_devices = tf.config.list_physical_devices('GPU')
gpu = len(gpu_devices) > 0
gpu_status = "AVAILABLE" if gpu else "NOT AVAILABLE"
print("GPU is", gpu_status)

Tensor Flow Version: 2.10.0
Keras Version: 2.10.0

GPU is AVAILABLE


In [52]:
# Configure TensorFlow to use the GPU(s) when available.
if gpu:
    print("Using GPU")
    # Memory growth has to be set identically on every visible GPU (TensorFlow
    # rejects mixed settings), so enable it on all of them, not just the first.
    for gpu_device in tf.config.list_physical_devices('GPU'):
        tf.config.experimental.set_memory_growth(gpu_device, True)
else:
    # No GPU was detected above, so TensorFlow falls back to the CPU.
    print("Not using GPU")

Using GPU


In [53]:
# Start from a clean Keras state, then pin the TensorFlow and NumPy RNG seeds
# so that repeated runs draw the same random numbers.
keras.backend.clear_session()
for seed_fn in (tf.random.set_seed, np.random.seed):
    seed_fn(42)

# Data Analysis

In [54]:
# Load only the first training shard for a quick look at its size and columns.
# `df` is rebuilt from all shards later in the notebook.
df = pd.read_csv('data/train_0.csv')
df.shape

(2149381, 61)

In [55]:
# Preview the first rows of the shard loaded above.
df.head()

Unnamed: 0,game_num,event_id,event_time,ball_pos_x,ball_pos_y,ball_pos_z,ball_vel_x,ball_vel_y,ball_vel_z,p0_pos_x,...,boost0_timer,boost1_timer,boost2_timer,boost3_timer,boost4_timer,boost5_timer,player_scoring_next,team_scoring_next,team_A_scoring_within_10sec,team_B_scoring_within_10sec
0,1,1002,-33.31303,-0.0,0.0,1.8548,-0.0,0.0,0.0,41.8048,...,0.0,0.0,0.0,0.0,0.0,0.0,3,B,0,0
1,1,1002,-33.206146,-0.0,0.0,1.8548,-0.0,0.0,0.0,42.2616,...,0.0,0.0,0.0,0.0,0.0,0.0,3,B,0,0
2,1,1002,-33.098114,-0.0,0.0,1.8548,-0.0,0.0,0.0,43.227,...,0.0,0.0,0.0,0.0,0.0,0.0,3,B,0,0
3,1,1002,-32.99319,-0.0,0.0,1.8548,-0.0,0.0,0.0,43.8984,...,0.0,0.0,0.0,0.0,0.0,0.0,3,B,0,0
4,1,1002,-32.887756,-0.0,0.0,1.8548,-0.0,0.0,0.0,44.9606,...,0.0,0.0,0.0,0.0,0.0,0.0,3,B,0,0


# Data Engineering

In [56]:
# Reference points read by get_new_features() to build the *_dis_goal1 and
# *_dis_goal2 distance columns.
# NOTE(review): both points use pos_y = -105 and differ only in the sign of
# pos_x. If they are meant to represent the two opposing goals, one of the
# pos_y values presumably should be positive — TODO confirm against the
# dataset's coordinate system.
goal1_pos_x = 10
goal1_pos_y = -105
goal1_pos_z = 0

goal2_pos_x = -10
goal2_pos_y = -105
goal2_pos_z = 0

In [57]:
# Euclidean distance between the points (x1, y1, z1) and (x2, y2, z2)
def distance(x1, y1, z1, x2, y2, z2):
    """Return the straight-line distance between two 3-D points.

    Each argument may be a scalar or an array-like that supports element-wise
    arithmetic (e.g. a pandas Series); the usual broadcasting rules apply.
    """
    dx, dy, dz = x1 - x2, y1 - y2, z1 - z2
    return np.sqrt(dx**2 + dy**2 + dz**2)
    
# Magnitude (speed) of the velocity vector (x1, y1, z1)
def velocity(x1, y1, z1):
    """Return the Euclidean norm of a 3-D vector given by its components.

    Components may be scalars or array-likes supporting element-wise arithmetic.
    """
    squared_norm = x1**2 + y1**2 + z1**2
    return np.sqrt(squared_norm)
    
def get_new_features(df):
    """Append distance and speed features to ``df`` and return it.

    The frame is modified in place. Columns are added in this order:

    * for each player ``p0``..``p5``: ``p{j}_dis_ball``, ``p{j}_dis_goal1``,
      ``p{j}_dis_goal2``, ``p{j}_velocity``;
    * for the ball: ``ball_vel``, ``ball_dis_goal1``, ``ball_dis_goal2``;
    * per group (``t1`` = p0..p2, ``t2`` = p3..p5): the summed player
      distances ``{t}_dis_ball``, ``{t}_dis_goal1``, ``{t}_dis_goal2``.

    Goal coordinates come from the module-level ``goal{1,2}_pos_{x,y,z}``
    constants, read at call time.
    """
    goals = {
        'goal1': (goal1_pos_x, goal1_pos_y, goal1_pos_z),
        'goal2': (goal2_pos_x, goal2_pos_y, goal2_pos_z),
    }
    ball_pos = [df[f'ball_pos_{axis}'] for axis in 'xyz']

    # Per-player features.
    for j in range(6):
        player_pos = [df[f'p{j}_pos_{axis}'] for axis in 'xyz']
        df[f'p{j}_dis_ball'] = distance(*ball_pos, *player_pos)
        for goal_name, goal_xyz in goals.items():
            df[f'p{j}_dis_{goal_name}'] = distance(*goal_xyz, *player_pos)
        df[f'p{j}_velocity'] = velocity(*(df[f'p{j}_vel_{axis}'] for axis in 'xyz'))

    # Ball features.
    df['ball_vel'] = velocity(*(df[f'ball_vel_{axis}'] for axis in 'xyz'))
    for goal_name, goal_xyz in goals.items():
        df[f'ball_dis_{goal_name}'] = distance(*goal_xyz, *ball_pos)

    # Group aggregates: sum the three members' distances to each target.
    for team, (a, b, c) in (('t1', (0, 1, 2)), ('t2', (3, 4, 5))):
        for target in ('ball', 'goal1', 'goal2'):
            df[f'{team}_dis_{target}'] = (
                df[f'p{a}_dis_{target}'] + df[f'p{b}_dis_{target}'] + df[f'p{c}_dis_{target}']
            )

    return df

In [58]:

# Read the 10 training shards and stack them with a single concat. Concatenating
# inside the loop re-copies the accumulated frame on every iteration.
# ignore_index gives the combined frame a unique 0..n-1 index instead of
# repeating each shard's own row labels.
shard_frames = [pd.read_csv(f'data/train_{i}.csv') for i in range(10)]
df = pd.concat(shard_frames, axis=0, ignore_index=True)
del shard_frames  # drop the per-shard copies once they are merged

# Remove identifier columns and the label columns that are not used as targets.
df = df.drop(columns=['game_num', 'event_id', 'event_time', 'player_scoring_next', 'team_scoring_next'])

# Third class for the 3-way softmax: 1 only when neither team scores within
# 10 seconds (NOR of the two label columns).
df['no_team_scored'] = 1 - np.logical_or(
    df['team_A_scoring_within_10sec'], df['team_B_scoring_within_10sec']
).astype(int)

target_cols = ['team_A_scoring_within_10sec', 'team_B_scoring_within_10sec', 'no_team_scored']
X = df.drop(columns=target_cols)
y = df[target_cols]

# Hold out 1% of the rows for validation; random_state fixes the shuffle.
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.01, random_state=0)

In [59]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler

# Numeric preprocessing: replace missing values with the column mean, then
# standardise each column.
num_pipeline = Pipeline([
    ('imputer', SimpleImputer(strategy="mean")),
    ('std_scaler', StandardScaler()),
])

# Fit on the training split only; the validation split is transformed with the
# statistics learned from training. The transformed arrays are wrapped back
# into DataFrames with the original column names (the index becomes a default
# RangeIndex).
# NOTE(review): get_new_features() is applied to these frames afterwards, so the
# goal/ball/player distances are computed on standardised coordinates while the
# goal positions are fixed constants — confirm this ordering is intended.
X_train = pd.DataFrame(num_pipeline.fit_transform(X_train), columns=X_train.columns)
X_val = pd.DataFrame(num_pipeline.transform(X_val), columns=X_val.columns)

In [60]:
# Append the engineered distance/speed columns to both splits.
# get_new_features() adds the columns to the frame it receives and returns it.
X_train = get_new_features(X_train)
X_val = get_new_features(X_val)

# Model Development

In [61]:
# Fully connected classifier over the 87 input columns produced by the
# preprocessing cells (original features plus those from get_new_features).
model = keras.models.Sequential()
model.add(keras.layers.Flatten(input_shape=[87]))
model.add(keras.layers.BatchNormalization())
# Four hidden blocks: Dense(256, He init) -> BatchNorm -> ELU.
for _ in range(4):
    model.add(keras.layers.Dense(256, kernel_initializer="he_normal"))
    model.add(keras.layers.BatchNormalization())
    model.add(keras.layers.Activation("elu"))
# The output layer emits raw logits (no softmax here). The loss below is built
# with from_logits=True and the prediction cell applies tf.nn.softmax itself;
# a softmax in this layer as well would be applied twice in both places.
model.add(keras.layers.Dense(3))

model.compile(loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
              optimizer="nadam",
              metrics=["accuracy"])

In [62]:
# Print the layer-by-layer architecture with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 87)                0         
                                                                 
 batch_normalization (BatchN  (None, 87)               348       
 ormalization)                                                   
                                                                 
 dense (Dense)               (None, 256)               22528     
                                                                 
 batch_normalization_1 (Batc  (None, 256)              1024      
 hNormalization)                                                 
                                                                 
 activation (Activation)     (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 256)               6

In [63]:
import os
# All TensorBoard runs are written below ./my_logs.
root_logdir = os.path.join(os.curdir, "my_logs")


def get_run_logdir():
    """Return a run directory path under ``root_logdir`` named after the current local time."""
    import time

    timestamp_name = time.strftime("run_%y_%m_%d-%H_%M_%S")
    return os.path.join(root_logdir, timestamp_name)


# Path for this session's run; handed to the TensorBoard callback later.
run_logdir = get_run_logdir()

In [64]:
# (Removed a stray `from tabnanny import verbose` auto-import: nothing used it,
# and the name only mirrored the `verbose=` keyword passed to fit() below.)
print('Training on dataset.')

# Callbacks: keep the best checkpoint on disk, stop after 10 epochs without
# improvement and roll back to the best weights, and log to TensorBoard.
# Both ModelCheckpoint and EarlyStopping use their default monitored quantity.
checkpoint_cb = keras.callbacks.ModelCheckpoint("model.h5", save_best_only=True)
early_stopping_cb = keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)
tensorboard_cb = keras.callbacks.TensorBoard(run_logdir)

# steps_per_epoch caps every epoch at 10000 batches of 32 rows.
history = model.fit(X_train, y_train,
            epochs=100,
            steps_per_epoch=10000,
            batch_size=32,
            validation_data=(X_val, y_val),
            verbose=1,
            callbacks=[checkpoint_cb, early_stopping_cb, tensorboard_cb])

# Release the training data before the test set is loaded.
del df, X_train, X_val, y_train, y_val
gc.collect()
print()

Training on dataset.


: 

: 

In [None]:
# Load the test set and discard its first column (presumably a row id that is
# not a model feature — confirm against the file).
df_test = pd.read_csv('data/test.csv')
df_test = df_test.drop(columns=df_test.columns[0])

# Total number of missing cells across the whole frame.
df_test.isna().sum().sum()

261422

In [None]:
# Apply the imputer/scaler fitted on the training split, restore the column
# names, then append the same engineered features used for training.
df_test = get_new_features(
    pd.DataFrame(num_pipeline.transform(df_test), columns=df_test.columns)
)

# Missing-cell count after preprocessing.
df_test.isna().sum().sum()

0

In [None]:
# Run the network on the prepared test frame, then turn its outputs into class
# probabilities.
# NOTE(review): the softmax here assumes `model` returns raw logits. If the
# model's last layer already applies softmax, this second softmax flattens the
# probabilities — confirm against the model definition.
preds = model.predict(df_test)
score = tf.nn.softmax(preds)

   90/21911 [..............................] - ETA: 37s

2022-10-29 02:10:19.673127: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




In [None]:
# Fill the sample submission with the predicted probabilities: column 0 of
# `score` goes to team A, column 1 to team B. The third column of `score`
# is not written.
ss = pd.read_csv('data/sample_submission.csv')
submission_columns = ['team_A_scoring_within_10sec', 'team_B_scoring_within_10sec']
for class_idx, column_name in enumerate(submission_columns):
    ss[column_name] = score[:, class_idx]
ss.to_csv('Submission.csv', index=False)
ss.head()

Unnamed: 0,id,team_A_scoring_within_10sec,team_B_scoring_within_10sec
0,0,0.0,0.0
1,1,0.0,0.0
2,2,0.0,0.0
3,3,0.0,0.0
4,4,0.0,2.4781959999999998e-36
