In [10]:
import os
import gc
import pandas as pd
import tensorflow as tf
import numpy as np

from tensorflow import keras
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam, RMSprop
from tensorflow.keras.models import Sequential

from sklearn.model_selection import train_test_split

In [11]:
pd.options.display.max_columns = None

In [12]:
data_path = '/kaggle/input/tabular-playground-series-oct-2022/'
feather_path = '../input/rocket-league/'

In [13]:
train_dtypes_file = os.path.join(data_path, 'train_dtypes.csv')
test_dtypes_file = os.path.join(data_path, 'test_dtypes.csv')
sample_submission_file = os.path.join(data_path, 'sample_submission.csv')
sample_submission_file2 = os.path.join(data_path, 'sample_submission2.csv')
train_feather_file = os.path.join(feather_path, 'train.feather')
test_feather_file = os.path.join(feather_path, 'test.feather')

In [14]:
train_dtypes_df = pd.read_csv(train_dtypes_file)
test_dtypes_df = pd.read_csv(test_dtypes_file)
cols_dtypes = {k: v for (k, v) in zip(train_dtypes_df.column, train_dtypes_df.dtype)}

Ignoring the useless columns
Lets see what columns we will not use to train.

In [15]:
[c for c in train_dtypes_df['column'] if c not in test_dtypes_df['column'].values]

['game_num',
 'event_id',
 'event_time',
 'player_scoring_next',
 'team_scoring_next',
 'team_A_scoring_within_10sec',
 'team_B_scoring_within_10sec']

## Feature Selection

In [16]:
targets = ['team_A_scoring_within_10sec', 'team_B_scoring_within_10sec']
useless_cols = ['event_id', 'ball_vel_x', 'ball_vel_y', 'ball_vel_z', 'p0_vel_x', 'p0_vel_y', 'p0_vel_z', 
                'p1_vel_x', 'p1_vel_y', 'p1_vel_z', 'p2_vel_x', 'p2_vel_y', 'p2_vel_z', 
                'p3_vel_x', 'p3_vel_y', 'p3_vel_z', 'p4_vel_x', 'p4_vel_y', 'p4_vel_z', 
                'p5_vel_x', 'p5_vel_y', 'p5_vel_z', 
                'boost0_timer', 'boost1_timer', 'boost2_timer', 
                'boost3_timer', 'boost4_timer', 'boost5_timer', 'team_A_scoring_within_10sec', 'team_B_scoring_within_10sec'
               ]
useless_cols2 = ['ball_vel_x', 'ball_vel_y', 'ball_vel_z', 'p0_vel_x', 'p0_vel_y', 'p0_vel_z', 
                'p1_vel_x', 'p1_vel_y', 'p1_vel_z', 'p2_vel_x', 'p2_vel_y', 'p2_vel_z', 
                'p3_vel_x', 'p3_vel_y', 'p3_vel_z', 'p4_vel_x', 'p4_vel_y', 'p4_vel_z', 
                'p5_vel_x', 'p5_vel_y', 'p5_vel_z', 
                 'boost0_timer', 'boost1_timer', 'boost2_timer', 
                'boost3_timer', 'boost4_timer', 'boost5_timer']
use_cols = [c for c in train_dtypes_df['column'] if c not in useless_cols]
features = test_dtypes_df['column'][1:].values # drop id

In [17]:
train_df = pd.read_feather(train_feather_file)

In [18]:
def eliminate_useless_columns(df, columns_list):
    df = df.drop(columns_list, axis=1, inplace=True)

In [19]:
def normalizer(df, columns, min_data, max_data):
    for column in columns:
        min_data = df[column].min()
        max_data = df[column].max()
        df[column] = (df[column] - min_data) / (max_data - min_data)
    return df, min_data, max_data

In [20]:
targets_df = pd.DataFrame()
targets_df['team_A_scoring_within_10sec'] = train_df['team_A_scoring_within_10sec']
targets_df['team_B_scoring_within_10sec'] = train_df['team_B_scoring_within_10sec']
#targets_df = targets_df[(targets_df['team_A_scoring_within_10sec']==1) | (targets_df['team_B_scoring_within_10sec']==1)]
gc.collect()

0

In [21]:
train_df.fillna(0, inplace=True)
#train_df = train_df[(train_df['team_A_scoring_within_10sec']==1) | (train_df['team_B_scoring_within_10sec']==1)]
gc.collect()

23

In [22]:
# prepariapplyng data
def prepare_data(df):

    # velocity value for 0-5 players and the ball
    for i in range(6):
        df[f'p{i}_vel'] = np.sqrt(
           df[f'p{i}_vel_x']**2+
           df[f'p{i}_vel_y']**2+
           df[f'p{i}_vel_z']**2)
    for i in range(6):
        df.drop([f'p{i}_vel_x'], axis=1, inplace=True)
        df.drop([f'p{i}_vel_y'], axis=1, inplace=True)
        df.drop([f'p{i}_vel_z'], axis=1, inplace=True)
        
    df['ball_vel'] = np.sqrt(
       df['ball_vel_x']**2+
       df['ball_vel_y']**2+
       df['ball_vel_z']**2)
    df.drop(['ball_vel_x', 'ball_vel_y', 'ball_vel_z'], axis=1, inplace=True)
    df.drop(['boost0_timer', 'boost1_timer', 'boost2_timer', 'boost3_timer', 'boost4_timer', 'boost5_timer'], axis=1, inplace=True)
    
    # distances from players to ball
    for i in range(6):
        df[f'p{i}_dist_ball'] = np.sqrt(
           (df[f'p{i}_pos_x']-df['ball_pos_x'])**2+
           (df[f'p{i}_pos_y']-df['ball_pos_y'])**2+
           (df[f'p{i}_pos_z']-df['ball_pos_z'])**2)
    # mean distance from team A or team B to the ball
    df['mean_dist_teamA_to_ball'] = (df['p0_dist_ball']+ df['p1_dist_ball']+ df['p2_dist_ball'])/3
    df['mean_dist_teamB_to_ball'] = (df['p3_dist_ball']+ df['p4_dist_ball']+ df['p5_dist_ball'])/3

    # mean velocity for each team
    df['mean_vel_teamA'] = (df['p0_vel']+ df['p1_vel']+ df['p2_vel'])/3
    df['mean_vel_teamB'] = (df['p3_vel']+ df['p4_vel']+ df['p5_vel'])/3

    # mean boost for each team
    df['median_boost_A'] = (df['p0_boost'] + df['p1_boost'] + df['p2_boost']) / 3
    df['median_boost_B'] = (df['p3_boost'] + df['p4_boost'] + df['p5_boost']) / 3
    return df

In [23]:
train_df.drop(['event_id', 'team_A_scoring_within_10sec', 'team_B_scoring_within_10sec'], axis=1, inplace = True)
gc.collect()

236

In [24]:
train_df = prepare_data(train_df)
gc.collect()

13

In [25]:
train_df.head()

Unnamed: 0,ball_pos_x,ball_pos_y,ball_pos_z,p0_pos_x,p0_pos_y,p0_pos_z,p0_boost,p1_pos_x,p1_pos_y,p1_pos_z,p1_boost,p2_pos_x,p2_pos_y,p2_pos_z,p2_boost,p3_pos_x,p3_pos_y,p3_pos_z,p3_boost,p4_pos_x,p4_pos_y,p4_pos_z,p4_boost,p5_pos_x,p5_pos_y,p5_pos_z,p5_boost,p0_vel,p1_vel,p2_vel,p3_vel,p4_vel,p5_vel,ball_vel,p0_dist_ball,p1_dist_ball,p2_dist_ball,p3_dist_ball,p4_dist_ball,p5_dist_ball,mean_dist_teamA_to_ball,mean_dist_teamB_to_ball,mean_vel_teamA,mean_vel_teamB,median_boost_A,median_boost_B
0,-0.0,0.0,1.8548,41.804798,-51.924999,0.3402,33.34375,-5.0654,-75.519402,0.3402,27.328125,-39.6544,-49.894398,0.3402,22.34375,5.0604,75.532005,0.3402,33.34375,-41.766403,52.006401,0.341,33.34375,39.757401,49.821602,0.3402,22.34375,7.434417,9.967111,12.458232,8.06761,7.690071,12.398498,0.0,66.67939,75.704239,63.751205,75.716476,66.718735,63.758423,68.711617,68.731209,9.953254,9.385392,27.671875,29.671875
1,-0.0,0.0,1.8548,42.2616,-52.451202,0.3402,33.34375,-4.9298,-74.326401,0.49,23.84375,-38.626202,-48.871803,0.3402,30.625,5.011,74.772995,0.3402,33.34375,-42.407204,52.622402,0.5944,33.34375,38.781399,48.761402,0.3402,30.625,9.157838,15.035421,16.1597,9.966599,11.653255,15.967882,0.0,67.375557,74.502213,62.311558,74.956024,67.594948,62.321468,68.06311,68.29081,13.450986,12.529246,29.265625,32.4375
2,-0.0,0.0,1.8548,43.227001,-54.054001,1.1466,33.34375,-4.688,-72.959,1.082,23.140625,-36.3452,-47.016201,1.1346,26.890625,4.9168,73.767601,0.3402,33.34375,-43.778198,54.081402,1.645,33.34375,36.503998,46.920601,1.0214,26.890625,13.024463,17.01021,21.310888,11.863656,20.903774,21.332428,0.0,69.2164,73.113541,59.430759,73.946793,69.579971,59.454006,67.253563,67.660255,17.115187,18.033285,27.796875,31.1875
3,-0.0,0.0,1.8548,43.898399,-55.225601,1.5936,33.34375,-3.8408,-69.125801,1.9106,23.140625,-35.173599,-45.287998,1.5598,23.375,4.6906,71.631401,0.4394,31.0,-44.9198,55.5252,2.1334,33.34375,35.151798,44.873398,1.5286,23.375,22.081198,26.259382,31.118071,16.746733,20.673391,30.489305,0.0,70.54789,69.232445,57.343452,71.798767,71.420685,57.003307,65.707924,66.740921,26.486216,22.636477,26.625,29.25
4,-0.0,0.0,1.8548,44.960602,-57.087799,2.072,33.34375,-3.272,-66.594398,1.9814,23.140625,-34.066002,-43.518997,1.8632,19.84375,4.501,69.9216,1.0822,27.46875,-46.170399,57.104401,2.2552,33.34375,33.584801,42.2034,1.9106,19.84375,21.858263,25.951704,32.038734,19.30131,20.159603,31.731245,0.0,72.667183,66.67485,55.266586,70.07058,73.435539,53.935787,64.869545,65.813972,26.616234,23.730721,25.453125,26.875


In [26]:
min_data = None
max_data = None

In [27]:
lista_columnas_normalizar = ['ball_pos_x', 'ball_pos_y', 'ball_pos_z', 'p0_pos_x', 'p0_pos_y', 'p0_pos_z', 'p1_pos_x', 'p1_pos_y', 'p1_pos_z', 'p2_pos_x', 'p2_pos_y', 'p2_pos_z', 'p3_pos_x', 'p3_pos_y', 'p3_pos_z', 'p4_pos_x', 'p4_pos_y', 'p4_pos_z', 'p5_pos_x', 'p5_pos_y', 'p5_pos_z', 'p0_boost', 'p1_boost', 'p2_boost', 'p3_boost', 'p4_boost', 'p5_boost', 'p0_vel', 'p1_vel', 'p2_vel', 'p3_vel', 'p4_vel', 'p5_vel', 'ball_vel', 'p0_dist_ball', 'p1_dist_ball', 'p2_dist_ball', 'p3_dist_ball', 'p4_dist_ball', 'p5_dist_ball', 'mean_dist_teamA_to_ball', 'mean_dist_teamB_to_ball', 'mean_vel_teamA', 'mean_vel_teamB', 'median_boost_A', 'median_boost_B']

In [28]:
train_df, min_data, max_data = normalizer(train_df, lista_columnas_normalizar, min_data, max_data)

In [29]:
test_df = pd.read_feather(test_feather_file)

In [30]:
train_df.shape

(21198036, 46)

In [31]:
test_df.shape

(701143, 54)

In [32]:
test_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 701143 entries, 0 to 701142
Data columns (total 54 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   ball_pos_x    701143 non-null  float32
 1   ball_pos_y    701143 non-null  float32
 2   ball_pos_z    701143 non-null  float32
 3   ball_vel_x    701143 non-null  float32
 4   ball_vel_y    701143 non-null  float32
 5   ball_vel_z    701143 non-null  float32
 6   p0_pos_x      694843 non-null  float32
 7   p0_pos_y      694843 non-null  float32
 8   p0_pos_z      694843 non-null  float32
 9   p0_vel_x      694843 non-null  float32
 10  p0_vel_y      694843 non-null  float32
 11  p0_vel_z      694843 non-null  float32
 12  p0_boost      694843 non-null  float16
 13  p1_pos_x      694965 non-null  float32
 14  p1_pos_y      694965 non-null  float32
 15  p1_pos_z      694965 non-null  float32
 16  p1_vel_x      694965 non-null  float32
 17  p1_vel_y      694965 non-null  float32
 18  p1_v

In [33]:
test_df.fillna(0, inplace=True)

In [34]:
test_df.head()

Unnamed: 0,ball_pos_x,ball_pos_y,ball_pos_z,ball_vel_x,ball_vel_y,ball_vel_z,p0_pos_x,p0_pos_y,p0_pos_z,p0_vel_x,p0_vel_y,p0_vel_z,p0_boost,p1_pos_x,p1_pos_y,p1_pos_z,p1_vel_x,p1_vel_y,p1_vel_z,p1_boost,p2_pos_x,p2_pos_y,p2_pos_z,p2_vel_x,p2_vel_y,p2_vel_z,p2_boost,p3_pos_x,p3_pos_y,p3_pos_z,p3_vel_x,p3_vel_y,p3_vel_z,p3_boost,p4_pos_x,p4_pos_y,p4_pos_z,p4_vel_x,p4_vel_y,p4_vel_z,p4_boost,p5_pos_x,p5_pos_y,p5_pos_z,p5_vel_x,p5_vel_y,p5_vel_z,p5_boost,boost0_timer,boost1_timer,boost2_timer,boost3_timer,boost4_timer,boost5_timer
0,-56.270802,29.51,17.3486,24.499399,-1.3114,11.006801,-35.776199,73.136803,1.248,18.386999,-5.135,-21.4028,0.0,-72.905403,28.819399,11.7,-19.212801,-1.8104,5.704,49.40625,-36.3792,-18.8584,0.3402,-45.022797,-5.5774,0.0042,87.8125,-3.4352,93.975403,0.3402,-27.2624,-2.5516,0.005,69.0625,-23.3904,101.715599,28.972601,25.478001,11.5176,-18.315401,83.125,-51.055599,54.5942,0.34,3.9484,-16.7108,0.0074,71.0,0.0,-3.263672,-6.132812,-6.875,-7.015625,-3.230469
1,2.8528,70.195999,8.949,-8.1522,-65.577202,18.5364,22.926001,87.5438,0.3396,-41.9548,-18.795,0.0114,0.78418,5.9602,59.600201,0.34,-44.143398,-12.936601,0.0024,34.5,69.736603,-11.2536,0.3402,-0.534,-45.694801,0.0042,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.2516,86.967804,1.5382,27.584,-26.174599,-0.2928,84.1875,39.126598,92.815002,0.3402,-15.4968,-14.8766,0.005,66.6875,0.0,-1.615234,-5.96875,-5.503906,0.0,-6.511719
2,52.366402,-98.036797,14.249001,-2.0,-45.291401,-15.4234,51.288998,-102.055595,6.357,8.638,0.3404,19.509399,37.1875,-31.9006,31.3752,1.9754,-32.762001,-24.698999,-0.2908,0.0,0.6616,-107.274406,0.354,27.366798,1.2226,-0.0284,45.5,40.266998,-35.871998,0.3402,2.0586,-45.0298,0.0042,47.84375,51.2798,-93.799805,15.626801,-4.6872,-44.2682,-0.309,23.140625,16.9426,38.711998,0.3402,0.9058,-27.413601,0.0042,80.375,-0.031586,0.0,0.0,-3.128906,0.0,-3.677734
3,36.365402,53.961403,23.161798,24.829399,-10.3286,-0.918,16.424799,0.9328,0.3402,39.627602,18.2836,0.0,88.3125,-39.392799,40.812,0.3398,29.773998,-12.6544,0.006,82.4375,6.6148,84.760994,3.3024,-13.5962,35.463402,-25.9512,2.744141,-45.288998,83.107803,0.34,-43.5368,11.2418,0.0068,36.09375,11.0128,53.168999,18.1036,-9.2722,-16.618799,-6.7208,49.03125,17.170799,93.164406,0.3402,-5.0532,-15.106999,0.0052,96.0625,-6.429688,-4.089844,-7.832031,-5.761719,0.0,-3.923828
4,-23.2624,-53.391003,21.2096,-1.6676,-24.778799,3.4812,-19.681801,-93.913605,4.7832,-4.1376,0.8012,11.2656,54.125,-32.0872,-33.042599,6.3446,-15.919001,-3.7912,-3.014,43.125,-61.132202,-81.612404,0.3398,23.8808,-26.558998,0.0092,100.0,-31.090801,-16.736601,14.4918,-12.585401,16.1012,6.9564,58.03125,-36.6856,29.899399,0.3732,-16.562401,-34.652,0.002,85.5,63.7966,-15.374201,0.3402,22.200001,24.928001,0.0042,0.0,-9.882812,0.0,-0.445557,-0.491455,-7.828125,0.0


In [35]:
test_df = prepare_data(test_df)

In [38]:
test_df, _, _ = normalizer(test_df, lista_columnas_normalizar, min_data, max_data)

### Split Data

In [39]:
per = 1
length_train = train_df.shape[0]

X_train, X_valid = train_df[:round(length_train*(1-per/100) - 1)], train_df[round(length_train*(1-per/100) - 1):]
Y_train, Y_valid = targets_df[:round(length_train*(1-per/100) - 1)], targets_df[round(length_train*(1-per/100) - 1):]
#X_train, X_val, Y_train, Y_val = train_test_split(train_df, targets_df, test_size=0.01, random_state=0)
del (train_df, targets_df)
gc.collect()

9734

## DNN Model

In [40]:
model = Sequential([
    Dense(128, activation='relu', input_shape = [X_train.shape[1]]),
    BatchNormalization(),
    Dropout(0.05),
    
    Dense(256, activation='relu'),    
    BatchNormalization(),
    Dropout(0.1),
    
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.15),
    
    Dense(128, activation='relu'),    
    BatchNormalization(),
    Dropout(0.2),
    
    Dense(2)
])

2022-11-01 00:59:47.733165: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-11-01 00:59:47.739646: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-11-01 00:59:47.933862: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-11-01 00:59:47.934991: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-11-01 00:59:47.935814: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from S

In [41]:
model2 = Sequential([
    Dense(32, activation='relu', input_shape=[X_train.shape[1]]), 
    
    Dense(64, activation='relu'),
    Dropout(0.1), 
    
    Dense(128, activation='relu'),
    Dropout(0.3), 
    
    Dense(2) 
])

In [42]:
model.compile(optimizer='adam',
             loss='binary_crossentropy',
             metrics=['accuracy'])

In [43]:
model2.compile(optimizer='adam',
             loss='binary_crossentropy',
             metrics=['accuracy'])

In [None]:
history = model.fit(X_train, Y_train, 
              epochs=20, 
              steps_per_epoch=X_train.shape[0]//512, 
              batch_size=512,
              validation_data=(X_valid, Y_valid),
         )

In [63]:
history2 = model2.fit(X_train, Y_train, 
              epochs=16, 
              steps_per_epoch=X_train.shape[0]//512, 
              batch_size=512,
              validation_data=(X_valid, Y_valid),
         )

Epoch 1/16
Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


In [64]:
predictions = model.predict(test_df)
score = tf.nn.softmax(predictions)

In [65]:
predictions2 = model2.predict(test_df)
score2 = tf.nn.softmax(predictions2)

In [66]:
ss = pd.read_csv(sample_submission_file)
ss['team_A_scoring_within_10sec'] = score[:,0]
ss['team_B_scoring_within_10sec'] = score[:,1]
ss.to_csv('Submission.csv', index=False)
ss.head()

Unnamed: 0,id,team_A_scoring_within_10sec,team_B_scoring_within_10sec
0,0,0.546195,0.453805
1,1,0.625528,0.374472
2,2,0.326114,0.673886
3,3,0.492999,0.507001
4,4,0.372334,0.627666
