In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and echo the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Read the competition's train and test CSVs into DataFrames in one pass.
train_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/pubg-finish-placement-prediction/train_V2.csv',
        '/kaggle/input/pubg-finish-placement-prediction/test_V2.csv',
    )
)

In [None]:
# Show the dtype of each column in the training frame.
train_data.dtypes

In [None]:
# Preview the first rows of the training frame.
train_data.head()

In [None]:
# Summary statistics of the training frame.
train_data.describe()

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

plt.figure(figsize=[14,18])
features=['assists', 'damageDealt', 'headshotKills', 'kills', 'winPoints', 'boosts', 'DBNOs',
          'heals','killPlace','killPoints','killStreaks','longestKill','matchDuration',
          'maxPlace', 'numGroups', 'rankPoints', 'revives', 'rideDistance',  'roadKills',            
          'swimDistance', 'teamKills', 'vehicleDestroys', 'walkDistance', 'weaponsAcquired', 'winPoints']
n=1
for f in features:
    plt.subplot(10,4,n)
    sns.distplot(train_data[f], kde=False)
    sns.despine()
    n=n+1
plt.tight_layout()
plt.show()


In [None]:

# Training target and float-typed feature matrix.
y = train_data['winPlacePerc']
X = train_data.loc[:, features].astype(float)

# Test-side counterparts: row identifiers for the submission and the same feature columns.
X_test = test_data.loc[:, features].astype(float)
id_data = test_data['Id']

In [None]:
# Replace missing target values with the median target.
# Rebind instead of using `inplace=True`: `y` was selected from `train_data`,
# and an in-place fill can write through to (or warn about) the source frame.
y = y.fillna(y.median())

In [None]:
from sklearn.preprocessing import StandardScaler

# Standardize the features so they share a comparable scale, which helps
# gradient-based training converge faster.
scaler = StandardScaler()

# Learn the scaling statistics from the training features only.
X_scaled = pd.DataFrame(scaler.fit_transform(X.values), columns=X.columns)

# Apply the *same* fitted statistics to the test features. Refitting on the
# test set would scale it differently from the data the model is trained on.
X_test_scaled = pd.DataFrame(scaler.transform(X_test.values), columns=X_test.columns)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 10% of the scaled training rows as a dev set; the seed is fixed
# so the split is the same on every run.
X_train, X_dev, y_train, y_dev = train_test_split(
    X_scaled,
    y,
    test_size=0.10,
    random_state=32,
)

In [None]:
import tensorflow as tf

# Fully connected regressor: 32 -> 64 -> 128 -> 64 ReLU units, then a single
# sigmoid output unit.
model = tf.keras.Sequential(
    [tf.keras.layers.Dense(32, activation="relu", input_shape=(X_train.shape[1],))]
    + [tf.keras.layers.Dense(units, activation="relu") for units in (64, 128, 64)]
    + [tf.keras.layers.Dense(1, activation="sigmoid")]
)

model.summary()
# Adam optimizer, trained against mean absolute error.
model.compile(optimizer='adam', loss='mae')

In [None]:
epochs = 5

# Train on the in-memory train split and evaluate on the dev split after each epoch.
# `workers` / `use_multiprocessing` are intentionally not passed: they only
# affect generator / `Sequence` inputs and are rejected by newer Keras releases.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=epochs,
    batch_size=256,
    verbose=1,
    validation_data=(X_dev, y_dev),
)

In [None]:
import matplotlib.pyplot as plt

# Per-epoch loss values recorded by `model.fit`.
loss = history.history['loss']
val_loss = history.history['val_loss']

# Use a dedicated name for the x-axis values so the integer `epochs`
# training setting is not overwritten with a range object.
epoch_numbers = range(1, len(loss) + 1)

plt.plot(epoch_numbers, loss, 'r', label='Training loss')
plt.plot(epoch_numbers, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.show()

In [None]:
# Predict on the scaled test features.
# `workers` / `use_multiprocessing` are not passed: they only affect
# generator / `Sequence` inputs and are rejected by newer Keras releases.
predictions = model.predict(X_test_scaled, verbose=1)

In [None]:
# Assemble the submission table: row identifier first, then the predicted value
# taken from the single output column of `predictions`.
sub_data = pd.DataFrame(
    {
        'Id': id_data,
        'winPlacePerc': predictions[:, 0],
    }
)

# Persist without the index so the CSV holds only the two submission columns.
sub_data.to_csv('/kaggle/working/submission.csv', index=False)

In [None]:
# Preview the submission with the notebook's rich table display instead of
# printing the whole frame as plain text.
sub_data.head()