In [None]:
import numpy as np
import pandas as pd

from tensorflow import keras

import gc
gc.enable()


def feature_engineering(is_train=True,
                        data_dir='/kaggle/input/pubg-finish-placement-prediction'):
    """Load a PUBG CSV and build the group/match level feature matrix.

    The numeric player columns are aggregated per ``(matchId, groupId)`` with
    mean, max and min; each aggregate is additionally ranked (as a percentile)
    among the groups of the same match. Group size, per-match means and the
    number of groups per match are appended.

    Column layout of the returned matrix, for ``F`` base features::

        [mean | mean_rank | max | max_rank | min | min_rank]   6 * F columns
        group_size                                              1 column
        match_mean                                              F columns
        match_size                                              1 column

    Args:
        is_train: When true, ``train_V2.csv`` is read, rows with
            ``maxPlace <= 1`` are dropped and one output row is produced per
            group. Otherwise ``test_V2.csv`` is read and one output row is
            produced per input row, in the input order.
        data_dir: Directory containing ``train_V2.csv`` / ``test_V2.csv``.
            Defaults to the Kaggle competition input directory.

    Returns:
        A tuple ``(X, y)``. ``X`` is a 2-D ``float64`` array of features.
        ``y`` is a 1-D ``float64`` array with the mean ``winPlacePerc`` of
        each group (same row order as ``X``) when ``is_train`` is true, and
        ``None`` otherwise.
    """
    import os  # local import so this block does not depend on file-level edits

    file_name = 'train_V2.csv' if is_train else 'test_V2.csv'
    print("processing " + file_name)
    df = pd.read_csv(os.path.join(data_dir, file_name))
    if is_train:
        # Keep only matches with more than one placement slot
        # (presumably the target is undefined otherwise -- confirm).
        # .copy() detaches the filtered frame so the column assignments
        # below do not raise SettingWithCopyWarning.
        df = df[df['maxPlace'] > 1].copy()

    df['totalDistance'] = df['rideDistance'] + df['walkDistance'] + df['swimDistance']
    # Non-positive rank points are treated as 0.
    df['rankPoints'] = df['rankPoints'].clip(lower=0)

    target = 'winPlacePerc'
    group_keys = ['matchId', 'groupId']

    # Identifier / categorical columns that must not be aggregated.
    excluded = {'Id', 'matchId', 'groupId', 'matchDuration', 'matchType'}
    if is_train:
        excluded.add(target)
    features = [col for col in df.columns if col not in excluded]

    y = None
    if is_train:
        y = np.array(df.groupby(group_keys)[target].agg('mean'), dtype=np.float64)

    # One row per group, sorted by the group keys (same order as ``y``).
    group_size = df.groupby(group_keys).size().reset_index(name='group_size')

    # Train: one output row per group. Test: one output row per player so the
    # predictions line up with the submission rows.
    df_out = group_size[group_keys] if is_train else df[group_keys]

    per_group = per_group_rank = None
    for stat in ('mean', 'max', 'min'):
        per_group = df.groupby(group_keys)[features].agg(stat)
        # Percentile rank of each group's aggregate within its match.
        per_group_rank = per_group.groupby('matchId')[features].rank(pct=True)

        df_out = df_out.merge(
            per_group.add_suffix('_' + stat).reset_index(),
            how='left', on=group_keys)
        df_out = df_out.merge(
            per_group_rank.add_suffix('_' + stat + '_rank').reset_index(),
            how='left', on=group_keys)

    df_out = df_out.merge(group_size, how='left', on=group_keys)

    match_mean = (df.groupby('matchId')[features].agg('mean')
                  .add_suffix('_match_mean').reset_index())
    df_out = df_out.merge(match_mean, how='left', on='matchId')

    # Number of groups in each match.
    match_size = group_size.groupby('matchId').size().reset_index(name='match_size')
    df_out = df_out.merge(match_size, how='left', on='matchId')

    X = np.array(df_out.drop(group_keys, axis=1), dtype=np.float64)

    # Release the large intermediates before handing control back.
    del df, df_out, group_size, match_mean, match_size, per_group, per_group_rank
    gc.collect()

    return X, y


# Build the training feature matrix and the matching targets.
x_train, y = feature_engineering(is_train=True)

# Fully connected regressor: four tanh hidden layers narrowing from 256 to 32
# units, followed by a single ReLU output unit.
model = keras.Sequential(
    [keras.layers.Dense(256, input_dim=x_train.shape[1], activation='tanh')]
    + [keras.layers.Dense(width, activation='tanh') for width in (128, 64, 32)]
    + [keras.layers.Dense(1, activation='relu')]
)
model.compile(loss='mae', optimizer='adam', metrics=['mse'])
model.fit(x=x_train, y=y, epochs=15, batch_size=2000, verbose=1)

# The training arrays are no longer needed; free them before loading test data.
del x_train, y
gc.collect()

# Test features come back one row per test row; no targets are returned.
x_test, _ = feature_engineering(is_train=False)
pred = model.predict(x_test)

del x_test
gc.collect()

df_test = pd.read_csv('/kaggle/input/pubg-finish-placement-prediction/test_V2.csv')

print("fix winPlacePerc")
# Work on a flat 1-D copy of the predictions (one value per test row) and
# keep everything vectorised: a per-row ``df_test.iloc[i]`` loop builds a
# Series for every row and is extremely slow on a large test set.
win_place_perc = np.clip(np.reshape(pred, -1), 0.0, 1.0)

# Matches whose maxPlace is 0 or 1 get a fixed placement instead of the
# model output.
max_place = df_test['maxPlace'].to_numpy()
win_place_perc[max_place == 0] = 0.0
win_place_perc[max_place == 1] = 1.0

# Mirror the corrected values back into ``pred`` so it holds the same
# post-processed predictions it held after the original in-place loop.
pred[...] = win_place_perc.reshape(pred.shape)

df_test['winPlacePerc'] = win_place_perc

submission = df_test[['Id', 'winPlacePerc']]
submission.to_csv('submission.csv', index=False)

 