In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from catboost import CatBoostRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler,MinMaxScaler

In [0]:
train=pd.read_csv('../input/train_V2.csv')
test=pd.read_csv('../input/test_V2.csv')
ID=test['Id']

In [0]:
train.isna().sum()

Id                 0
groupId            0
matchId            0
assists            0
boosts             0
damageDealt        0
DBNOs              0
headshotKills      0
heals              0
killPlace          0
killPoints         0
kills              0
killStreaks        0
longestKill        0
matchDuration      0
matchType          0
maxPlace           0
numGroups          0
rankPoints         0
revives            0
rideDistance       0
roadKills          0
swimDistance       0
teamKills          0
vehicleDestroys    0
walkDistance       0
weaponsAcquired    0
winPoints          0
winPlacePerc       1
dtype: int64

In [0]:
train=train.dropna(axis=0)

In [0]:
y_train=train['winPlacePerc']
train=train.drop(['winPlacePerc'],axis=1)

In [0]:
train["playersInMatch"] = train.groupby("matchId")["Id"].transform("count")
train["playersInGroup"] = train.groupby("groupId")["Id"].transform("count")

test["playersInMatch"] = test.groupby("matchId")["Id"].transform("count")
test["playersInGroup"] = test.groupby("groupId")["Id"].transform("count")

In [0]:
train['TotalKills'] = train.groupby('groupId')['kills'].transform('sum')
test['TotalKills'] = test.groupby('groupId')['kills'].transform('sum')

In [0]:
train['FirstMan'] = train.groupby('groupId')['matchDuration'].transform('min')
test['FirstMan'] = test.groupby('groupId')['matchDuration'].transform('min')

In [0]:
train['LastMan'] = train.groupby('groupId')['matchDuration'].transform('max')
test['LastMan'] = test.groupby('groupId')['matchDuration'].transform('max')

In [0]:
train['Survival'] = train['LastMan'] - train['FirstMan']
test['Survival'] = test['LastMan'] - test['FirstMan']

In [0]:
train['Position'] = train['killPlace'] / (train['maxPlace'] + 1e-9)
test['Position'] = test['killPlace'] / (test['maxPlace'] + 1e-9)

In [0]:
train.drop(["matchId","groupId",'Id','killPoints', 'maxPlace', 'winPoints','vehicleDestroys'],axis=1,inplace=True)
test.drop(["matchId","groupId",'Id','killPoints', 'maxPlace', 'winPoints','vehicleDestroys'],axis=1,inplace=True)

In [0]:
train['headshotrate'] = train['kills'] / (train['headshotKills'] + 1e-9)
test['headshotrate'] = test['kills'] / (test['headshotKills'] + 1e-9)

train['killStreakrate'] = train['killStreaks'] / (train['kills'] + 1e-9)
test['killStreakrate'] = test['killStreaks'] / (test['kills'] + 1e-9)

In [0]:
train['TotalDamage'] = train['damageDealt'] + train['teamKills']*100
test['TotalDamage'] = test['damageDealt'] + test['teamKills']*100

In [0]:
train['Noob']=(train['matchDuration'] < train['matchDuration'].mean() )
test['Noob']=(test['matchDuration'] < train['matchDuration'].mean() )

In [0]:
train['Sniper']=(train['longestKill']>=250)
test['Sniper']=(test['longestKill']>=250)

In [0]:
train['ProAim']= (train['headshotKills']/(train['kills']+1e-9))
test['ProAim']= (test['headshotKills']/(test['kills']+1e-9))

In [0]:
train['distance'] = (train['rideDistance']+train['swimDistance']+train['walkDistance'])
test['distance'] = (test['rideDistance']+test['swimDistance']+test['walkDistance'])
    
train['distance'] = np.log1p(train['distance'])
test['distance'] = np.log1p(test['distance'])

In [0]:
set1=set(i for i in train[(train['kills']>40) & (train['heals']==0)].index.tolist())
set2=set(i for i in train[(train['distance']==0) & (train['kills']>20) ].index.tolist())
set3=set(i for i in train[(train['damageDealt']>4000) & (train['heals']<2)].index.tolist())
set4=set(i for i in train[(train['rideDistance']>25000)].index.tolist())
set5=set(i for i in train[(train['killStreaks']>3) & (train['weaponsAcquired']> 30)].index.tolist())
sets=set1 | set2 | set3 | set4 | set5

In [0]:
len(sets)

180

In [0]:
train=train.drop(list(sets))
y_train=y_train.drop(list(sets))


In [0]:
train.shape

(4446785, 35)

In [0]:
fpp=['crashfpp','duo-fpp','flare-fpp','normal-duo-fpp','normal-solo-fpp','normal-squad-fpp','solo-fpp','squad-fpp']
train["fpp"] = np.where(train["matchType"].isin(fpp),1,0)
test["fpp"] = np.where(test["matchType"].isin(fpp),1,0)

In [0]:
change={'crashfpp':'crash',
        'crashtpp':'crash',
        'duo':'duo',
        'duo-fpp':'duo',
        'flarefpp':'flare',
        'flaretpp':'flare',
        'normal-duo':'duo',
        'normal-duo-fpp':'duo',
        'normal-solo':'solo',
        'normal-solo-fpp':'solo',
        'normal-squad':'squad',
        'normal-squad-fpp':'squad',
        'solo-fpp':'solo',
        'squad-fpp':'squad',
        'solo':'solo',
        'squad':'squad'
       }
train['matchType']=train['matchType'].map(change)
test['matchType']=test['matchType'].map(change)

In [0]:
modes={'crash':1,
       'duo':2,
       'flare':3,
       'solo':4,
       'squad':5
      }
train['matchType']=train['matchType'].map(modes)
test['matchType']=test['matchType'].map(modes)

In [0]:
d1=pd.get_dummies(train['matchType'])
train=train.drop(['matchType'],axis=1)
train=train.join(d1)
    
d2=pd.get_dummies(test['matchType'])
test=test.drop(['matchType'],axis=1)
test=test.join(d2)
    

In [0]:
scaler = MinMaxScaler()
scaler.fit(train)
train=scaler.transform(train)
test=scaler.transform(test)

In [0]:
df = pd.DataFrame(train)
df.isnull().sum()

0     0
1     0
2     0
3     0
4     0
5     0
6     0
7     0
8     0
9     0
10    0
11    0
12    0
13    0
14    0
15    0
16    0
17    0
18    0
19    0
20    0
21    0
22    0
23    0
24    0
25    0
26    0
27    0
28    0
29    0
30    0
31    0
32    0
33    0
34    0
35    0
36    0
37    0
38    0
39    0
dtype: int64

In [0]:
X_train,X_test,y_train,y_test= train_test_split(train,y_train,test_size=0.3)

In [0]:
lm = Lasso(alpha=1e-5)
lm.fit(X_train,y_train)

Lasso(alpha=1e-05, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)

In [0]:
train_mse = (mean_absolute_error(y_train,lm.predict(X_train)))
test_mse = (mean_absolute_error(y_test, lm.predict(X_test)))
train_mse,test_mse

(0.08178792949241988, 0.0816802427470802)

In [0]:
y_train = y_train - lm.predict(X_train)
y_test = y_test - lm.predict(X_test)

In [0]:
from catboost import Pool
train_pool = Pool(X_train, y_train)
test_pool = Pool(X_test, y_test) 

In [0]:
model = CatBoostRegressor(
    iterations=5000,
    depth=10,
    learning_rate=0.1,
    l2_leaf_reg= 2,
    loss_function='RMSE',
    eval_metric='MAE',
    random_strength=0.1,
    bootstrap_type='Bernoulli',
    leaf_estimation_method='Gradient',
    leaf_estimation_iterations=1,
    boosting_type='Plain'
    ,task_type = "GPU"
    ,feature_border_type='GreedyLogSum'
    ,random_seed=1234
)

In [0]:
model.fit(train_pool, eval_set=test_pool)

0:	learn: 0.0794962	test: 0.0793805	best: 0.0793805 (0)	total: 33ms	remaining: 2m 45s
1:	learn: 0.0775736	test: 0.0774564	best: 0.0774564 (1)	total: 64.9ms	remaining: 2m 42s
2:	learn: 0.0759517	test: 0.0758353	best: 0.0758353 (2)	total: 97.4ms	remaining: 2m 42s
3:	learn: 0.0744899	test: 0.0743776	best: 0.0743776 (3)	total: 134ms	remaining: 2m 47s
4:	learn: 0.0733036	test: 0.0731957	best: 0.0731957 (4)	total: 164ms	remaining: 2m 43s
5:	learn: 0.0721827	test: 0.0720767	best: 0.0720767 (5)	total: 195ms	remaining: 2m 42s
6:	learn: 0.0711370	test: 0.0710323	best: 0.0710323 (6)	total: 225ms	remaining: 2m 40s
7:	learn: 0.0702834	test: 0.0701816	best: 0.0701816 (7)	total: 257ms	remaining: 2m 40s
8:	learn: 0.0696069	test: 0.0695052	best: 0.0695052 (8)	total: 297ms	remaining: 2m 44s
9:	learn: 0.0687894	test: 0.0686857	best: 0.0686857 (9)	total: 329ms	remaining: 2m 44s
10:	learn: 0.0681178	test: 0.0680168	best: 0.0680168 (10)	total: 363ms	remaining: 2m 44s
11:	learn: 0.0675859	test: 0.0674892	bes

<catboost.core.CatBoostRegressor at 0x7fa3d989d908>

In [0]:
train_mse =(mean_absolute_error(y_train,lm.predict(X_train) + model.predict(X_train)))
test_mse =(mean_absolute_error(y_test, lm.predict(X_test) + model.predict(X_test)))
    
print('Train error= ',train_mse)
print('Test error= ',test_mse)


Train error=  0.47767038690836666
Test error=  0.4785083870604905
