# LightGBM applied on DUO & SQUAD & OTHERS modes

# 1. Classification

In [1]:
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from mlxtend.evaluate import feature_importance_permutation
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
pd.set_option('display.max_columns', None) # show every column when a DataFrame is displayed

In [96]:
# Load the full per-player table; the duo / squad / other subsets below are all cut from it.
dat_final = pd.read_csv('./data_final.csv')

In [4]:
dat_final.shape

(4445191, 30)

In [28]:
dat_final.head(12)

Unnamed: 0.1,Unnamed: 0,Id,groupId,matchId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,killPoints,kills,killStreaks,longestKill,matchDuration,matchType,maxPlace,numGroups,rankPoints,revives,rideDistance,roadKills,swimDistance,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc
0,0,7f96b2f878858a,4d4b580de459be,a10357fd1a4a91,0,0,0.0,0,0,0,60,1241,0,0,0.0,1306,squad-fpp,28,26,-1,0,0.0,0,0.0,0,0,244.8,1,1466,0.4444
1,1,eef90569b9d03c,684d5656442f9e,aeb375fc57110c,0,0,91.47,0,0,0,57,0,0,0,0.0,1777,squad-fpp,26,25,1484,0,0.0045,0,11.04,0,0,1434.0,5,0,0.64
2,2,1eaf90ac73de72,6a4a42c3245a74,110163d8bb94ae,1,0,68.0,0,0,0,47,0,0,0,0.0,1318,duo,50,47,1491,0,0.0,0,0.0,0,0,161.8,2,0,0.7755
3,3,4616d365dd2853,a930a9c79cd721,f1f1f4ef412d7e,0,0,32.9,0,0,0,75,0,0,0,0.0,1436,squad-fpp,31,30,1408,0,0.0,0,0.0,0,0,202.7,3,0,0.1667
4,4,315c96c26c9aac,de04010b3458dd,6dc8ff871e21e6,0,0,100.0,0,0,0,45,0,1,1,58.53,1424,solo-fpp,97,95,1560,0,0.0,0,0.0,0,0,49.75,2,0,0.1875
5,5,ff79c12f326506,289a6836a88d27,bac52627a12114,0,0,100.0,1,1,0,44,0,1,1,18.44,1395,squad-fpp,28,28,1418,0,0.0,0,0.0,0,0,34.7,1,0,0.037
6,6,95959be0e21ca3,2c485a1ad3d0f1,a8274e903927a2,0,0,0.0,0,0,0,96,1262,0,0,0.0,1316,squad-fpp,28,28,-1,0,0.0,0,0.0,0,0,13.5,1,1497,0.0
7,7,311b84c6ff4390,eaba5fcb7fc1ae,292611730ca862,0,0,8.538,0,0,0,48,1000,0,0,0.0,1967,solo-fpp,96,92,-1,0,2004.0,0,0.0,0,0,1089.0,6,1500,0.7368
8,8,1a68204ccf9891,47cfbb04e1b1a2,df014fbee741c6,0,0,51.6,0,0,0,64,0,0,0,0.0,1375,squad,28,27,1493,0,0.0,0,0.0,0,0,799.9,4,0,0.3704
9,9,e5bb5a43587253,759bb6f7514fd2,3d3031c795305b,0,0,37.27,0,0,0,74,0,0,0,0.0,1930,squad,29,27,1349,0,0.0,0,0.0,0,0,65.67,1,0,0.2143


# DUO

In [5]:
# Sorted array of the distinct matchType labels; the per-mode label lists below are picked from it.
clas = np.unique(dat_final['matchType'])
clas

array(['crashfpp', 'crashtpp', 'duo', 'duo-fpp', 'flarefpp', 'flaretpp',
       'normal-duo', 'normal-duo-fpp', 'normal-solo', 'normal-solo-fpp',
       'normal-squad', 'normal-squad-fpp', 'solo', 'solo-fpp', 'squad',
       'squad-fpp'], dtype=object)

In [6]:
# Keep every match-type label whose name contains the substring 'duo'.
condition_duo = list(filter(lambda label: 'duo' in label, clas))

In [7]:
condition_duo

['duo', 'duo-fpp', 'normal-duo', 'normal-duo-fpp']

In [8]:
# Row positions (0-based) of every player record that belongs to a duo-type match.
# A vectorised isin() mask replaces the per-row `dat_final['matchType'][i]` lookup:
# that loop performed millions of label-based Series accesses in pure Python and
# only matched positions because the index happens to be the default 0..n-1 range.
# The result is still a plain list of ints, ready for `.iloc`.
duodata_index = np.flatnonzero(dat_final['matchType'].isin(condition_duo).values).tolist()

In [9]:
duodata_index[:5]

[2, 11, 15, 20, 22]

In [10]:
len(duodata_index)

1315806

In [11]:
# Positional row selection of the duo records, keeping every column.
duodata = dat_final.iloc[duodata_index, :]

In [12]:
duodata.head()

Unnamed: 0.1,Unnamed: 0,Id,groupId,matchId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,killPoints,kills,killStreaks,longestKill,matchDuration,matchType,maxPlace,numGroups,rankPoints,revives,rideDistance,roadKills,swimDistance,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc
2,2,1eaf90ac73de72,6a4a42c3245a74,110163d8bb94ae,1,0,68.0,0,0,0,47,0,0,0,0.0,1318,duo,50,47,1491,0,0.0,0,0.0,0,0,161.8,2,0,0.7755
11,11,8de328a74658a9,f643df9df3877c,80170383d90003,0,0,137.9,1,0,0,64,0,0,0,0.0,1384,duo-fpp,48,46,1488,0,0.0,0,0.0,0,0,451.7,1,0,0.4043
15,15,12d8d4bd94312c,fe52d481bae68b,6fd9e765ddd0c5,0,0,80.71,1,0,0,72,0,0,0,0.0,1313,duo-fpp,48,48,1495,1,0.0,0,0.0,0,0,105.1,5,0,0.2766
20,20,02ace8c6e58461,a4bc548028f800,80f2b8448e474b,0,4,269.1,0,1,8,18,0,2,1,7.438,1890,duo-fpp,48,48,1491,1,2734.0,0,0.0,0,0,1794.0,5,0,0.6383
22,22,9b2961d4d51f91,799d0a4d61dc3c,e833ca2282169d,0,1,192.3,1,2,3,15,1022,2,1,280.6,1775,duo-fpp,49,48,-1,0,2332.0,0,0.0,0,0,1264.0,4,1494,0.75


In [13]:
duodata.shape

(1315806, 30)

In [14]:
duodata.columns

Index(['Unnamed: 0', 'Id', 'groupId', 'matchId', 'assists', 'boosts',
       'damageDealt', 'DBNOs', 'headshotKills', 'heals', 'killPlace',
       'killPoints', 'kills', 'killStreaks', 'longestKill', 'matchDuration',
       'matchType', 'maxPlace', 'numGroups', 'rankPoints', 'revives',
       'rideDistance', 'roadKills', 'swimDistance', 'teamKills',
       'vehicleDestroys', 'walkDistance', 'weaponsAcquired', 'winPoints',
       'winPlacePerc'],
      dtype='object')

In [15]:
# Most frequent numGroups values among duo records.
# Series.value_counts() replaces the deprecated top-level pd.value_counts().
duodata['numGroups'].value_counts().head()

47    268912
46    235103
48    209445
45    184850
44    117373
Name: numGroups, dtype: int64

In [29]:
# Largest groups (number of player rows per groupId) among duo records.
# Series.value_counts() replaces the deprecated top-level pd.value_counts().
duodata['groupId'].value_counts().head()

b8275198faa03b    72
7385e5fe214021    49
d255538063c143    16
34e1b70001b6b3    16
35199c507a947d    14
Name: groupId, dtype: int64

In [34]:
# Number of duo groups whose row count is NOT exactly 2.
# The vectorised .sum() avoids iterating the boolean Series element by element
# with the builtin sum(), and Series.value_counts() avoids the deprecated
# top-level pd.value_counts().
(duodata['groupId'].value_counts() != 2).sum()

88791

In [35]:
# Number of duo groups that contain exactly 2 player rows.
# The vectorised .sum() avoids iterating the boolean Series element by element
# with the builtin sum(), and Series.value_counts() avoids the deprecated
# top-level pd.value_counts().
(duodata['groupId'].value_counts() == 2).sum()

552911

# One group with more than 70 people???

In [31]:
# Pull out the rows of the single oversized group for inspection.
spe = duodata.loc[duodata['groupId'] == 'b8275198faa03b']

In [33]:
spe.head()

Unnamed: 0.1,Unnamed: 0,Id,groupId,matchId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,killPoints,kills,killStreaks,longestKill,matchDuration,matchType,maxPlace,numGroups,rankPoints,revives,rideDistance,roadKills,swimDistance,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc
12507,12510,305f18f6977ff6,b8275198faa03b,3e029737889ce9,0,0,0.0,0,0,0,46,1462,0,0,0.0,1501,duo-fpp,50,14,-1,0,0.0,0,0.0,0,0,1056.0,3,1607,0.2449
146847,146910,6f5d3d368ee743,b8275198faa03b,3e029737889ce9,0,0,208.4,2,0,0,1,1648,3,2,80.57,1501,duo-fpp,50,14,-1,0,0.0,0,0.0,0,0,421.3,4,1545,0.2449
157734,157801,4e12f9f6e36118,b8275198faa03b,3e029737889ce9,0,0,17.33,0,0,0,36,1002,0,0,0.0,1501,duo-fpp,50,14,-1,0,0.0,0,0.0,0,0,254.4,3,1494,0.2449
372213,372370,0dc8f2b7ae5885,b8275198faa03b,3e029737889ce9,0,0,0.0,0,0,0,39,1581,0,0,0.0,1501,duo-fpp,50,14,-1,0,0.0,0,0.0,0,0,270.3,2,1589,0.2449
386713,386871,93ccf581ed88c7,b8275198faa03b,3e029737889ce9,0,0,66.88,0,0,0,76,1630,0,0,0.0,1501,duo-fpp,50,14,-1,0,0.0,0,0.0,0,0,422.1,4,1585,0.2449


In [21]:
# Collapse the player rows to one row per groupId by averaging every numeric stat.
# numeric_only=True skips the string columns (Id, matchId, matchType); without it
# pandas >= 2.0 raises a TypeError instead of silently dropping them, while older
# versions produce the same 26 numeric columns either way.
duodata_merge = duodata.groupby('groupId').mean(numeric_only=True)

In [22]:
duodata_merge.shape

(641702, 26)

In [23]:
duodata_merge.head()

Unnamed: 0_level_0,Unnamed: 0,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,killPoints,kills,killStreaks,longestKill,matchDuration,maxPlace,numGroups,rankPoints,revives,rideDistance,roadKills,swimDistance,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc
groupId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
00003a54230763,813531.5,0.5,0.0,50.0,0.5,0.5,0.0,54.5,1013.0,0.5,0.5,1.5855,2190.0,44.0,42.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,92.675,2.5,1501.0,0.1395
000077d5217a7c,2769281.0,0.0,0.5,0.0,0.0,0.0,0.0,47.5,1000.0,0.0,0.0,0.0,1345.0,48.0,47.0,-1.0,0.0,0.0,0.0,2.544,0.0,0.0,1355.0,3.5,1500.0,0.8298
00009c612cb5dc,3837977.0,0.0,1.0,9.19,0.5,0.0,3.5,59.5,0.0,0.0,0.0,0.0,1939.0,46.0,46.0,1499.0,0.5,3060.5,0.0,0.0,0.0,0.0,721.4,2.0,0.0,0.5333
00009db03ac5d7,2082202.0,0.0,0.0,172.95,1.0,0.0,0.5,48.5,1519.0,1.0,0.5,11.68,1869.0,50.0,50.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,157.35,3.0,1604.5,0.3878
0000b3a8f7db43,1807155.0,0.0,0.0,13.543333,0.0,0.0,0.0,93.0,1143.333333,0.0,0.0,0.0,1971.0,49.0,49.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,119.13,1.666667,1532.333333,0.0417


In [38]:
# Columns to discard from the group-level table.
# NOTE(review): 'Unnamed: 0' looks like a leftover row-index column from an earlier CSV export — confirm.
rm_feature = ['Unnamed: 0']
len(rm_feature)

1

In [39]:
# Remove the unwanted columns listed in rm_feature, then preview the result.
duodata_merge_final = duodata_merge.drop(columns=rm_feature)
duodata_merge_final.head()

Unnamed: 0_level_0,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,killPoints,kills,killStreaks,longestKill,matchDuration,maxPlace,numGroups,rankPoints,revives,rideDistance,roadKills,swimDistance,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc
groupId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
00003a54230763,0.5,0.0,50.0,0.5,0.5,0.0,54.5,1013.0,0.5,0.5,1.5855,2190.0,44.0,42.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,92.675,2.5,1501.0,0.1395
000077d5217a7c,0.0,0.5,0.0,0.0,0.0,0.0,47.5,1000.0,0.0,0.0,0.0,1345.0,48.0,47.0,-1.0,0.0,0.0,0.0,2.544,0.0,0.0,1355.0,3.5,1500.0,0.8298
00009c612cb5dc,0.0,1.0,9.19,0.5,0.0,3.5,59.5,0.0,0.0,0.0,0.0,1939.0,46.0,46.0,1499.0,0.5,3060.5,0.0,0.0,0.0,0.0,721.4,2.0,0.0,0.5333
00009db03ac5d7,0.0,0.0,172.95,1.0,0.0,0.5,48.5,1519.0,1.0,0.5,11.68,1869.0,50.0,50.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,157.35,3.0,1604.5,0.3878
0000b3a8f7db43,0.0,0.0,13.543333,0.0,0.0,0.0,93.0,1143.333333,0.0,0.0,0.0,1971.0,49.0,49.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,119.13,1.666667,1532.333333,0.0417


In [40]:
duodata_merge_final.shape

(641702, 25)

In [43]:
# Replace the groupId index with a plain 0..n-1 integer index (groupId itself is discarded).
duodata_merge_final = duodata_merge_final.reset_index(drop=True)

In [44]:
duodata_merge_final.head()

Unnamed: 0,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,killPoints,kills,killStreaks,longestKill,matchDuration,maxPlace,numGroups,rankPoints,revives,rideDistance,roadKills,swimDistance,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc
0,0.5,0.0,50.0,0.5,0.5,0.0,54.5,1013.0,0.5,0.5,1.5855,2190.0,44.0,42.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,92.675,2.5,1501.0,0.1395
1,0.0,0.5,0.0,0.0,0.0,0.0,47.5,1000.0,0.0,0.0,0.0,1345.0,48.0,47.0,-1.0,0.0,0.0,0.0,2.544,0.0,0.0,1355.0,3.5,1500.0,0.8298
2,0.0,1.0,9.19,0.5,0.0,3.5,59.5,0.0,0.0,0.0,0.0,1939.0,46.0,46.0,1499.0,0.5,3060.5,0.0,0.0,0.0,0.0,721.4,2.0,0.0,0.5333
3,0.0,0.0,172.95,1.0,0.0,0.5,48.5,1519.0,1.0,0.5,11.68,1869.0,50.0,50.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,157.35,3.0,1604.5,0.3878
4,0.0,0.0,13.543333,0.0,0.0,0.0,93.0,1143.333333,0.0,0.0,0.0,1971.0,49.0,49.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,119.13,1.666667,1532.333333,0.0417


In [65]:
# Export of the group-level duo table, left disabled. The LightGBM section reads
# './duo_data_final.csv', so uncomment this line to regenerate that file.
#duodata_merge_final.to_csv('duo_data_final.csv', index=False)

# SQUAD

In [46]:
# Keep every match-type label whose name contains the substring 'squad'.
condition_squ = list(filter(lambda label: 'squad' in label, clas))

In [49]:
# Row positions (0-based) of every player record that belongs to a squad-type match.
# A vectorised isin() mask replaces the per-row `dat_final['matchType'][i]` lookup,
# which ran millions of label-based Series accesses in pure Python and only matched
# positions because the index is the default 0..n-1 range.
# The result is still a plain list of ints, ready for `.iloc`.
squdata_index = np.flatnonzero(dat_final['matchType'].isin(condition_squ).values).tolist()

In [50]:
# Positional row selection of the squad records, keeping every column.
squdata = dat_final.iloc[squdata_index, :]

In [51]:
squdata.shape

(2399298, 30)

In [52]:
squdata.head()

Unnamed: 0.1,Unnamed: 0,Id,groupId,matchId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,killPoints,kills,killStreaks,longestKill,matchDuration,matchType,maxPlace,numGroups,rankPoints,revives,rideDistance,roadKills,swimDistance,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc
0,0,7f96b2f878858a,4d4b580de459be,a10357fd1a4a91,0,0,0.0,0,0,0,60,1241,0,0,0.0,1306,squad-fpp,28,26,-1,0,0.0,0,0.0,0,0,244.8,1,1466,0.4444
1,1,eef90569b9d03c,684d5656442f9e,aeb375fc57110c,0,0,91.47,0,0,0,57,0,0,0,0.0,1777,squad-fpp,26,25,1484,0,0.0045,0,11.04,0,0,1434.0,5,0,0.64
3,3,4616d365dd2853,a930a9c79cd721,f1f1f4ef412d7e,0,0,32.9,0,0,0,75,0,0,0,0.0,1436,squad-fpp,31,30,1408,0,0.0,0,0.0,0,0,202.7,3,0,0.1667
5,5,ff79c12f326506,289a6836a88d27,bac52627a12114,0,0,100.0,1,1,0,44,0,1,1,18.44,1395,squad-fpp,28,28,1418,0,0.0,0,0.0,0,0,34.7,1,0,0.037
6,6,95959be0e21ca3,2c485a1ad3d0f1,a8274e903927a2,0,0,0.0,0,0,0,96,1262,0,0,0.0,1316,squad-fpp,28,28,-1,0,0.0,0,0.0,0,0,13.5,1,1497,0.0


In [53]:
# Collapse the player rows to one row per groupId by averaging every numeric stat.
# numeric_only=True skips the string columns (Id, matchId, matchType); without it
# pandas >= 2.0 raises a TypeError instead of silently dropping them.
squdata_merge = squdata.groupby('groupId').mean(numeric_only=True)

In [54]:
squdata_merge.head()

Unnamed: 0_level_0,Unnamed: 0,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,killPoints,kills,killStreaks,longestKill,matchDuration,maxPlace,numGroups,rankPoints,revives,rideDistance,roadKills,swimDistance,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc
groupId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
00000c08b5be36,2361922.0,0.0,0.333333,247.166667,1.666667,0.333333,0.333333,50.333333,0.0,0.666667,0.666667,9.051667,1429.0,26.0,26.0,1457.333333,0.0,0.0,0.0,0.0,0.0,0.0,271.533333,2.333333,0.0,0.2
00000d1cbbc340,67943.0,0.0,0.0,173.7,0.0,0.0,0.0,40.0,0.0,1.0,1.0,1.964,1196.0,27.0,24.0,1551.0,0.0,0.0,0.0,0.0,0.0,0.0,135.8,3.0,0.0,0.1154
000038ec4dff53,1446551.0,0.666667,2.333333,263.556667,1.666667,0.333333,3.0,13.0,0.0,2.0,1.0,53.51,1470.0,27.0,27.0,1516.0,0.666667,0.0,0.0,74.923333,0.0,0.0,2082.666667,5.666667,0.0,0.9615
000049feba5c83,2369093.0,0.333333,3.333333,111.053333,1.666667,0.0,8.0,25.0,1000.0,1.0,0.666667,14.473333,1220.0,32.0,32.0,-1.0,0.666667,440.5,0.0,6.556667,0.0,0.0,2482.666667,4.0,1500.0,0.8387
000073397f671b,2056099.0,0.333333,4.0,133.466667,0.333333,0.0,0.333333,36.333333,0.0,0.666667,0.666667,46.216667,1382.0,28.0,25.0,1430.666667,0.333333,558.633333,0.0,100.703333,0.0,0.0,2636.0,6.333333,0.0,0.7778


In [55]:
squdata_merge.shape

(687280, 26)

In [56]:
# Discard the first column of the aggregated table (selected by position, dropped by its label).
leading_column = squdata_merge.columns[0]
squdata_merge_final = squdata_merge.drop(columns=[leading_column])

In [58]:
squdata_merge_final.shape

(687280, 25)

In [59]:
# Replace the groupId index with a plain 0..n-1 integer index (groupId itself is discarded).
squdata_merge_final = squdata_merge_final.reset_index(drop=True)

In [60]:
squdata_merge_final.head()

Unnamed: 0,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,killPoints,kills,killStreaks,longestKill,matchDuration,maxPlace,numGroups,rankPoints,revives,rideDistance,roadKills,swimDistance,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc
0,0.0,0.333333,247.166667,1.666667,0.333333,0.333333,50.333333,0.0,0.666667,0.666667,9.051667,1429.0,26.0,26.0,1457.333333,0.0,0.0,0.0,0.0,0.0,0.0,271.533333,2.333333,0.0,0.2
1,0.0,0.0,173.7,0.0,0.0,0.0,40.0,0.0,1.0,1.0,1.964,1196.0,27.0,24.0,1551.0,0.0,0.0,0.0,0.0,0.0,0.0,135.8,3.0,0.0,0.1154
2,0.666667,2.333333,263.556667,1.666667,0.333333,3.0,13.0,0.0,2.0,1.0,53.51,1470.0,27.0,27.0,1516.0,0.666667,0.0,0.0,74.923333,0.0,0.0,2082.666667,5.666667,0.0,0.9615
3,0.333333,3.333333,111.053333,1.666667,0.0,8.0,25.0,1000.0,1.0,0.666667,14.473333,1220.0,32.0,32.0,-1.0,0.666667,440.5,0.0,6.556667,0.0,0.0,2482.666667,4.0,1500.0,0.8387
4,0.333333,4.0,133.466667,0.333333,0.0,0.333333,36.333333,0.0,0.666667,0.666667,46.216667,1382.0,28.0,25.0,1430.666667,0.333333,558.633333,0.0,100.703333,0.0,0.0,2636.0,6.333333,0.0,0.7778


In [63]:
# Export of the group-level squad table, left disabled. The LightGBM section reads
# './squad_data_final.csv', so uncomment this line to regenerate that file.
#squdata_merge_final.to_csv('squad_data_final.csv', index=False)

# Other mode

In [98]:
np.unique(dat_final['matchType'])

array(['crashfpp', 'crashtpp', 'duo', 'duo-fpp', 'flarefpp', 'flaretpp',
       'normal-duo', 'normal-duo-fpp', 'normal-solo', 'normal-solo-fpp',
       'normal-squad', 'normal-squad-fpp', 'solo', 'solo-fpp', 'squad',
       'squad-fpp'], dtype=object)

In [103]:
# Match-type labels treated as "other": the crash and flare event modes.
condition_other = [
    'crashfpp',
    'crashtpp',
    'flarefpp',
    'flaretpp',
]

In [104]:
# Row positions (0-based) of every player record that belongs to a crash/flare match.
# A vectorised isin() mask replaces the per-row `dat_final['matchType'][i]` lookup,
# which ran millions of label-based Series accesses in pure Python and only matched
# positions because the index is the default 0..n-1 range.
# The result is still a plain list of ints, ready for `.iloc`.
otherdata_index = np.flatnonzero(dat_final['matchType'].isin(condition_other).values).tolist()

In [126]:
# Positional row selection of the crash/flare records, keeping every column.
otherdata = dat_final.iloc[otherdata_index, :]

In [127]:
otherdata.shape

(9881, 30)

In [124]:
otherdata.head()

Unnamed: 0.1,Unnamed: 0,Id,groupId,matchId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,killPoints,kills,killStreaks,longestKill,matchDuration,matchType,maxPlace,numGroups,rankPoints,revives,rideDistance,roadKills,swimDistance,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc
1093,1093,c8ed6a171536e3,84748458aba82a,d4f1811cf6a04b,1,3,187.4,0,1,6,27,0,1,1,0.8007,904,crashfpp,50,45,1500,0,0.0,0,0.0,0,0,1342.0,1,0,0.4898
1207,1207,fb785deb59f2bc,4438f77ac9f2e6,33d976b454b843,0,4,576.8,7,2,4,6,0,4,2,208.5,1947,flaretpp,26,25,1500,1,2548.0,0,0.0,0,1,2563.0,6,0,0.8
1276,1276,d3c4dd2e585d21,6af9bb6b56b722,16e6befa897b44,0,0,0.0,0,0,0,88,0,0,0,0.0,892,crashfpp,47,45,1500,0,0.0,0,0.0,0,0,0.0,0,0,0.0
1524,1524,b0fbbe07014fcd,7ce6194a5dd609,e330f44c528e6f,0,0,20.94,0,0,0,55,0,0,0,0.0,2031,flarefpp,17,17,1500,0,0.0,0,0.0,0,0,13.64,1,0,0.0625
1790,1790,28390372a2cc4f,c529d05da4597b,be945f2803814a,0,0,0.0,0,0,0,76,0,0,0,0.0,915,crashfpp,50,50,1500,0,393.7,0,0.0,0,0,459.6,0,0,0.2041


In [113]:
# Largest groups (number of player rows per groupId) among the crash/flare records.
# Series.value_counts() replaces the deprecated top-level pd.value_counts().
otherdata['groupId'].value_counts().head()

f31091a0ccfce5    18
1e5c0ecdc2a193    17
70b3c40f721471    15
5bd1a72854f2c2    14
aa692a1c2b5b83    14
Name: groupId, dtype: int64

In [118]:
# Preview the player rows of one of the large crash/flare groups.
otherdata.loc[otherdata['groupId'].eq('1e5c0ecdc2a193')].head()

Unnamed: 0.1,Unnamed: 0,Id,groupId,matchId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,killPoints,kills,killStreaks,longestKill,matchDuration,matchType,maxPlace,numGroups,rankPoints,revives,rideDistance,roadKills,swimDistance,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc
132565,132624,f2e09d246c3830,1e5c0ecdc2a193,5fe982934b79aa,1,0,144.6,0,0,0,57,0,0,0,0.0,1971,flarefpp,19,16,1500,0,0.0,0,0.0,0,0,35.12,1,0,0.1111
149435,149498,a58b68044fc71e,1e5c0ecdc2a193,5fe982934b79aa,0,0,55.38,2,1,0,12,0,2,1,2.548,1971,flarefpp,19,16,1500,0,0.0,0,0.0,0,0,142.5,3,0,0.1111
645653,645915,92ce984df8fb23,1e5c0ecdc2a193,5fe982934b79aa,0,0,0.0,2,0,0,52,0,0,0,0.0,1971,flarefpp,19,16,1500,0,0.0,0,0.0,2,0,194.1,4,0,0.1111
969706,970103,f565124d5d2ce3,1e5c0ecdc2a193,5fe982934b79aa,0,0,74.82,0,0,0,51,0,0,0,0.0,1971,flarefpp,19,16,1500,0,0.0,0,0.0,0,0,339.4,1,0,0.1111
1192641,1193154,5bce7f1c92d214,1e5c0ecdc2a193,5fe982934b79aa,0,0,0.0,0,0,0,49,0,0,0,0.0,1971,flarefpp,19,16,1500,0,0.0,0,0.0,0,0,591.1,3,0,0.1111


In [128]:
# Collapse the player rows to one row per groupId by averaging every numeric stat.
# numeric_only=True skips the string columns (Id, matchId, matchType); without it
# pandas >= 2.0 raises a TypeError instead of silently dropping them.
otherdata_merge = otherdata.groupby('groupId').mean(numeric_only=True)

In [129]:
otherdata_merge.shape

(4205, 26)

In [130]:
otherdata_merge.head()

Unnamed: 0_level_0,Unnamed: 0,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,killPoints,kills,killStreaks,longestKill,matchDuration,maxPlace,numGroups,rankPoints,revives,rideDistance,roadKills,swimDistance,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc
groupId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
0001cad8adc6ea,1097862.0,1.5,4.75,507.5125,3.0,0.25,8.25,16.5,0.0,3.0,1.5,120.695,1867.0,24.0,23.0,1500.0,1.25,1035.225,0.0,0.0,0.25,0.25,4134.75,6.75,0.0,1.0
001959daa56f50,3176438.0,0.0,0.5,85.0,0.0,0.0,3.0,42.5,0.0,0.5,0.5,2.2625,916.0,52.0,49.0,1500.0,0.0,1674.8,0.5,0.0,0.0,0.0,1338.95,1.0,0.0,0.4902
001ca710bda472,1948301.0,0.166667,0.0,55.59,0.5,0.083333,0.0,69.5,0.0,0.333333,0.333333,2.1175,1746.0,28.0,27.0,1500.0,0.083333,0.0,0.0,0.0,0.0,0.0,135.500833,1.083333,0.0,0.037
002d040d353c68,3116764.0,0.0,0.0,60.4,0.0,0.0,0.0,82.0,0.0,0.0,0.0,0.0,907.0,49.0,44.0,1500.0,0.0,0.0,0.0,0.0,0.0,0.0,104.056667,0.333333,0.0,0.1042
002d5f7f38f8c6,3170680.0,0.75,1.75,142.005,0.75,0.25,0.25,30.0,1000.0,0.75,0.75,20.895,1870.0,15.0,15.0,-1.0,0.0,3343.5,0.0,0.0,0.0,0.0,1514.25,5.5,1500.0,0.2857


In [131]:
# Drop the 'Unnamed: 0' column (the first column of the aggregated table).
# It is dropped by name and the frame is rebound rather than mutated in place:
# the previous positional, inplace drop removed a *different* column ('assists',
# then 'boosts', ...) every time the cell was re-run. errors='ignore' makes a
# repeated run a no-op.
otherdata_merge = otherdata_merge.drop(columns=['Unnamed: 0'], errors='ignore')

In [132]:
otherdata_merge.head()

Unnamed: 0_level_0,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,killPoints,kills,killStreaks,longestKill,matchDuration,maxPlace,numGroups,rankPoints,revives,rideDistance,roadKills,swimDistance,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc
groupId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1
0001cad8adc6ea,1.5,4.75,507.5125,3.0,0.25,8.25,16.5,0.0,3.0,1.5,120.695,1867.0,24.0,23.0,1500.0,1.25,1035.225,0.0,0.0,0.25,0.25,4134.75,6.75,0.0,1.0
001959daa56f50,0.0,0.5,85.0,0.0,0.0,3.0,42.5,0.0,0.5,0.5,2.2625,916.0,52.0,49.0,1500.0,0.0,1674.8,0.5,0.0,0.0,0.0,1338.95,1.0,0.0,0.4902
001ca710bda472,0.166667,0.0,55.59,0.5,0.083333,0.0,69.5,0.0,0.333333,0.333333,2.1175,1746.0,28.0,27.0,1500.0,0.083333,0.0,0.0,0.0,0.0,0.0,135.500833,1.083333,0.0,0.037
002d040d353c68,0.0,0.0,60.4,0.0,0.0,0.0,82.0,0.0,0.0,0.0,0.0,907.0,49.0,44.0,1500.0,0.0,0.0,0.0,0.0,0.0,0.0,104.056667,0.333333,0.0,0.1042
002d5f7f38f8c6,0.75,1.75,142.005,0.75,0.25,0.25,30.0,1000.0,0.75,0.75,20.895,1870.0,15.0,15.0,-1.0,0.0,3343.5,0.0,0.0,0.0,0.0,1514.25,5.5,1500.0,0.2857


In [133]:
otherdata_merge.shape

(4205, 25)

In [134]:
# Replace the groupId index with a plain 0..n-1 integer index (groupId itself is discarded).
otherdata_merge = otherdata_merge.reset_index(drop=True)

In [135]:
otherdata_merge.head()

Unnamed: 0,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,killPoints,kills,killStreaks,longestKill,matchDuration,maxPlace,numGroups,rankPoints,revives,rideDistance,roadKills,swimDistance,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc
0,1.5,4.75,507.5125,3.0,0.25,8.25,16.5,0.0,3.0,1.5,120.695,1867.0,24.0,23.0,1500.0,1.25,1035.225,0.0,0.0,0.25,0.25,4134.75,6.75,0.0,1.0
1,0.0,0.5,85.0,0.0,0.0,3.0,42.5,0.0,0.5,0.5,2.2625,916.0,52.0,49.0,1500.0,0.0,1674.8,0.5,0.0,0.0,0.0,1338.95,1.0,0.0,0.4902
2,0.166667,0.0,55.59,0.5,0.083333,0.0,69.5,0.0,0.333333,0.333333,2.1175,1746.0,28.0,27.0,1500.0,0.083333,0.0,0.0,0.0,0.0,0.0,135.500833,1.083333,0.0,0.037
3,0.0,0.0,60.4,0.0,0.0,0.0,82.0,0.0,0.0,0.0,0.0,907.0,49.0,44.0,1500.0,0.0,0.0,0.0,0.0,0.0,0.0,104.056667,0.333333,0.0,0.1042
4,0.75,1.75,142.005,0.75,0.25,0.25,30.0,1000.0,0.75,0.75,20.895,1870.0,15.0,15.0,-1.0,0.0,3343.5,0.0,0.0,0.0,0.0,1514.25,5.5,1500.0,0.2857


In [136]:
# Export of the group-level crash/flare table, left disabled; uncomment to write 'other_data_final.csv'.
#otherdata_merge.to_csv('other_data_final.csv',index=False)

# 2. LightGBM

In [3]:
import lightgbm as lgb

# DUO


In [4]:
duo = pd.read_csv('./duo_data_final.csv')

In [5]:
duo.head()

Unnamed: 0,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,killPoints,kills,killStreaks,longestKill,matchDuration,maxPlace,numGroups,rankPoints,revives,rideDistance,roadKills,swimDistance,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc
0,0.5,0.0,50.0,0.5,0.5,0.0,54.5,1013.0,0.5,0.5,1.5855,2190.0,44.0,42.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,92.675,2.5,1501.0,0.1395
1,0.0,0.5,0.0,0.0,0.0,0.0,47.5,1000.0,0.0,0.0,0.0,1345.0,48.0,47.0,-1.0,0.0,0.0,0.0,2.544,0.0,0.0,1355.0,3.5,1500.0,0.8298
2,0.0,1.0,9.19,0.5,0.0,3.5,59.5,0.0,0.0,0.0,0.0,1939.0,46.0,46.0,1499.0,0.5,3060.5,0.0,0.0,0.0,0.0,721.4,2.0,0.0,0.5333
3,0.0,0.0,172.95,1.0,0.0,0.5,48.5,1519.0,1.0,0.5,11.68,1869.0,50.0,50.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,157.35,3.0,1604.5,0.3878
4,0.0,0.0,13.543333,0.0,0.0,0.0,93.0,1143.333333,0.0,0.0,0.0,1971.0,49.0,49.0,-1.0,0.0,0.0,0.0,0.0,0.0,0.0,119.13,1.666667,1532.333333,0.0417


In [82]:
duo.shape    # 24+1

(641702, 25)

In [6]:
# Features are every column except the target; both are converted to plain arrays.
X = np.array(duo.drop(columns=["winPlacePerc"]))
y = np.array(duo["winPlacePerc"])

# 70/30 split with a fixed seed so the partition is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=123,
)

In [84]:
lgb.LGBMRegressor()

LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
       importance_type='split', learning_rate=0.1, max_depth=-1,
       min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
       n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,
       random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=True,
       subsample=1.0, subsample_for_bin=200000, subsample_freq=0)

In [88]:
# Grid search, stage 1: tune tree depth, leaf count and learning rate.
# NOTE(review): the fitted repr reports subsample_freq=0; in LightGBM bagging_fraction
# is only applied when bagging_freq > 0, so bagging looks inactive here — confirm intent.
lgb_duo = lgb.LGBMRegressor(
    objective='regression',
    metric='rmse',
    bagging_fraction=0.8,
    feature_fraction=0.8,
)
param_lgb = dict(
    max_depth=range(10, 15, 2),
    num_leaves=range(120, 181, 30),
    learning_rate=np.arange(0.13, 0.2, 0.03),
)
# No `scoring` is given, so candidates are ranked by the estimator's own score().
gs_lgb_duo = GridSearchCV(estimator=lgb_duo, param_grid=param_lgb, cv=3)

In [89]:
gs_lgb_duo.fit(X_train, y_train)

GridSearchCV(cv=3, error_score='raise-deprecating',
       estimator=LGBMRegressor(bagging_fraction=0.8, boosting_type='gbdt', class_weight=None,
       colsample_bytree=1.0, feature_fraction=0.8, importance_type='split',
       learning_rate=0.1, max_depth=-1, metric='rmse',
       min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
       n_...=0.0, reg_lambda=0.0, silent=True,
       subsample=1.0, subsample_for_bin=200000, subsample_freq=0),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'max_depth': range(10, 15, 2), 'num_leaves': range(120, 181, 30), 'learning_rate': array([0.13, 0.16, 0.19])},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [90]:
# NOTE(review): the search was built without `scoring`, so best_score_ should be the
# regressor's default score (R^2), not a classification accuracy; the printed label
# is left unchanged.
best_cv_pct = gs_lgb_duo.best_score_ * 100
print('Best Accuracy: %.2f%%' % best_cv_pct)
print(gs_lgb_duo.best_params_)

Best Accuracy: 95.71%
{'learning_rate': 0.16, 'max_depth': 12, 'num_leaves': 180}


In [91]:
# Grid search, stage 2: learning_rate and max_depth are fixed at 0.16 and 12;
# now tune a higher num_leaves range together with L1/L2 regularisation.
# NOTE(review): bagging_fraction only takes effect in LightGBM when bagging_freq > 0,
# which is not set here — confirm intent.
lgb_duo = lgb.LGBMRegressor(
    objective='regression',
    metric='rmse',
    bagging_fraction=0.8,
    feature_fraction=0.8,
    learning_rate=0.16,
    max_depth=12,
)
param_lgb = dict(
    num_leaves=range(180, 241, 30),
    reg_alpha=np.arange(0, 0.4, 0.1),
    reg_lambda=np.arange(0, 0.4, 0.1),
)
gs_lgb_duo = GridSearchCV(estimator=lgb_duo, param_grid=param_lgb, cv=3)

In [92]:
gs_lgb_duo.fit(X_train, y_train)

GridSearchCV(cv=3, error_score='raise-deprecating',
       estimator=LGBMRegressor(bagging_fraction=0.8, boosting_type='gbdt', class_weight=None,
       colsample_bytree=1.0, feature_fraction=0.8, importance_type='split',
       learning_rate=0.16, max_depth=12, metric='rmse',
       min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
       n...=0.0, reg_lambda=0.0, silent=True,
       subsample=1.0, subsample_for_bin=200000, subsample_freq=0),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'num_leaves': range(180, 241, 30), 'reg_alpha': array([0. , 0.1, 0.2, 0.3]), 'reg_lambda': array([0. , 0.1, 0.2, 0.3])},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [93]:
# NOTE(review): the search was built without `scoring`, so best_score_ should be the
# regressor's default score (R^2), not a classification accuracy; the printed label
# is left unchanged.
best_cv_pct = gs_lgb_duo.best_score_ * 100
print('Best Accuracy: %.2f%%' % best_cv_pct)
print(gs_lgb_duo.best_params_)

Best Accuracy: 95.74%
{'num_leaves': 240, 'reg_alpha': 0.2, 'reg_lambda': 0.30000000000000004}


In [94]:
# Grid search, stage 3: regularisation is fixed at reg_alpha=0.2 / reg_lambda=0.3;
# probe whether even larger num_leaves values help.
# NOTE(review): bagging_fraction only takes effect in LightGBM when bagging_freq > 0,
# which is not set here — confirm intent.
lgb_duo = lgb.LGBMRegressor(
    objective='regression',
    metric='rmse',
    bagging_fraction=0.8,
    feature_fraction=0.8,
    learning_rate=0.16,
    max_depth=12,
    reg_alpha=0.2,
    reg_lambda=0.3,
)
param_lgb = dict(num_leaves=range(240, 301, 30))
gs_lgb_duo = GridSearchCV(estimator=lgb_duo, param_grid=param_lgb, cv=3)

In [95]:
gs_lgb_duo.fit(X_train, y_train)

GridSearchCV(cv=3, error_score='raise-deprecating',
       estimator=LGBMRegressor(bagging_fraction=0.8, boosting_type='gbdt', class_weight=None,
       colsample_bytree=1.0, feature_fraction=0.8, importance_type='split',
       learning_rate=0.16, max_depth=12, metric='rmse',
       min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
       n...=0.2, reg_lambda=0.3, silent=True,
       subsample=1.0, subsample_for_bin=200000, subsample_freq=0),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'num_leaves': range(240, 301, 30)},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [99]:
# NOTE(review): the search was built without `scoring`, so best_score_ should be the
# regressor's default score (R^2), not a classification accuracy; the printed label
# is left unchanged.
best_cv_pct = gs_lgb_duo.best_score_ * 100
print('Best Accuracy: %.2f%%' % best_cv_pct)
print(gs_lgb_duo.best_params_)

Best Accuracy: 95.74%
{'num_leaves': 240}


In [7]:
# Final duo model, built with the hyper-parameters selected by the grid searches,
# then fitted on the training split.
# NOTE(review): bagging_fraction only takes effect in LightGBM when bagging_freq > 0,
# and the fitted repr shows subsample_freq=0 — confirm intent.
lgb_duo = lgb.LGBMRegressor(
    objective='regression',
    metric='rmse',
    bagging_fraction=0.8,
    feature_fraction=0.8,
    learning_rate=0.16,
    max_depth=12,
    reg_alpha=0.2,
    reg_lambda=0.3,
    num_leaves=240,
)
lgb_duo.fit(X_train, y_train)

LGBMRegressor(bagging_fraction=0.8, boosting_type='gbdt', class_weight=None,
       colsample_bytree=1.0, feature_fraction=0.8, importance_type='split',
       learning_rate=0.16, max_depth=12, metric='rmse',
       min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
       n_estimators=100, n_jobs=-1, num_leaves=240, objective='regression',
       random_state=None, reg_alpha=0.2, reg_lambda=0.3, silent=True,
       subsample=1.0, subsample_for_bin=200000, subsample_freq=0)

In [16]:
# Evaluate the fitted duo model on the held-out 30% split: default score() and MSE.
duo_holdout_score = lgb_duo.score(X_test, y_test)
duo_holdout_mse = mean_squared_error(y_test, lgb_duo.predict(X_test))
print("The score on validation set is : %0.4f" % duo_holdout_score)
print("The mse on validation set is : %0.6f" % duo_holdout_mse)

The score on validation set is : 0.9576
The mse on validation set is : 0.003756


# Predict

In [13]:
test_duo = pd.read_csv('./test_duo.csv')

In [14]:
test_duo.shape

(284895, 25)

In [15]:
test_duo.head()

Unnamed: 0,groupId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,killPoints,kills,killStreaks,longestKill,matchDuration,maxPlace,numGroups,rankPoints,revives,rideDistance,roadKills,swimDistance,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints
0,0000120038fb95,0.5,1.5,111.75,0.0,0.0,1.5,28.5,1262.0,1.0,0.5,31.07,1884.0,48.0,45.0,-1.0,0.5,672.0,0.0,0.0,0.0,0.0,2365.0,7.5,1501.0
1,00001e221235dd,0.0,1.0,415.95,2.0,1.0,4.0,31.0,1347.5,2.0,1.0,17.78,1337.0,50.0,47.0,-1.0,0.5,0.0,0.0,0.0,0.0,0.0,448.15,3.5,1488.0
2,0000d3df0acf5c,0.0,0.0,85.9,0.5,0.5,0.0,67.0,0.0,0.5,0.5,0.9305,1368.0,49.0,48.0,1485.0,0.0,0.0,0.0,0.0,0.0,0.0,61.585,1.5,0.0
3,0000d3eee3074c,0.5,1.5,203.5,1.5,0.5,1.0,19.5,0.0,1.5,1.0,138.3,1377.0,46.0,44.0,1514.0,0.0,1001.2,0.0,0.0,0.0,0.0,1878.0,7.0,0.0
4,00012cab978c56,1.5,6.0,520.7,2.5,1.0,6.5,10.5,0.0,5.0,1.5,91.91,1878.0,49.0,47.0,1500.0,1.5,4081.0,0.0,0.0,0.0,0.0,3155.0,6.5,0.0


In [17]:
# Predict for every test group; groupId is an identifier, not a feature, so it is removed first.
test_duo_y = lgb_duo.predict(test_duo.drop(columns=['groupId']))

In [18]:
# Clamp the predictions into [0, 1], writing back into the same array.
test_duo_y[:] = np.clip(test_duo_y, 0, 1)

In [22]:
test_duo_y[:5]

array([0.83794337, 0.55426624, 0.06664795, 0.80572487, 0.95715971])

In [23]:
# Empty frame with the two output columns; the columns are filled in by the following cells.
groupid_y = pd.DataFrame(columns=['groupId','winPlacePerc'])

In [24]:
# Copy the group identifiers of the test groups into the output frame.
groupid_y['groupId'] = test_duo['groupId']

In [25]:
# Attach the clipped predictions; test_duo_y was predicted from test_duo, so the rows line up by position.
groupid_y['winPlacePerc'] = test_duo_y

In [26]:
groupid_y.head()

Unnamed: 0,groupId,winPlacePerc
0,0000120038fb95,0.837943
1,00001e221235dd,0.554266
2,0000d3df0acf5c,0.066648
3,0000d3eee3074c,0.805725
4,00012cab978c56,0.95716


In [31]:
groupid_y.shape

(284895, 2)

In [28]:
id_groupid = pd.read_csv('./duo_id_groupid.csv')
id_groupid.head()

Unnamed: 0,Id,groupId
0,639bd0dcd7bda8,430933124148dd
1,cf5b81422591d1,b7497dbdc77f4a
2,d812d2f1d88a02,6285bb4eec83e4
3,29da9623f0e316,6a47335104d64e
4,2f42b452cf99f7,b0722175ef82c0


In [39]:
# Largest test groups (number of player Ids per groupId).
# Series.value_counts() replaces the deprecated top-level pd.value_counts().
id_groupid['groupId'].value_counts().head()

ffb800e70e02d6    63
96941f08d733c8    25
3fe9ef1f8414c9    22
ae81f21cc734c0    15
23e4739cd5f186    14
Name: groupId, dtype: int64

In [45]:
groupid_y[groupid_y['groupId'] == '23e4739cd5f186'].head()

Unnamed: 0,groupId,winPlacePerc
39882,23e4739cd5f186,0.014714


In [43]:
id_groupid[id_groupid['groupId'] == '23e4739cd5f186'].head()

Unnamed: 0,Id,groupId
4605,9042ee2db4890b,23e4739cd5f186
76213,1b8fb48e198e11,23e4739cd5f186
116670,a377cff6c8be88,23e4739cd5f186
121004,0b3828a94d0bb9,23e4739cd5f186
171206,870830470aeef8,23e4739cd5f186


In [44]:
# Check that every player of one multi-member group receives the group's prediction.
# The merge is built here because this cell sits above the cell that originally
# defined duo_try1; on a fresh top-to-bottom run the lookup raised NameError.
duo_try1 = pd.merge(id_groupid, groupid_y, on=['groupId'])
duo_try1[duo_try1['groupId'] == '23e4739cd5f186'].head()

Unnamed: 0,Id,groupId,winPlacePerc
10065,9042ee2db4890b,23e4739cd5f186,0.014714
10066,1b8fb48e198e11,23e4739cd5f186,0.014714
10067,a377cff6c8be88,23e4739cd5f186,0.014714
10068,0b3828a94d0bb9,23e4739cd5f186,0.014714
10069,870830470aeef8,23e4739cd5f186,0.014714


In [32]:
id_groupid.shape

(584286, 2)

In [37]:
# Broadcast each group's prediction to every player Id of that group (inner join on groupId).
duo_try1 = id_groupid.merge(groupid_y, on='groupId')
duo_try1.head()

Unnamed: 0,Id,groupId,winPlacePerc
0,639bd0dcd7bda8,430933124148dd,0.923336
1,ef362b46754f2a,430933124148dd,0.923336
2,cf5b81422591d1,b7497dbdc77f4a,0.551914
3,271c9c5dd790f4,b7497dbdc77f4a,0.551914
4,d812d2f1d88a02,6285bb4eec83e4,0.797255


In [46]:
duo_try1.shape

(584286, 3)

In [47]:
# Per-player result: everything from the merged table except the groupId key.
y_duo = duo_try1.drop(columns=['groupId'])

In [48]:
y_duo.shape

(584286, 2)

In [49]:
y_duo.head()

Unnamed: 0,Id,winPlacePerc
0,639bd0dcd7bda8,0.923336
1,ef362b46754f2a,0.923336
2,cf5b81422591d1,0.551914
3,271c9c5dd790f4,0.551914
4,d812d2f1d88a02,0.797255


In [50]:
#y_duo.to_csv('./predict_for_y/y_duo.csv',index=False)

# SQUAD

In [51]:
squad = pd.read_csv('./squad_data_final.csv')

In [52]:
# Target is winPlacePerc; every other column of the squad frame is used as a feature.
y = np.array(squad.loc[:, "winPlacePerc"])
X = np.array(squad.drop(columns=["winPlacePerc"]))
# Hold out 30% of the rows, with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=123,
)

In [53]:
# LightGBM regressor for the squad data, built with fixed hyper-parameters.
# NOTE(review): bagging_fraction is passed without a bagging frequency (the fitted
# repr reports subsample_freq=0), so row bagging is presumably inactive - confirm
# against the LightGBM parameter documentation.
lgb_squ = lgb.LGBMRegressor(
    objective='regression',
    metric='rmse',
    bagging_fraction=0.8,
    feature_fraction=0.8,
    max_depth=12,
    learning_rate=0.13,
    num_leaves=300,
    reg_alpha=0.5,
)
# Fit on the training split; as the last expression this also displays the estimator repr.
lgb_squ.fit(X_train, y_train)

LGBMRegressor(bagging_fraction=0.8, boosting_type='gbdt', class_weight=None,
       colsample_bytree=1.0, feature_fraction=0.8, importance_type='split',
       learning_rate=0.13, max_depth=12, metric='rmse',
       min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
       n_estimators=100, n_jobs=-1, num_leaves=300, objective='regression',
       random_state=None, reg_alpha=0.5, reg_lambda=0.0, silent=True,
       subsample=1.0, subsample_for_bin=200000, subsample_freq=0)

In [54]:
# Report hold-out performance of the squad model on the 30% split created above.
# NOTE(review): .score() is presumably the scikit-learn regressor default (R^2), not an
# accuracy - confirm. The second line is the mean squared error of the same predictions.
print("The score on validation set is : %0.4f"  % lgb_squ.score(X_test , y_test) )
print("The mse on validation set is : %0.6f"  % mean_squared_error(y_test , lgb_squ.predict(X_test)))

The score on validation set is : 0.9305
The mse on validation set is : 0.006352


# Predict

In [55]:
test_squad = pd.read_csv('./test_squad.csv')

In [56]:
test_squad.shape

(295427, 25)

In [57]:
test_squad.head()

Unnamed: 0,groupId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,killPoints,kills,killStreaks,longestKill,matchDuration,maxPlace,numGroups,rankPoints,revives,rideDistance,roadKills,swimDistance,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints
0,00000b5b45f70c,0.333333,6.333333,100.186667,1.0,0.0,6.666667,35.0,0.0,1.0,0.333333,22.116667,1586.0,26.0,25.0,1495.333333,0.666667,5755.333333,0.333333,0.0,0.0,0.0,1472.333333,5.0,0.0
1,00000fb8f2208b,0.5,0.0,47.3,0.0,0.0,0.0,70.5,0.0,0.0,0.0,0.0,1359.0,30.0,29.0,1419.5,0.0,0.0,0.0,0.0,0.0,0.0,290.85,4.0,0.0
2,000022937e1c55,0.0,1.0,80.75,0.75,0.0,1.5,62.75,0.0,0.25,0.25,4.83,1420.0,26.0,26.0,1486.0,0.25,0.0,0.0,0.0,0.0,0.0,264.05,2.25,0.0
3,000030557443b6,0.5,3.0,344.0,2.0,0.0,0.5,21.0,1389.0,1.5,1.0,16.395,1341.0,31.0,31.0,-1.0,0.5,0.02545,0.0,25.1,0.0,0.0,3337.5,6.0,1534.0
4,00004175e20fe7,0.25,0.5,69.885,0.0,0.0,1.75,52.5,1148.5,0.25,0.25,0.937,1982.0,28.0,28.0,-1.0,0.5,974.25,0.0,0.0,0.0,0.0,715.5143,3.25,1464.75


In [58]:
# Predict one value per squad group; groupId is an identifier, not a feature.
# NOTE(review): the model was fitted on a plain numpy array, so the remaining test
# columns presumably must be in the same order as the training columns - confirm.
test_squad_y = lgb_squ.predict(test_squad.drop(columns=['groupId']))

In [59]:
# Clamp predictions into [0, 1]: values below 0 become 0 and values above 1 become 1.
test_squad_y = np.clip(test_squad_y, 0, 1)

In [60]:
# Pair every squad groupId with its clipped prediction (row order follows test_squad).
groupid_y = pd.DataFrame(
    {'groupId': test_squad['groupId'], 'winPlacePerc': test_squad_y},
    columns=['groupId', 'winPlacePerc'],
)

In [61]:
groupid_y.head()

Unnamed: 0,groupId,winPlacePerc
0,00000b5b45f70c,0.740641
1,00000fb8f2208b,0.307538
2,000022937e1c55,0.260517
3,000030557443b6,0.919318
4,00004175e20fe7,0.419215


In [62]:
id_groupid = pd.read_csv('./squad_id_groupid.csv')
id_groupid.head()

Unnamed: 0,Id,groupId
0,9329eb41e215eb,676b23c24e70d6
1,63d5c8ef8dfe91,0b45f5db20ba99
2,ee6a295187ba21,6604ce20a1d230
3,3e2539b5d78183,029b5a79e08cd6
4,a8a377e4d43bf8,61ec2e7730a3b8


In [64]:
# Broadcast each squad's predicted winPlacePerc to every player Id in that squad.
# validate='many_to_one' makes pandas raise if groupid_y contains a groupId twice,
# which would otherwise silently duplicate player rows.
try1 = pd.merge(id_groupid, groupid_y, on=['groupId'], validate='many_to_one')
# The default inner join drops players whose groupId has no prediction; fail loudly
# instead of producing an incomplete result.
assert len(try1) == len(id_groupid), "some player Ids lost their prediction in the merge"
try1.head()

Unnamed: 0,Id,groupId,winPlacePerc
0,9329eb41e215eb,676b23c24e70d6,0.265636
1,d6267a32c5709c,676b23c24e70d6,0.265636
2,b896f8954a92e2,676b23c24e70d6,0.265636
3,2f134f2c7be198,676b23c24e70d6,0.265636
4,63d5c8ef8dfe91,0b45f5db20ba99,0.824983


In [65]:
# Keep only the per-player columns (Id + prediction); groupId was just the join key.
y_squad = try1.drop(columns=['groupId'])

In [66]:
y_squad.head()

Unnamed: 0,Id,winPlacePerc
0,9329eb41e215eb,0.265636
1,d6267a32c5709c,0.265636
2,b896f8954a92e2,0.265636
3,2f134f2c7be198,0.265636
4,63d5c8ef8dfe91,0.824983


In [67]:
#y_squad.to_csv('./predict_for_y/y_squad.csv',index=False)

# OTHER

In [68]:
other = pd.read_csv('./other_data_final.csv')

In [69]:
other.shape

(4205, 25)

In [70]:
# Target is winPlacePerc; every other column of the "other" frame is used as a feature.
y = np.array(other.loc[:, "winPlacePerc"])
X = np.array(other.drop(columns=["winPlacePerc"]))
# Hold out 30% of the rows, with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=123,
)

In [140]:
lgb.LGBMRegressor()

LGBMRegressor(boosting_type='gbdt', class_weight=None, colsample_bytree=1.0,
       importance_type='split', learning_rate=0.1, max_depth=-1,
       min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
       n_estimators=100, n_jobs=-1, num_leaves=31, objective=None,
       random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=True,
       subsample=1.0, subsample_for_bin=200000, subsample_freq=0)

In [141]:
# Round 1 of the search: tree depth, leaf count and learning rate.
lgb_other = lgb.LGBMRegressor(objective='regression', metric='rmse',
                              bagging_fraction=0.8, feature_fraction=0.8)
# Float candidates are written out explicitly: np.arange with a fractional step
# accumulates rounding error and yields values such as 0.06999999999999999.
param_lgb = {
    'max_depth': range(10, 15, 2),
    'num_leaves': range(120, 181, 30),
    'learning_rate': [0.07, 0.10, 0.13, 0.16],
}
# No scoring argument is given, so candidates are ranked by the estimator's own score().
gs_lgb_other = GridSearchCV(lgb_other, param_lgb, cv=3)

In [142]:
gs_lgb_other.fit(X_train, y_train)

GridSearchCV(cv=3, error_score='raise-deprecating',
       estimator=LGBMRegressor(bagging_fraction=0.8, boosting_type='gbdt', class_weight=None,
       colsample_bytree=1.0, feature_fraction=0.8, importance_type='split',
       learning_rate=0.1, max_depth=-1, metric='rmse',
       min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
       n_...=0.0, reg_lambda=0.0, silent=True,
       subsample=1.0, subsample_for_bin=200000, subsample_freq=0),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'max_depth': range(10, 15, 2), 'num_leaves': range(120, 181, 30), 'learning_rate': array([0.07, 0.1 , 0.13, 0.16])},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [143]:
# NOTE(review): the search was built with scoring=None, so best_score_ is the mean
# cross-validated value of the estimator's own score() (presumably R^2 for a
# regressor), not a classification accuracy - the printed label is misleading.
print('Best Accuracy: %.2f%%' % (gs_lgb_other.best_score_*100))
print(gs_lgb_other.best_params_)

Best Accuracy: 89.76%
{'learning_rate': 0.07, 'max_depth': 12, 'num_leaves': 120}


In [144]:
# Round 2 of the search: max_depth fixed at 12; refine leaf count and learning rate
# below the previous optimum.
lgb_other = lgb.LGBMRegressor(objective='regression', metric='rmse',
                              bagging_fraction=0.8, feature_fraction=0.8,
                              max_depth=12)
# Explicit float candidates instead of np.arange(0.01, 0.08, 0.03), which produced
# 0.06999999999999999 rather than 0.07 in the reported best parameters.
param_lgb = {
    'num_leaves': range(60, 121, 30),
    'learning_rate': [0.01, 0.04, 0.07],
}
# No scoring argument is given, so candidates are ranked by the estimator's own score().
gs_lgb_other = GridSearchCV(lgb_other, param_lgb, cv=3)

In [145]:
gs_lgb_other.fit(X_train, y_train)

GridSearchCV(cv=3, error_score='raise-deprecating',
       estimator=LGBMRegressor(bagging_fraction=0.8, boosting_type='gbdt', class_weight=None,
       colsample_bytree=1.0, feature_fraction=0.8, importance_type='split',
       learning_rate=0.1, max_depth=12, metric='rmse',
       min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
       n_...=0.0, reg_lambda=0.0, silent=True,
       subsample=1.0, subsample_for_bin=200000, subsample_freq=0),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'num_leaves': range(60, 121, 30), 'learning_rate': array([0.01, 0.04, 0.07])},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [146]:
# NOTE(review): the search was built with scoring=None, so best_score_ is the mean
# cross-validated value of the estimator's own score() (presumably R^2 for a
# regressor), not a classification accuracy - the printed label is misleading.
print('Best Accuracy: %.2f%%' % (gs_lgb_other.best_score_*100))
print(gs_lgb_other.best_params_)

Best Accuracy: 89.82%
{'learning_rate': 0.06999999999999999, 'num_leaves': 60}


In [147]:
# Round 3 of the search: max_depth and learning rate fixed; search leaf count
# together with the L1 / L2 regularisation strengths.
lgb_other = lgb.LGBMRegressor(objective='regression', metric='rmse',
                              bagging_fraction=0.8, feature_fraction=0.8,
                              max_depth=12, learning_rate=0.07)
# Explicit float candidates instead of np.arange(0, 0.4, 0.1), which produced
# 0.30000000000000004 rather than 0.3 in the reported best parameters.
param_lgb = {
    'num_leaves': range(20, 61, 20),
    'reg_alpha': [0.0, 0.1, 0.2, 0.3],
    'reg_lambda': [0.0, 0.1, 0.2, 0.3],
}
# No scoring argument is given, so candidates are ranked by the estimator's own score().
gs_lgb_other = GridSearchCV(lgb_other, param_lgb, cv=3)

In [148]:
gs_lgb_other.fit(X_train, y_train)

GridSearchCV(cv=3, error_score='raise-deprecating',
       estimator=LGBMRegressor(bagging_fraction=0.8, boosting_type='gbdt', class_weight=None,
       colsample_bytree=1.0, feature_fraction=0.8, importance_type='split',
       learning_rate=0.07, max_depth=12, metric='rmse',
       min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
       n...=0.0, reg_lambda=0.0, silent=True,
       subsample=1.0, subsample_for_bin=200000, subsample_freq=0),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'num_leaves': range(20, 61, 20), 'reg_alpha': array([0. , 0.1, 0.2, 0.3]), 'reg_lambda': array([0. , 0.1, 0.2, 0.3])},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [149]:
# NOTE(review): the search was built with scoring=None, so best_score_ is the mean
# cross-validated value of the estimator's own score() (presumably R^2 for a
# regressor), not a classification accuracy - the printed label is misleading.
print('Best Accuracy: %.2f%%' % (gs_lgb_other.best_score_*100))
print(gs_lgb_other.best_params_)

Best Accuracy: 89.88%
{'num_leaves': 60, 'reg_alpha': 0.0, 'reg_lambda': 0.30000000000000004}


In [152]:
# Round 4 of the search: the previous best reg_lambda sat on the upper edge of its
# grid, so extend the range upwards with everything else fixed.
lgb_other = lgb.LGBMRegressor(objective='regression', metric='rmse',
                              bagging_fraction=0.8, feature_fraction=0.8,
                              max_depth=12, learning_rate=0.07, num_leaves=60)
# Explicit float candidates; np.arange with a fractional step is prone to rounding
# artifacts and an unpredictable number of elements.
param_lgb = {'reg_lambda': [0.3, 0.4, 0.5]}
# No scoring argument is given, so candidates are ranked by the estimator's own score().
gs_lgb_other = GridSearchCV(lgb_other, param_lgb, cv=3)

In [153]:
gs_lgb_other.fit(X_train, y_train)

GridSearchCV(cv=3, error_score='raise-deprecating',
       estimator=LGBMRegressor(bagging_fraction=0.8, boosting_type='gbdt', class_weight=None,
       colsample_bytree=1.0, feature_fraction=0.8, importance_type='split',
       learning_rate=0.07, max_depth=12, metric='rmse',
       min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
       n...=0.0, reg_lambda=0.0, silent=True,
       subsample=1.0, subsample_for_bin=200000, subsample_freq=0),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'reg_lambda': array([0.3, 0.4, 0.5])},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring=None, verbose=0)

In [154]:
# NOTE(review): the search was built with scoring=None, so best_score_ is the mean
# cross-validated value of the estimator's own score() (presumably R^2 for a
# regressor), not a classification accuracy - the printed label is misleading.
print('Best Accuracy: %.2f%%' % (gs_lgb_other.best_score_*100))
print(gs_lgb_other.best_params_)

Best Accuracy: 89.88%
{'reg_lambda': 0.3}


In [71]:
# Final "other" model, using the hyper-parameters selected by the grid searches above.
# NOTE(review): bagging_fraction is passed without a bagging frequency (the fitted
# repr reports subsample_freq=0), so row bagging is presumably inactive - confirm
# against the LightGBM parameter documentation.
lgb_other = lgb.LGBMRegressor(
    objective='regression',
    metric='rmse',
    bagging_fraction=0.8,
    feature_fraction=0.8,
    learning_rate=0.07,
    max_depth=12,
    reg_alpha=0,
    reg_lambda=0.3,
    num_leaves=60,
)
# Fit on the training split; as the last expression this also displays the estimator repr.
lgb_other.fit(X_train, y_train)

LGBMRegressor(bagging_fraction=0.8, boosting_type='gbdt', class_weight=None,
       colsample_bytree=1.0, feature_fraction=0.8, importance_type='split',
       learning_rate=0.07, max_depth=12, metric='rmse',
       min_child_samples=20, min_child_weight=0.001, min_split_gain=0.0,
       n_estimators=100, n_jobs=-1, num_leaves=60, objective='regression',
       random_state=None, reg_alpha=0, reg_lambda=0.3, silent=True,
       subsample=1.0, subsample_for_bin=200000, subsample_freq=0)

In [72]:
# Report hold-out performance of the "other" model on the 30% split created above.
# NOTE(review): .score() is presumably the scikit-learn regressor default (R^2), not an
# accuracy - confirm. The second line is the mean squared error of the same predictions.
print("The score on validation set is : %0.4f"  % lgb_other.score(X_test , y_test) )
print("The mse on validation set is : %0.6f"  % mean_squared_error(y_test , lgb_other.predict(X_test)))

The score on validation set is : 0.8869
The mse on validation set is : 0.010084


# Predict

In [74]:
test_other = pd.read_csv('./test_other.csv')

In [75]:
test_other.shape

(1642, 25)

In [76]:
test_other.head()

Unnamed: 0,groupId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,killPoints,kills,killStreaks,longestKill,matchDuration,maxPlace,numGroups,rankPoints,revives,rideDistance,roadKills,swimDistance,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints
0,000330241fb75b,0.0,0.25,113.3325,0.5,0.5,0.5,41.5,0.0,0.5,0.5,40.6,1781.0,24.0,24.0,1500.0,0.25,1585.5,0.0,0.0,0.0,0.0,2201.0,6.0,0.0
1,000889fecd6a67,0.0,3.5,73.95,0.5,0.0,1.0,21.5,0.0,1.5,1.0,21.015,906.0,48.0,46.0,1500.0,0.0,6116.5,1.5,0.0,0.0,0.0,407.6,2.5,0.0
2,00438c61c81452,0.0,1.0,39.16,0.0,0.0,0.5,61.5,0.0,0.0,0.0,0.0,897.0,49.0,46.0,1500.0,0.0,0.0,0.0,3.349,0.0,0.0,1221.05,0.5,0.0
3,009a3e3fa4f021,0.0,3.0,0.0,0.0,0.0,4.0,55.0,0.0,0.0,0.0,0.0,865.0,50.0,49.0,1500.0,0.0,0.0,0.0,55.44,0.0,0.0,2120.0,0.0,0.0
4,00a566d6c39132,0.0,1.0,0.0,0.0,0.0,1.0,57.0,0.0,0.0,0.0,0.0,866.0,51.0,49.0,1500.0,0.0,3049.0,0.0,0.0,0.0,1.0,618.1,3.0,0.0


In [77]:
# Predict one value per group; groupId is an identifier, not a feature.
# NOTE(review): the model was fitted on a plain numpy array, so the remaining test
# columns presumably must be in the same order as the training columns - confirm.
test_other_y = lgb_other.predict(test_other.drop(columns=['groupId']))

In [78]:
# Clamp predictions into [0, 1]: values below 0 become 0 and values above 1 become 1.
test_other_y = np.clip(test_other_y, 0, 1)

In [79]:
# Pair every groupId with its clipped prediction (row order follows test_other).
groupid_y = pd.DataFrame(
    {'groupId': test_other['groupId'], 'winPlacePerc': test_other_y},
    columns=['groupId', 'winPlacePerc'],
)

In [80]:
id_groupid = pd.read_csv('./other_id_groupid.csv')
id_groupid.head()

Unnamed: 0,Id,groupId
0,a4943dd17dcb11,61a1b2e536d418
1,ef72584125b706,e8035b15fcff63
2,d1d9e926ba874d,fdc024aa78f068
3,32aed0f33cadf9,490f0ff7d50d26
4,42337be5970209,7dd77ad11bdb91


In [81]:
# Broadcast each group's predicted winPlacePerc to every player Id in that group.
# validate='many_to_one' makes pandas raise if groupid_y contains a groupId twice,
# which would otherwise silently duplicate player rows.
try1 = pd.merge(id_groupid, groupid_y, on=['groupId'], validate='many_to_one')
# The default inner join drops players whose groupId has no prediction; fail loudly
# instead of producing an incomplete result.
assert len(try1) == len(id_groupid), "some player Ids lost their prediction in the merge"
try1.head()

Unnamed: 0,Id,groupId,winPlacePerc
0,a4943dd17dcb11,61a1b2e536d418,0.097166
1,86824de82899e9,61a1b2e536d418,0.097166
2,c5a87d859081fc,61a1b2e536d418,0.097166
3,ef72584125b706,e8035b15fcff63,0.267062
4,559896ab7e4a68,e8035b15fcff63,0.267062


In [84]:
# Keep only the per-player columns (Id + prediction); groupId was just the join key.
y_other = try1.drop(columns=['groupId'])

In [85]:
y_other.head()

Unnamed: 0,Id,winPlacePerc
0,a4943dd17dcb11,0.097166
1,86824de82899e9,0.097166
2,c5a87d859081fc,0.097166
3,ef72584125b706,0.267062
4,559896ab7e4a68,0.267062


In [86]:
#y_other.to_csv('./predict_for_y/y_other.csv',index=False)