In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/pubg-finish-placement-prediction/test_V2.csv
/kaggle/input/pubg-finish-placement-prediction/train_V2.csv
/kaggle/input/pubg-finish-placement-prediction/sample_submission_V2.csv


In [3]:
def reduce_mem_usage(df):
    """ iterate through all the columns of a dataframe and modify the data type
        to reduce memory usage.        
    """
    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))
    
    for col in df.columns:
        col_type = df[col].dtype
        
        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)  
            else:
                if c_min > np.finfo(np.float16).min and c_max < np.finfo(np.float16).max:
                    df[col] = df[col].astype(np.float16)
                elif c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)
        else:
            df[col] = df[col].astype('category')

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    print('Decreased by {:.1f}%'.format(100 * (start_mem - end_mem) / start_mem))
    
    return df


def import_data(file):
    """create a dataframe and optimize its memory usage"""
    df = pd.read_csv(file, parse_dates=True, keep_date_col=True)
    df = reduce_mem_usage(df)
    return df

In [4]:
print('-' * 80)
print('train')
train = import_data('/kaggle/input/pubg-finish-placement-prediction/train_V2.csv')

print('-' * 80)
print('test')
test = import_data('/kaggle/input/pubg-finish-placement-prediction/test_V2.csv')

--------------------------------------------------------------------------------
train
Memory usage of dataframe is 983.90 MB
Memory usage after optimization is: 498.81 MB
Decreased by 49.3%
--------------------------------------------------------------------------------
test
Memory usage of dataframe is 413.18 MB
Memory usage after optimization is: 225.31 MB
Decreased by 45.5%


In [5]:
train.shape

(4446966, 29)

In [6]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4446966 entries, 0 to 4446965
Data columns (total 29 columns):
 #   Column           Dtype   
---  ------           -----   
 0   Id               category
 1   groupId          category
 2   matchId          category
 3   assists          int8    
 4   boosts           int8    
 5   damageDealt      float16 
 6   DBNOs            int8    
 7   headshotKills    int8    
 8   heals            int8    
 9   killPlace        int8    
 10  killPoints       int16   
 11  kills            int8    
 12  killStreaks      int8    
 13  longestKill      float16 
 14  matchDuration    int16   
 15  matchType        category
 16  maxPlace         int8    
 17  numGroups        int8    
 18  rankPoints       int16   
 19  revives          int8    
 20  rideDistance     float16 
 21  roadKills        int8    
 22  swimDistance     float16 
 23  teamKills        int8    
 24  vehicleDestroys  int8    
 25  walkDistance     float16 
 26  weaponsAcquire

In [7]:
train['playersjoined']=train.groupby('matchId')['matchId'].transform('count')
train.head()

Unnamed: 0,Id,groupId,matchId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,...,rideDistance,roadKills,swimDistance,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc,playersjoined
0,7f96b2f878858a,4d4b580de459be,a10357fd1a4a91,0,0,0.0,0,0,0,60,...,0.0,0,0.0,0,0,244.75,1,1466,0.444336,96
1,eef90569b9d03c,684d5656442f9e,aeb375fc57110c,0,0,91.5,0,0,0,57,...,0.004501,0,11.039062,0,0,1434.0,5,0,0.640137,91
2,1eaf90ac73de72,6a4a42c3245a74,110163d8bb94ae,1,0,68.0,0,0,0,47,...,0.0,0,0.0,0,0,161.75,2,0,0.775391,98
3,4616d365dd2853,a930a9c79cd721,f1f1f4ef412d7e,0,0,32.90625,0,0,0,75,...,0.0,0,0.0,0,0,202.75,3,0,0.166748,91
4,315c96c26c9aac,de04010b3458dd,6dc8ff871e21e6,0,0,100.0,0,0,0,45,...,0.0,0,0.0,0,0,49.75,2,0,0.1875,97


In [8]:
train['traveldistance']=train['walkDistance']+train['swimDistance']+train['rideDistance']

In [9]:
train['healsandboosts']=train['heals']+train['boosts']

# outliers

In [10]:
train['killingwithoutmoving']=(train['kills']>0) & (train['traveldistance']== 0)

In [11]:
train.drop(train[train['killingwithoutmoving']==True].index,inplace=True)

In [12]:
train[train['winPlacePerc'].isnull()]

Unnamed: 0,Id,groupId,matchId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,...,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc,playersjoined,traveldistance,healsandboosts,killingwithoutmoving
2744604,f70c74418bb064,12dfbede33f92b,224a123c53e008,0,0,0.0,0,0,0,1,...,0,0,0.0,0,0,,1,0.0,0,False


In [13]:
train.drop(2744604,inplace=True)

In [14]:
train[train['winPlacePerc'].isnull()]

Unnamed: 0,Id,groupId,matchId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,...,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc,playersjoined,traveldistance,healsandboosts,killingwithoutmoving


In [15]:
train.shape

(4445430, 33)

In [16]:
train[train['walkDistance']>10000]

Unnamed: 0,Id,groupId,matchId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,...,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc,playersjoined,traveldistance,healsandboosts,killingwithoutmoving
23026,8a6562381dd83f,23e638cd6eaf77,b0a804a610e9b0,0,1,0.00000,0,0,0,44,...,0,0,13528.0,7,0,0.816406,99,13536.0,1,False
34344,5a591ecc957393,6717370b51c247,a15d93e7165b05,0,3,23.21875,0,0,1,34,...,0,0,10032.0,7,1533,0.947266,65,10072.0,4,False
49312,582685f487f0b4,338112cd12f1e7,d0afbf5c3a6dc9,0,4,117.18750,1,0,1,24,...,0,0,12408.0,3,0,0.913086,94,12448.0,5,False
68590,8c0d9dd0b4463c,c963553dc937e9,926681ea721a47,0,1,32.34375,0,0,1,46,...,0,0,11592.0,3,1563,0.833496,96,12488.0,2,False
94400,d441bebd01db61,7e179b3366adb8,923b57b8b834cc,1,1,73.06250,0,0,3,27,...,0,0,10440.0,6,1488,0.819336,73,11496.0,4,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4306598,df451c374c4c9b,419e66ee851227,31bc0ed3bc2607,0,1,0.00000,0,0,11,43,...,0,0,12968.0,3,0,0.852051,98,13000.0,12,False
4370543,c8bdfbbb850447,fbb7a76b2b8d11,9968551b4b470a,0,0,37.84375,0,0,1,44,...,0,0,10368.0,5,1589,0.877441,99,10376.0,1,False
4380785,9d855e5e36307d,a5ddf6aaf1e199,fbf537fe68abb0,0,3,0.00000,0,0,5,49,...,0,0,11072.0,3,0,0.750000,94,12824.0,8,False
4405009,dfaf3309cb27b2,f318b2e7acf3f3,7fca6dfd204455,3,8,654.00000,6,1,11,1,...,0,0,13432.0,10,1503,1.000000,93,13584.0,19,False


In [17]:
train.drop(train[train['walkDistance']>10000].index,inplace=True)

In [18]:
train[train['swimDistance']>2000]

Unnamed: 0,Id,groupId,matchId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,...,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc,playersjoined,traveldistance,healsandboosts,killingwithoutmoving
177973,c2e9e5631f4e54,23213058f83abe,f01eb1073ef377,0,5,78.125,1,0,1,47,...,0,0,1002.0,4,1466,0.958984,98,3298.0,6,False
274258,ba5e3dfb5a0fa0,383db055216ec2,d6e13468e28ab4,0,4,53.3125,0,0,16,39,...,0,0,2276.0,5,0,0.959961,94,10112.0,20,False
1005337,d50c9d0e65fe2a,4996575c11abcb,668402592429f8,0,1,503.0,4,3,1,6,...,0,0,4692.0,5,1516,1.0,88,10736.0,2,False
1195818,f811de9de80b70,d08ddf7beb6252,8a48703ab52ec8,0,7,352.25,3,1,6,4,...,0,0,415.0,10,1499,1.0,98,3084.0,13,False
1227362,a33e917875c80e,5b72674b42712b,5fb0d8b1fc16cf,0,1,589.0,3,1,1,46,...,0,0,995.5,9,0,1.0,86,4820.0,2,False
1889163,bd8cc3083a9923,1d5d17140d6fa4,8e2e6022d6e5c8,0,0,0.0,0,0,0,47,...,0,0,1966.0,3,0,0.583496,87,5312.0,0,False
2065940,312ccbb27b99aa,47c7f4d69e2fb1,b4b11756321f3a,1,3,49.59375,0,0,5,48,...,0,0,4296.0,8,1492,0.851074,96,9896.0,8,False
2327586,8773d0687c6aae,b17f46f9f6666c,56ee5897512c86,3,1,474.5,2,0,0,7,...,0,0,7.546875,6,0,1.0,91,2396.0,1,False
2784855,a8653b87e83892,383db055216ec2,d6e13468e28ab4,1,4,844.0,5,5,2,2,...,0,0,4688.0,4,0,0.959961,94,9928.0,6,False
3359439,3713b36e1ba9e1,1f7aed9240864a,584447ed875c85,0,0,0.0,0,0,0,77,...,0,0,1750.0,0,1518,0.214355,96,4088.0,0,False


In [19]:
train.drop(train[train['swimDistance']>2000].index,inplace=True)

In [20]:
train[train['rideDistance']>20000]

Unnamed: 0,Id,groupId,matchId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,...,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc,playersjoined,traveldistance,healsandboosts,killingwithoutmoving
28588,6260f7c49dc16f,b24589f02eedd7,6ebea3b4f55b4a,0,0,99.18750,0,0,1,30,...,0,0,376.500000,6,0,0.642090,96,26320.0,1,False
63015,adb7dae4d0c10a,8ede98a241f30a,8b36eac66378e4,0,0,0.00000,0,0,0,55,...,0,0,185.375000,0,0,0.537598,94,22080.0,0,False
70507,ca6fa339064d67,f7bb2e30c3461f,3bfd8d66edbeff,0,0,100.00000,0,0,0,26,...,0,0,467.500000,3,0,0.887695,99,28912.0,0,False
72763,198e5894e68ff4,ccf47c82abb11f,d92bf8e696b61d,0,0,0.00000,0,0,0,46,...,1,0,687.000000,9,0,0.791504,97,21200.0,0,False
95276,c3fabfce7589ae,15529e25aa4a74,d055504340e5f4,0,7,778.00000,0,1,2,2,...,0,0,923.000000,6,0,0.978516,94,26736.0,9,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4270943,7a9ef7be64e37e,4873e2b0d83c69,653dc89930fdc2,0,7,560.00000,0,3,3,2,...,0,1,969.000000,5,1576,0.989746,99,33408.0,10,False
4301013,4985541631d98b,d0e454dc0ac3cf,3f819651e3c666,0,0,15.46875,0,0,0,53,...,1,0,50.250000,0,1489,0.500000,87,25072.0,0,False
4386384,5d472a1fcad9c9,f94054ff58eab0,ad37f12700c1ba,0,0,0.00000,0,0,0,47,...,0,0,470.000000,3,1500,0.777832,91,26688.0,0,False
4404738,810bcb9e6ff249,605266536c0f45,7f817462a20c19,0,1,84.37500,0,0,0,38,...,1,0,114.687500,0,1523,0.644043,74,24336.0,1,False


In [21]:
train.drop(train[train['rideDistance']>20000].index,inplace=True)

In [22]:
train[train['longestKill']>1000]

Unnamed: 0,Id,groupId,matchId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,...,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc,playersjoined,traveldistance,healsandboosts,killingwithoutmoving
240005,41c2f5c0699807,9faecf87ab4275,634edab75860b3,5,0,1284.0,8,5,7,18,...,0,0,48.875,38,1500,0.538574,29,48.875,7,False
324313,ef390c152bcc3d,30fd444be3bbc1,4f7f8d6cf558b4,2,0,1028.0,0,0,0,9,...,0,0,1264.0,26,0,1.0,51,2980.0,0,False
803632,4e7e6c74e3c57d,94698690918933,da91b0c3d875f8,0,0,196.75,0,0,0,51,...,0,0,1074.0,22,0,0.0,61,3158.0,0,False
895411,1f5ba6e0cfb968,512ea24b831be3,5fb0d8b1fc16cf,4,0,1012.0,11,5,0,5,...,0,0,569.5,18,0,0.90918,86,569.5,0,False
1172437,303a93cfa1f46c,8795d39fd0df86,9c8962b58bb3e3,2,1,329.25,0,0,2,45,...,0,0,832.5,9,1500,0.285645,58,832.5,3,False
1209416,528659ff1c1aec,7d1ba83423551d,ea9386587d5888,0,6,1640.0,0,7,0,1,...,0,0,2848.0,4,1500,0.941406,52,2848.0,6,False
1642712,91966848e08e2f,0ee4fbd27657c9,17dea22cefe62a,3,2,2104.0,0,4,11,11,...,0,0,235.25,41,0,0.5,28,235.25,13,False
2015559,5ff0c1a9fab2ba,2d8119b1544f87,904cecf36217df,3,3,1302.0,0,6,5,15,...,0,0,133.25,43,0,0.600098,42,133.25,8,False
2122128,42df3102cb540b,7d9b2be15b355b,610d78f3affd2e,5,0,2500.0,0,7,1,2,...,2,0,464.5,52,0,0.0,10,464.5,1,False
2152425,4b9f61bac5eb0a,bc717b964f3bbe,838cb9a3c94598,3,0,945.5,0,0,0,11,...,0,0,844.5,14,0,0.571289,60,844.5,0,False


In [23]:
train.drop(train[train['longestKill']>1000].index,inplace=True)

In [24]:
train[train['kills']>30]

Unnamed: 0,Id,groupId,matchId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,...,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc,playersjoined,traveldistance,healsandboosts,killingwithoutmoving
57978,9d8253e21ccbbd,ef7135ed856cd8,37f05e2a01015f,9,0,3724.0,0,7,0,2,...,0,0,48.812500,48,1500,0.856934,16,48.812500,0,False
87793,45f76442384931,b3627758941d34,37f05e2a01015f,8,0,3088.0,0,8,27,3,...,0,0,780.500000,45,1500,1.000000,16,780.500000,27,False
156599,746aa7eabf7c86,5723e7d8250da3,f900de1ec39fa5,21,0,5480.0,0,12,7,4,...,0,0,23.703125,61,0,0.700195,11,23.703125,7,False
160254,15622257cb44e2,1a513eeecfe724,db413c7c48292c,1,0,4032.0,0,40,0,1,...,1,0,718.500000,16,1500,1.000000,62,718.500000,0,False
180189,1355613d43e2d0,f863cd38c61dbf,39c442628f5df5,5,0,3172.0,0,6,15,1,...,0,0,71.500000,41,0,1.000000,11,71.500000,15,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4021018,1f1c3dda0296df,7c95f475fd2cdb,a9e84c456cc859,6,0,3406.0,0,8,10,2,...,0,0,2264.000000,40,0,0.916504,13,3816.000000,10,False
4127904,f699c842c5dfab,9d69e4e697d296,1ac375e4121651,5,0,3420.0,39,20,1,1,...,0,0,319.250000,29,1500,0.750000,36,319.250000,1,False
4148675,5283367a7f8d06,35b9b765110fd2,f900de1ec39fa5,12,0,3050.0,0,5,4,7,...,0,0,123.812500,89,0,0.399902,11,123.812500,4,False
4235682,6874be9215646b,af1d17223258d0,0f09bd72c4ba97,1,0,3006.0,0,9,18,2,...,0,0,557.500000,20,1500,0.428711,28,557.500000,18,False


In [25]:
train.drop(train[train['kills']>30].index,inplace=True)

In [26]:
train[train['roadKills']>12]

Unnamed: 0,Id,groupId,matchId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,...,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc,playersjoined,traveldistance,healsandboosts,killingwithoutmoving
2733926,c3e444f7d1289f,489dd6d1f2b3bb,4797482205aaa4,0,0,1246.0,0,0,0,1,...,0,0,1277.0,0,1371,0.428711,92,1282.0,0,False
2890740,a3438934e3e535,1081c315a80d14,fe744430ac0070,0,8,2074.0,0,1,11,1,...,0,0,3150.0,4,1568,1.0,38,5876.0,19,False


In [27]:
train.drop(train[train['roadKills']>12].index,inplace=True)

In [28]:
train[train['weaponsAcquired']>70]

Unnamed: 0,Id,groupId,matchId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,...,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc,playersjoined,traveldistance,healsandboosts,killingwithoutmoving
104825,4fe1c1b84d0e4c,62bc19a85c2ed8,17dea22cefe62a,3,0,2652.0,0,3,3,9,...,0,0,1024.0,71,0,0.928711,28,4644.0,3,False
180523,aa4031a9ba3fbf,04f57c0cfb1306,5921b55b9e96b5,7,0,2868.0,0,2,5,5,...,0,0,1436.0,72,0,0.733398,16,3108.0,5,False
233643,7c8c83f5f97d0f,b33b210a52a2f8,2e8a0917a71c43,0,0,67.125,0,0,0,44,...,0,0,1307.0,128,1577,0.710938,91,3188.0,0,False
588387,c58e3e0c2ba678,3d3e6100c07ff0,d04dbb98249f76,0,1,175.25,1,0,2,48,...,0,0,1687.0,80,0,0.75,98,1687.0,3,False
1004726,a77a0e7230bdca,1a79c284fd3661,e024bf51bf1799,1,0,881.5,0,1,0,19,...,1,0,1479.0,72,0,0.0,19,1679.0,0,False
1437471,8f0c855d23e4cd,679c3316056de8,fbaf1b3ae1d884,1,0,100.0,0,0,0,24,...,0,0,2400.0,102,0,0.893066,97,5300.0,0,False
1449293,db54cf45b9ed1c,898fccaeeb041d,484b4ae51fe80f,0,0,0.0,0,0,0,54,...,0,0,653.0,95,0,0.560059,80,653.0,0,False
1592744,634a224c53444e,75fa7591d1538c,f900de1ec39fa5,9,0,1726.0,0,3,0,9,...,0,0,981.0,94,0,0.199951,11,2888.0,0,False
1834515,e927e9020f7e0d,60d6b714a7308f,8e265af296007b,1,1,26.453125,0,0,1,51,...,0,0,2836.0,96,0,0.919922,95,2836.0,2,False
2373240,a8c150d5d3d3e2,6e1c8c5f91e060,f900de1ec39fa5,12,0,1746.0,0,1,1,11,...,0,0,276.0,96,0,0.0,11,276.0,1,False


In [29]:
train.drop(train[train['weaponsAcquired']>70].index,inplace=True)

In [30]:
train[train['heals']>35]

Unnamed: 0,Id,groupId,matchId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,...,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints,winPlacePerc,playersjoined,traveldistance,healsandboosts,killingwithoutmoving
18405,63ab976895d860,927eeba5614c4f,69473402649f11,0,2,0.000,0,0,47,43,...,0,0,1387.0,6,0,0.937012,96,6856.0,49,False
54463,069ddee7c9d26a,58ab5a1ce8e06f,942416b6caf21e,1,4,182.000,0,1,43,21,...,0,0,2792.0,7,0,0.961426,93,3084.0,47,False
67676,9141a9dd94680e,10e9baaea7ee52,1746a49f176549,0,3,97.000,0,0,37,45,...,0,0,3504.0,3,0,0.897461,89,5324.0,40,False
100152,6c814c89c4e063,43f1edf6402ad5,7be6241e8a3be8,0,6,0.000,0,0,37,43,...,0,0,2960.0,4,1591,0.867188,99,3018.0,43,False
112499,3a730c969a78ee,96774752376c8f,0998fc79a99bae,0,1,110.000,0,1,38,29,...,0,0,1140.0,6,0,0.729004,98,5364.0,39,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4369025,a3481955ab300b,95e725d4ee6d9c,1b17310cd1ad8e,1,3,313.750,1,2,47,4,...,0,0,1103.0,4,1523,0.744141,86,1103.0,50,False
4394898,4829aab2dd2904,1eb8d05b80bb29,aeeccc37519d2a,3,3,1173.000,10,1,36,1,...,0,0,1995.0,8,0,1.000000,93,4496.0,39,False
4397805,703ce5805efed7,b84c7dd65f03cd,d8fa2223c98979,0,1,44.500,0,0,62,44,...,0,0,2628.0,6,0,0.836914,97,2628.0,63,False
4436938,ecf4468be5302c,8a934182175702,4b1eb1af720e23,0,3,0.000,0,0,36,42,...,0,0,2386.0,8,0,0.868164,92,2834.0,39,False


In [31]:
train.drop(train[train['heals']>35].index,inplace=True)

In [32]:
train.shape

(4444631, 33)

# Prediction modeling

In [33]:
df=pd.concat([train,test],ignore_index=True)

In [34]:
df.shape

(6378805, 33)

In [35]:
enc_df2 = pd.get_dummies(df[["matchType"]])
df.drop(['matchType'],axis=1,inplace=True)
#df.rename(columns={0.0:'Ungraduated',1.0:'Graduated'},inplace=True)
df = df.join(enc_df2)

df.drop(['Id','groupId','matchId'],axis=1,inplace=True)
trainx = df.iloc[:4444630]
testx = df.iloc[4444631:].fillna(0)
y=trainx["winPlacePerc"]
X = trainx.drop(["winPlacePerc"],axis=1)
X_test = testx.drop(["winPlacePerc"],axis=1)

from sklearn.model_selection import train_test_split
#xtrain,xtest,ytrain,ytest = train_test_split(X,y,test_size=0.4,random_state=42)
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler

prep = MinMaxScaler()

In [36]:
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
y.fillna(0,inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  downcast=downcast,


In [37]:
X_train,X_val,y_train,y_val = train_test_split(X,y,test_size=0.4,random_state=42)

In [38]:
DTree_clf = DecisionTreeRegressor()
DTree_clf.fit(X, y)

DecisionTreeRegressor()

In [40]:
DTree_pred = DTree_clf.predict(X_test)

In [41]:
DTree_pred[:20]

array([0.        , 0.54882812, 0.        , 0.        , 0.5       ,
       0.        , 0.54541016, 0.        , 0.        , 0.38110352,
       0.48950195, 0.23083496, 0.77783203, 0.97851562, 0.01589966,
       0.39135742, 0.29638672, 0.        , 0.        , 0.1071167 ])

In [42]:
X_test.shape

(1934174, 44)

In [43]:
test.shape

(1934174, 28)

In [44]:
test.tail()

Unnamed: 0,Id,groupId,matchId,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,...,rankPoints,revives,rideDistance,roadKills,swimDistance,teamKills,vehicleDestroys,walkDistance,weaponsAcquired,winPoints
1934169,a316c3a13887d5,80e82180c90951,f276da5fd73536,1,2,381.0,3,0,7,4,...,-1,1,0.0,0,0.0,0,0,1799.0,5,1526
1934170,5312146b27d875,147264a127c56e,fb77cbfe094a6b,0,0,0.0,0,0,0,64,...,-1,0,0.0,0,0.0,0,0,1195.0,3,1496
1934171,fc8818b5b32ad3,7bbb4779ebedd5,efb74d5f6e9779,0,0,91.9375,0,0,3,24,...,1482,0,0.0,0,7.796875,1,0,3328.0,3,0
1934172,a0f91e35f8458f,149a506a64c2b1,3a2cf335bc302f,1,2,138.625,0,0,12,25,...,1456,1,0.0,0,0.0,0,0,1893.0,4,0
1934173,3696fc9f3a42b2,c770d7feaf020f,9d4e2b6d506fd0,0,0,19.796875,0,0,0,86,...,-1,0,0.0,0,0.0,0,0,68.875,0,1551


In [45]:
X_test.head()

Unnamed: 0,assists,boosts,damageDealt,DBNOs,headshotKills,heals,killPlace,killPoints,kills,killStreaks,...,matchType_normal-duo,matchType_normal-duo-fpp,matchType_normal-solo,matchType_normal-solo-fpp,matchType_normal-squad,matchType_normal-squad-fpp,matchType_solo,matchType_solo-fpp,matchType_squad,matchType_squad-fpp
4444631,0,0,51.46875,0,0,0,73,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4444632,0,4,179.125,0,0,2,11,0,2,1,...,0,0,0,0,0,0,0,0,0,0
4444633,1,0,23.40625,0,0,4,49,0,0,0,...,0,0,0,0,0,0,0,0,0,1
4444634,0,0,65.5,0,0,0,54,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4444635,0,4,330.25,1,2,1,7,0,3,1,...,0,0,0,0,0,0,0,0,0,1


In [46]:
output = pd.DataFrame({'Id':test.Id,'winPlacePerc':DTree_pred})

In [47]:
output

Unnamed: 0,Id,winPlacePerc
0,9329eb41e215eb,0.000000
1,639bd0dcd7bda8,0.548828
2,63d5c8ef8dfe91,0.000000
3,cf5b81422591d1,0.000000
4,ee6a295187ba21,0.500000
...,...,...
1934169,a316c3a13887d5,0.625000
1934170,5312146b27d875,0.033295
1934171,fc8818b5b32ad3,0.222168
1934172,a0f91e35f8458f,0.694336
