# PUBG Winner Predictions

## Data Frames Meaning
Meaning of all the columns in the PUBG dataset (copied from the main site).

* DBNOs - Number of enemy players knocked.
* assists - Number of enemy players this player damaged that were killed by teammates.
* boosts - Number of boost items used.
* damageDealt - Total damage dealt. Note: Self inflicted damage is subtracted.
* headshotKills - Number of enemy players killed with headshots.
* heals - Number of healing items used.
* Id - Player’s Id
* killPlace - Ranking in match of number of enemy players killed.
* killPoints - Kills-based external ranking of player. (Think of this as an Elo ranking where only kills matter.) If there is a value other than -1 in rankPoints, then any 0 in killPoints should be treated as a “None”.
* killStreaks - Max number of enemy players killed in a short amount of time.
* kills - Number of enemy players killed.
* longestKill - Longest distance between player and player killed at time of death. This may be misleading, as downing a player and driving away may lead to a large longestKill stat.
* matchDuration - Duration of match in seconds.
* matchId - ID to identify match. There are no matches that are in both the training and testing set.
* matchType - String identifying the game mode that the data comes from. The standard modes are “solo”, “duo”, “squad”, “solo-fpp”, “duo-fpp”, and “squad-fpp”; other modes are from events or custom matches.
* rankPoints - Elo-like ranking of player. This ranking is inconsistent and is being deprecated in the API’s next version, so use with caution. Value of -1 takes place of “None”.
* revives - Number of times this player revived teammates.
* rideDistance - Total distance traveled in vehicles measured in meters.
* roadKills - Number of kills while in a vehicle.
* swimDistance - Total distance traveled by swimming measured in meters.
* teamKills - Number of times this player killed a teammate.
* vehicleDestroys - Number of vehicles destroyed.
* walkDistance - Total distance traveled on foot measured in meters.
* weaponsAcquired - Number of weapons picked up.
* winPoints - Win-based external ranking of player. (Think of this as an Elo ranking where only winning matters.) If there is a value other than -1 in rankPoints, then any 0 in winPoints should be treated as a “None”.
* groupId - ID to identify a group within a match. If the same group of players plays in different matches, they will have a different groupId each time.
* numGroups - Number of groups we have data for in the match.
* maxPlace - Worst placement we have data for in the match. This may not match with numGroups, as sometimes the data skips over placements.
* winPlacePerc - The target of prediction. This is a percentile winning placement, where 1 corresponds to 1st place, and 0 corresponds to last place in the match. It is calculated off of maxPlace, not numGroups, so it is possible to have missing chunks in a match.


In [None]:
# Data handling, plotting and statistics libraries used by the cells below.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from scipy.stats import norm
import warnings
# Suppress every warning so library notices do not clutter the cell output.
warnings.filterwarnings(action='ignore')
# Apply the 'fivethirtyeight' matplotlib style sheet to all figures.
plt.style.use('fivethirtyeight')
# NOTE(review): featuretools and dask are not used in the visible cells -
# confirm whether later cells need them.
import featuretools as ft
import dask.dataframe as dd
import gc
# Explicitly turn automatic garbage collection on.
gc.enable()

### Loading 100000 Rows to Increase the Speed.

In [None]:
# Read only the first 100000 rows of the training CSV to keep the notebook fast.
train = pd.read_csv(
    filepath_or_buffer='../input/train_V2.csv',
    nrows=100000,
)

In [None]:
# Print the column names, dtypes, non-null counts and memory usage of the frame.
train.info()

In [None]:
# Report the (rows, columns) shape of the loaded training sample.
print('size of the training data',train.shape)

# Box Plots for Multiple Features of the PUBG Train Dataset

In [None]:
# Plain white background plus the compact "paper" context with custom font sizes.
sns.set_style("white")
sns.set_context(
    "paper",
    font_scale=1.2,
    rc={
        "font.size": 2,
        "axes.labelsize": 10,
    },
)

In [None]:
# Bin the wide-ranging count features into intervals so that each box plot
# shows a handful of boxes instead of one box per distinct value.
# pd.cut leaves values outside the outermost edges as NaN.
range_bins = {
    'heals': [-1, 1, 5, 10, 20, 30, 40, 50],
    'killPlace': [-1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
    'kills': [-1, 0, 1, 2, 5, 10, 20, 30, 40],
    'maxPlace': [-1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
    'numGroups': [-1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100],
    'weaponsAcquired': [-1, 5, 10, 20, 30, 40, 50, 60, 70],
}
for column, edges in range_bins.items():
    train[column + '_Range'] = pd.cut(train[column], edges)

# Features (raw or binned) to plot against the target winPlacePerc.
plots = [
    'assists', 'boosts', 'DBNOs', 'headshotKills', 'heals_Range',
    'killPlace_Range', 'kills_Range', 'killStreaks', 'matchType',
    'maxPlace_Range', 'numGroups_Range', 'revives', 'roadKills',
    'teamKills', 'vehicleDestroys', 'weaponsAcquired_Range',
]

# Two plots per row; the row count follows the list length so that adding or
# removing a feature never silently drops a plot.
n_cols = 2
n_rows = (len(plots) + n_cols - 1) // n_cols

plt.figure(figsize=(16, 50))
for position, feature in enumerate(plots, start=1):
    plt.subplot(n_rows, n_cols, position)
    sns.boxplot(x=feature, y="winPlacePerc", data=train, width=.4)
plt.show()



# Joint Plots for all the features of Pubg Data

In [None]:
# Numeric features to plot against the target winPlacePerc.
# Shorten this list (e.g. to ['boosts', 'damageDealt']) for a quick run.
joint_plots = [
    'assists', 'boosts', 'damageDealt', 'DBNOs',
    'headshotKills', 'heals', 'killPlace', 'killPoints', 'kills',
    'killStreaks', 'longestKill', 'matchDuration', 'maxPlace',
    'numGroups', 'rankPoints', 'revives', 'rideDistance', 'roadKills',
    'swimDistance', 'teamKills', 'vehicleDestroys', 'walkDistance',
    'weaponsAcquired', 'winPoints',
]

# sns.jointplot creates its own figure (sized through `height`), so no
# plt.figure() call is made here - it would only leave an empty extra figure.
for feature in joint_plots:
    sns.jointplot(x="winPlacePerc", y=feature, data=train,
                  height=10, ratio=3, color='y')
    plt.show()



In [None]:
# General statistics: mean and maximum of the per-player kill count.
print(
    ("The average person kills {:.4f} players, "
     "while the most kills ever recorded is {}.").format(
        train['kills'].mean(),
        train['kills'].max(),
    )
)

# Count plot for Kills 

In [None]:
# Number of players for each kill count.
# The column is passed by keyword: newer seaborn releases treat the first
# positional argument as `data`, not `x`.
sns.countplot(x='kills', data=train)

# Heat Map to Show relations between Features

In [None]:
# Correlate numeric columns only: the frame also holds string columns
# (e.g. matchType) and, if the binning cell has run, categorical *_Range
# columns, on which DataFrame.corr raises in recent pandas versions.
numeric_corr = train.select_dtypes(include='number').corr()

f, ax = plt.subplots(figsize=(15, 15))
sns.heatmap(numeric_corr, annot=True, linewidths=.5, fmt='.1f', ax=ax)
plt.show()

# Distribution Plot for Various Features

In [None]:
# Distribution of the target winPlacePerc with a fitted normal curve overlaid.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases.
plt.figure()
sns.distplot(
    a=train['winPlacePerc'],
    fit=norm,
)
plt.show()

In [None]:
# Distribution of winPoints with a fitted normal curve overlaid.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases.
plt.figure()
sns.distplot(
    a=train['winPoints'],
    fit=norm,
)
plt.show()

In [None]:
# Distribution of killPlace using 50 histogram bins.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases.
plt.figure()
sns.distplot(
    a=train['killPlace'],
    bins=50,
)
plt.show()

###### MatchType and duration

In [None]:
# Bar chart of the mean matchDuration for every matchType.
plt.figure(figsize=(9, 7))
match_dur = train.groupby('matchType')['matchDuration'].agg('mean')
sns.barplot(x=match_dur.index, y=match_dur)
# Rotate the existing tick labels; the angle must be numeric, because current
# matplotlib rejects a string such as '45'.
plt.xticks(rotation=45)
plt.gca().set_title('mean match-type duration')
# Save before showing: once plt.show() has displayed the figure, a later
# savefig would write an empty image.
plt.savefig('duration')
plt.show()

The **killPlace** feature is negatively correlated with most other features because
killPlace is the player's ranking in the match by number of enemy players killed, so a smaller killPlace value corresponds to more kills.

In [None]:
# Scatter plot with a fitted regression line: damage dealt versus kills.
plt.figure()
# x and y are passed by keyword; current seaborn no longer accepts them
# positionally.
sns.regplot(x=train['kills'].values, y=train['damageDealt'].values)
plt.gca().set_ylabel('Damage dealt')
plt.gca().set_xlabel('Total kills')
plt.show()


### Unused Code

In [None]:
# train['winPoints'].value_counts()/train.shape[0]*100

# assists = pd.DataFrame(train['assists'].value_counts()/train.shape[0]*100)
# x=[]
# q1=[]
# q3=[]
# std=[]
# for i in assists.index:
#     x.append(train[train['assists']==i]['winPlacePerc'].mean())
#     q1.append(train[train['assists']==i]['winPlacePerc'].quantile(0.25))
#     q3.append(train[train['assists']==i]['winPlacePerc'].quantile(0.75))
#     std.append(train[train['assists']==i]['winPlacePerc'].std())
# assists['Q1'] = q1    
# assists['Mean'] = x
# assists['Q3'] = q3
# assists['Std'] = std
# display(assists)

# boosts = pd.DataFrame(train['boosts'].value_counts()/train.shape[0]*100)
# x=[]
# q1=[]
# q3=[]
# std=[]
# for i in boosts.index:
#     x.append(train[train['boosts']==i]['winPlacePerc'].mean())
#     q1.append(train[train['boosts']==i]['winPlacePerc'].quantile(0.25))
#     q3.append(train[train['boosts']==i]['winPlacePerc'].quantile(0.75))
#     std.append(train[train['boosts']==i]['winPlacePerc'].std())
# boosts['Q1'] = q1    
# boosts['Mean'] = x
# boosts['Q3'] = q3
# boosts['Std'] = std
# display(boosts)
# plt.subplot(8,2,1)
# sns.boxplot(x="assists", y="winPlacePerc", data=train  , width=.6)
# plt.plot()
# plt.subplot(8,2,2)
# sns.boxplot(x="boosts", y="winPlacePerc", data=train  , width=.6)
# plt.plot()
# plt.subplot(8,2,3)
# sns.boxplot(x="DBNOs", y="winPlacePerc", data=train  , width=.4)
# plt.plot()
# plt.subplot(8,2,4)
# sns.boxplot(x="headshotKills", y="winPlacePerc", data=train  , width=.4)
# plt.plot()
# plt.subplot(8,2,5)
# # sns.boxplot(x="heals", y="winPlacePerc", data=train  , width=.4)
# sns.boxplot(x="heals_Range", y="winPlacePerc", data=train , width=.4)
# plt.plot()
# plt.subplot(8,2,6)
# sns.boxplot(x="killPlace_Range", y="winPlacePerc", data=train , width=.4)
# plt.plot()
# plt.subplot(8,2,7)
# sns.boxplot(x="kills_Range", y="winPlacePerc", data=train , width=.4)
# plt.plot()
# plt.subplot(8,2,8)
# sns.boxplot(x="killStreaks", y="winPlacePerc", data=train , width=.4)
# plt.plot()
# plt.subplot(8,2,9)
# sns.boxplot(x="matchType", y="winPlacePerc", data=train , width=.4)
# plt.plot()
# plt.subplot(8,2,10)
# sns.boxplot(x="maxPlace_Range", y="winPlacePerc", data=train , width=.4)
# plt.plot()
# plt.subplot(8,2,11)
# sns.boxplot(x="numGroups_Range", y="winPlacePerc", data=train , width=.4)
# plt.plot()
# plt.subplot(8,2,12)
# sns.boxplot(x="revives", y="winPlacePerc", data=train , width=.4)
# plt.plot()
# plt.subplot(8,2,13)
# sns.boxplot(x="roadKills", y="winPlacePerc", data=train , width=.4)
# plt.plot()
# plt.subplot(8,2,14)
# sns.boxplot(x="teamKills", y="winPlacePerc", data=train , width=.4)
# plt.plot()
# plt.subplot(8,2,15)
# sns.boxplot(x="vehicleDestroys", y="winPlacePerc", data=train , width=.4)
# plt.plot()
# plt.subplot(8,2,16)
# sns.boxplot(x="weaponsAcquired_Range", y="winPlacePerc", data=train , width=.4)
# plt.plot()


# train['killsCategories'] = pd.cut(train['kills'], [-1, 0, 2, 5, 10, 60], labels=['0_kills','1-2_kills', '3-5_kills', '6-10_kills', '10+_kills'])
# plt.figure(figsize=(15,8))
# sns.boxplot(x="killsCategories", y="winPlacePerc", data=train)
# plt.show()

# sns.jointplot(x="winPlacePerc", y="walkDistance",  data=train, height=10, ratio=3, color="lime" )
# plt.show()
# sns.jointplot(x="winPlacePerc", y="rideDistance", data=train, height=10, ratio=3, color="y" )
# plt.show()
# sns.jointplot(x="winPlacePerc", y="boosts", data=train, height=10, ratio=3, color="blue")
# plt.show()
# sns.jointplot(x="winPlacePerc", y="kills", data=train, height=10, ratio=3, color="r")
# plt.show()
# sns.jointplot(x="winPlacePerc", y="heals", data=train, height=10, ratio=3, color="lime")
# plt.show()
# sns.jointplot(x="winPlacePerc", y="maxPlace",  data=train, height=10, ratio=3, color="lime" )
# plt.show()