In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

## Exploratory Data Analysis of PUBG Data

In [70]:
# Load the kill-event table from 'edapubg.csv' (path relative to the notebook's
# working directory) into `df`, the frame the cells below operate on.
df = pd.read_csv('edapubg.csv')

In [71]:
# Structural overview of the loaded frame: per-column null flags, column
# dtypes, and the (rows, columns) shape.
# sep='' stops print() from inserting a space between its arguments. With the
# default separator the first row of each Series repr is pushed one column to
# the right of the rows below it, so the printed table comes out misaligned.
print('Mengecek nilai null pada dataframe \n', df.isnull().any(), '\n', sep='')
print('Mengetahui Tipe data pada dataframe \n', df.dtypes, '\n', sep='')
# The shape fits on one line, so the default single-space separator is kept here.
print('Mengetahui ukuran dataframe', df.shape, '\n')

Mengecek nilai null pada dataframe 
 killed_by            False
killer_name           True
killer_placement      True
killer_position_x     True
killer_position_y     True
map                   True
match_id             False
time                 False
victim_name          False
victim_placement      True
victim_position_x    False
victim_position_y    False
dtype: bool 

Mengetahui Tipe data pada dataframe 
 killed_by             object
killer_name           object
killer_placement     float64
killer_position_x    float64
killer_position_y    float64
map                   object
match_id              object
time                   int64
victim_name           object
victim_placement     float64
victim_position_x    float64
victim_position_y    float64
dtype: object 

Mengetahui ukuran dataframe (500000, 12) 



In [4]:
# Preview the first rows of the raw frame to sanity-check the parsed columns.
df.head()

Unnamed: 0,killed_by,killer_name,killer_placement,killer_position_x,killer_position_y,map,match_id,time,victim_name,victim_placement,victim_position_x,victim_position_y
0,Hit by Car,Mine008,16.0,406548.9,443307.6,MIRAMAR,2U4GBNA0Ymkpggu1uhT3W6mTS1FSPVOGp7GUN-YyCcamfa...,952,Mine008,16.0,406548.9,443307.6
1,SCAR-L,EXcarLiber,7.0,467967.8,566232.6,MIRAMAR,2U4GBNA0Ymn8kakDAwbXTATnl3CCPAnVzdY3Mv29Dwu0oj...,828,Shkr89,14.0,470162.2,567417.4
2,UMP9,bilibilifanfan,10.0,546012.1,235056.8,ERANGEL,2U4GBNA0Ymns8sj0nKyxZWl3B7-0CKMW0-RYyMYrg2IMl5...,671,TlknBoutPractice,15.0,548855.9,233495.8
3,Drown,,,,,ERANGEL,2U4GBNA0YmncC2jML4aUKeThs1I5Oy0q66F5BjBBeBdnE4...,167,Frivolo,27.0,0.0,0.0
4,M416,Penard,2.0,292111.8,347930.4,ERANGEL,2U4GBNA0YmkJzzwKWMn2RkYLgpI-0HStabLs0H5CdDwYoX...,1634,SpyderxByte,6.0,291005.5,348340.9


# Objective

1. Senjata yang paling banyak digunakan untuk membunuh?
2. Detail Senjata TOP 3
3. Heatmap lokasi pemain banyak terbunuh
4. Motion Graph waktu dengan jumlah kematian
5. Senjata yang sering digunakan di early dan late game?

### Data Description

|Variable Name|	Variable Meaning|	Variable Type|
| :--- | :--- | :--- |
|killed_by|	Weapon or cause of death that killed the victim|	Nominal|
|killer_name|	Killer's in-game ID|	Nominal|
|killer_placement|	The final ranking of the team where the killer is located|	Discrete|
|killer_position_x|	X coordinate of the killer when the killing behavior occurs|	Continuous|
|killer_position_y|	The Y coordinate of the killer when the killing behavior occurs|	Continuous|
|map|	Game Map (ERANGEL ISLAND/MIRAMAR DESERT)| Nominal|
|match_id|	Event Unique ID|	Nominal|
|time|	When the kill occurs (how many seconds after the game starts)|	Discrete|
|victim_name|	Victim's in-game ID|	Nominal|
|victim_placement|	The final ranking of the team the victim belongs to|	Discrete|
|victim_position_x|	X coordinate of the victim when the kill occurs|	Continuous|
|victim_position_y|	Y coordinate of the victim when the kill occurs|	Continuous|


In [72]:
# Print the column dtypes, per-column non-null counts and the memory footprint;
# the non-null counts show which columns contain missing values and how many.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 500000 entries, 0 to 499999
Data columns (total 12 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   killed_by          500000 non-null  object 
 1   killer_name        465690 non-null  object 
 2   killer_placement   465690 non-null  float64
 3   killer_position_x  465690 non-null  float64
 4   killer_position_y  465690 non-null  float64
 5   map                493950 non-null  object 
 6   match_id           500000 non-null  object 
 7   time               500000 non-null  int64  
 8   victim_name        500000 non-null  object 
 9   victim_placement   490521 non-null  float64
 10  victim_position_x  500000 non-null  float64
 11  victim_position_y  500000 non-null  float64
dtypes: float64(6), int64(1), object(5)
memory usage: 45.8+ MB


In [6]:
# Show the column labels of `df` as a plain Python list.
df.columns.tolist()

['killed_by',
 'killer_name',
 'killer_placement',
 'killer_position_x',
 'killer_position_y',
 'map',
 'match_id',
 'time',
 'victim_name',
 'victim_placement',
 'victim_position_x',
 'victim_position_y']

### Data Preparation

In [7]:
# Keep only the rows that have a value in every column.
# NOTE(review): this also discards deaths with no recorded killer (the killer_*
# columns are the ones with the most missing values), so every analysis below
# excludes those rows - confirm that this is intended.
df = df.dropna()

In [8]:
# Put the unit in the column label: `time` becomes `time(s)`.
df = df.rename(columns={'time': 'time(s)'})

In [9]:
# Preview the frame again after the preparation steps; gaps in the index labels
# are rows that were removed.
df.head()

Unnamed: 0,killed_by,killer_name,killer_placement,killer_position_x,killer_position_y,map,match_id,time(s),victim_name,victim_placement,victim_position_x,victim_position_y
0,Hit by Car,Mine008,16.0,406548.9,443307.6,MIRAMAR,2U4GBNA0Ymkpggu1uhT3W6mTS1FSPVOGp7GUN-YyCcamfa...,952,Mine008,16.0,406548.9,443307.6
1,SCAR-L,EXcarLiber,7.0,467967.8,566232.6,MIRAMAR,2U4GBNA0Ymn8kakDAwbXTATnl3CCPAnVzdY3Mv29Dwu0oj...,828,Shkr89,14.0,470162.2,567417.4
2,UMP9,bilibilifanfan,10.0,546012.1,235056.8,ERANGEL,2U4GBNA0Ymns8sj0nKyxZWl3B7-0CKMW0-RYyMYrg2IMl5...,671,TlknBoutPractice,15.0,548855.9,233495.8
4,M416,Penard,2.0,292111.8,347930.4,ERANGEL,2U4GBNA0YmkJzzwKWMn2RkYLgpI-0HStabLs0H5CdDwYoX...,1634,SpyderxByte,6.0,291005.5,348340.9
5,M416,LandG1314,15.0,515510.3,498309.9,ERANGEL,2U4GBNA0Ymm9rIRnggyA-ySgPGoS-7I740gePRquriJRec...,1091,GandL1111,15.0,515470.2,498240.6


### Senjata yang paling banyak digunakan untuk membunuh

In [10]:
# Rank the `killed_by` values by number of kills.
# Steps: count the rows per `killed_by`, order by the `killer_name` count from
# highest to lowest, keep only the label and that count (renamed to `Total`),
# then index the result by `killed_by`.
# NOTE(review): `.drop(index=0)` removes whichever entry ranks first after the
# sort - it is selected by position, not by name. Presumably that entry is a
# non-weapon cause that does not belong in a weapon ranking; confirm, and
# consider filtering it out by its label so a change in ranking cannot drop a
# real weapon.
df_bestgun = (
    df.groupby(['killed_by'])
    .count()
    .sort_values('killer_name', ascending=False)
    .reset_index()
    .loc[:, ['killed_by', 'killer_name']]
    .rename(columns={'killer_name': 'Total'})
    .drop(index=0)
    .set_index('killed_by')
)


In [27]:
# Move `killed_by` back from the index into a regular column, leaving a fresh
# 0-based integer index on the ranking.
df_bestgun = df_bestgun.reset_index()

In [28]:
# Show the top of the ranking: the entries with the highest kill totals.
df_bestgun.head()

Unnamed: 0,killed_by,Total
0,M416,55757
1,SCAR-L,45576
2,M16A4,45111
3,AKM,41560
4,UMP9,26657


### Detail Senjata Top 3

In [49]:
# Load the weapon statistics table from 'pubg-weapon-stats.csv' (path relative
# to the notebook's working directory); it is filtered by 'Weapon Name' below.
gunstats = pd.read_csv('pubg-weapon-stats.csv')

In [51]:
# Look up the stat sheet of the three highest-ranked weapons.
# `gun` lists the weapon names to keep and `list_col` the stat columns to show;
# rows and columns are selected in a single `.loc` call.
gun = ['M416', 'SCAR-L', 'M16A4']
list_col = [
    'Weapon Name',
    'Weapon Type',
    'Bullet Speed',
    'Damage Per Second',
    'Fire Mode',
]
top3 = gunstats.loc[gunstats['Weapon Name'].isin(gun), list_col]

In [52]:
# Display the selected stats for the three weapons side by side.
top3

Unnamed: 0,Weapon Name,Weapon Type,Bullet Speed,Damage Per Second,Fire Mode
5,SCAR-L,Assault Rifle,880.0,448.0,"Single, Automatic"
6,M416,Assault Rifle,890.0,502.0,"Single, Automatic"
7,M16A4,Assault Rifle,900.0,430.0,"Single, Burst"


### Heatmap lokasi pemain banyak terbunuh

In [62]:
# Export the victim coordinates (plus the row index) to 'heatmapdeath.csv' as
# input for the death-location heatmap.
# NOTE(review): the `map` column is not exported, so coordinates from different
# maps end up in one file - confirm the heatmap is meant to combine them.
# NOTE(review): the coordinate minimum is 0.0 on both axes; presumably (0, 0)
# marks an unknown position rather than a real location - verify before plotting.
df.to_csv('heatmapdeath.csv', columns=['victim_position_x', 'victim_position_y'])

In [61]:
# Summary statistics (count, mean, spread, quartiles, extremes) of the victim
# coordinates, to see the value range the heatmap has to cover.
df[['victim_position_x', 'victim_position_y']].describe()

Unnamed: 0,victim_position_x,victim_position_y
count,450784.0,450784.0
mean,404558.944267,388083.788699
std,150970.887444,147466.351631
min,0.0,0.0
25%,327105.425,292082.55
50%,413314.3,383745.45
75%,505255.3,480732.75
max,794500.9,795902.1


### Total Kematian dalam Waktu tertentu

In [69]:
# Summary statistics of the kill time, i.e. seconds elapsed since the match
# started when each kill happened.
df['time(s)'].describe()

count    450784.000000
mean        728.264548
std         551.955718
min          60.000000
25%         229.000000
50%         552.000000
75%        1194.000000
max        2191.000000
Name: time(s), dtype: float64

In [67]:
# `time` is not assigned in any cell visible above, so on a fresh kernel the
# bare `time.head()` raises NameError. Bind it explicitly to the kill-time
# column (seconds since the match started) before previewing it.
# NOTE(review): the name would shadow the standard-library `time` module if
# that is ever imported in this notebook; consider a more specific name.
time = df['time(s)']
time.head()

0     952
1     828
2     671
4    1634
5    1091
Name: time(s), dtype: int64