In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Importing Libraries

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

import cufflinks as cf
import plotly.express as px
import plotly.graph_objects as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Prepare plotly and cufflinks for rendering figures inside the notebook.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of
# embedding it in the notebook — confirm against the plotly.offline docs.
init_notebook_mode(connected= True)
cf.go_offline()

import missingno as mno

## Read Dataset

In [None]:
# Load the PUBG weapon statistics CSV from the Kaggle input directory into `df`.
df=pd.read_csv('/kaggle/input/pubg-weapon-stats/pubg-weapon-stats.csv')

In [None]:
# Preview the first two rows of the dataset.
df.head(2)

In [None]:
# Preview the last two rows of the dataset.
df.tail(2)

## Missing Data overview

The missingno library provides convenient tools for visualizing missing data.

First, we use a bar plot to look at the number of non-missing values in each column.

In [None]:
# missingno bar plot over the full dataset (non-missing counts per column).
mno.bar(df)

Next we look at the correlation between the missing data patterns.

In [None]:
# missingno heatmap: correlation between the missing-data patterns of the columns.
mno.heatmap(df)

The graph above shows that the missing values are highly correlated with Range. This implies that the weapons with missing data are the ones without a Range value. So let's look at the different classes of weapons there are.

In [None]:
# Bar chart of the weapon classes, built directly from the 'Weapon Type' column.
# NOTE(review): the "count"/"value" label keys assume plotly express counts
# occurrences when given a single non-numeric column — confirm on the rendered axes.
# The title was previously an empty string, leaving the figure unlabelled.
px.bar(
    df['Weapon Type'],
    y=None,
    title="Number of weapons per weapon class",
    labels={
        "count": "Count of weapons (subclasses)",
        "value": "Weapon Classes",
    },
)

First, separate out the ranged weapons (guns).

In [None]:
# Split the dataset in two: rows whose 'Weapon Type' is 'Melee' or 'Other'
# go to `other`; everything else is treated as a ranged weapon.
non_ranged_mask = df['Weapon Type'].isin(['Melee', 'Other'])
ranged = df.loc[~non_ranged_mask]
other = df.loc[non_ranged_mask]

Let's look at the missing data again.

In [None]:
# missingno bar plot again, this time restricted to the ranged weapons.
mno.bar(ranged)

It seems that we still have a few missing observations. Let's remove them for now.

In [None]:
# Drop the remaining rows that contain missing values.
# `ranged` is a slice of `df`, so rebinding to the result of dropna() (rather
# than inplace=True) avoids mutating that slice, which can trigger pandas'
# SettingWithCopyWarning.
ranged = ranged.dropna()

## Exploratory data analysis

First check the different data types we have; this will help us later on.

In [None]:
# Column data types of the cleaned ranged-weapon frame.
ranged.dtypes

Seaborn's heatmap is useful for visualizing the correlation between the different numeric variables.

In [None]:
# Pearson correlation between the float64/int64 columns, drawn as a heatmap.
numeric_cols = ranged.select_dtypes(include=['float64', 'int64'])
pearson_corr = numeric_cols.corr('pearson')
sns.heatmap(pearson_corr, annot=False)

The correlation heatmap above gives a sense of how closely correlated the variables are. (For now we restrict ourselves to Pearson correlation, which is the simplest one to interpret.) A high correlation (a coefficient close to 1.0 or -1.0) between two variables implies that as one variable increases, the other tends to increase or decrease proportionately. A low correlation (a coefficient close to 0.0) implies that the two variables have little linear relationship, so a change in one says little about the other.

### Range of the Weapon Type
In any shooter game, I try to balance Range and Damage when starting out, so this gives an overview of the ranges of the different weapon classes, with an indication of damage.

In [None]:
# Bar chart of Range for the ranged weapons, indexed by weapon type and
# coloured by Damage.
range_by_type = ranged.set_index('Weapon Type')
fig = px.bar(
    range_by_type,
    x='Range',
    color='Damage',
    title="Weapon Types vs Range and Damage",
)

fig.show()

### Weapon type vs Magazine Capacity
Another item of interest is the magazine capacity of each weapon type, keeping the Rate of Fire in mind.

In [None]:
# Bar chart of Magazine Capacity for the ranged weapons, indexed by weapon type
# and coloured by Rate of Fire. A title is added so the figure stands on its own
# when the notebook is skimmed.
px.bar(
    ranged.set_index('Weapon Type'),
    x='Magazine Capacity',
    color='Rate of Fire',
    barmode='stack',
    title="Weapon Type vs Magazine Capacity",
)

### Weapon Type vs the BDMG_1, BDMG_2, BDMG_3, HDMG_1, HDMG_2, HDMG_3

In [None]:
# Grouped bar chart of the six BDMG_*/HDMG_* columns per weapon type.
# The columns are listed once so the selection is easy to read and adjust;
# a title is added so the figure stands on its own.
damage_cols = ['BDMG_1', 'BDMG_2', 'BDMG_3', 'HDMG_1', 'HDMG_2', 'HDMG_3']
px.bar(
    ranged[['Weapon Type'] + damage_cols].set_index('Weapon Type'),
    title="Weapon Type vs BDMG_1-3 and HDMG_1-3",
    labels={'value': 'Damages'},
    barmode='group',
    color_discrete_sequence=px.colors.sequential.Inferno,
)

### Weapon Type vs Shots to Kill
Lastly, we look at the shots to kill by weapon type; for this we take all the weapons into account (including the Melee and Other classes).

In [None]:
# Grouped bar chart of chest/head shots-to-kill per weapon type, using the full
# dataset (melee and other weapons included). A title is added so the figure
# stands on its own.
shots_cols = ['Shots to Kill (Chest)', 'Shots to Kill (Head)']
px.bar(
    df[['Weapon Type'] + shots_cols].set_index('Weapon Type'),
    title="Weapon Type vs Shots to Kill",
    barmode='group',
    labels={'value': 'Shots to kill'},
)

#### Concluding Remarks
It seems that the Sniper Rifle is the best ranged weapon class. Let's look at it more closely.

In [None]:
# Keep only the sniper rifles and drop the now-constant 'Weapon Type' column.
# The drop is chained and returns a new frame, instead of using inplace=True on
# a slice of `df`, which can trigger pandas' SettingWithCopyWarning.
sniper = (
    df.loc[df['Weapon Type'] == 'Sniper Rifle']
    .drop(columns='Weapon Type')
)

In [None]:
# Grouped bar chart comparing the sniper rifles on four headline stats.
# Fixes the misspelled figure title ("differnt" -> "different").
sniper_stats = ["Range", "Bullet Speed", "Damage", "Damage Per Second"]
px.bar(
    sniper.set_index('Weapon Name')[sniper_stats],
    barmode="group",
    title="Summary of the different sniper rifles",
    labels={'value': 'values'},
)

It seems that the AWM sniper rifle is the best with respect to Range and Damage. When Damage and Damage Per Second are considered together, the Win94 doesn't fare badly either, but its Range is considerably lower.

#### Thank you for going through my notebook. 
### This is my very first post on Kaggle, so I welcome any feedback.
