# Introduction 
* In this kernel you will see some visualizations that give a better statistical understanding of the Pokemon data.

In [None]:
# Notebook setup: load the analysis and plotting libraries, switch Plotly to
# notebook (offline) mode, silence warnings and list the available input files.
# This Python 3 environment comes with many helpful analytics libraries installed.
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load:

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import seaborn as sns
# plotly
# NOTE(review): `py` is never used in the visible notebook, and `plotly.plotly`
# appears to have moved to the separate `chart_studio` package in plotly 4 —
# confirm against the installed plotly version before relying on this import.
import plotly.plotly as py
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
import plotly.graph_objs as go
# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list the files in the input directory
# Silence every warning for the rest of the notebook.
import warnings
warnings.filterwarnings('ignore')
from subprocess import check_output
print(check_output(["ls", "../input"]).decode("utf8"))
# Any results you write to the current directory are saved as output.
# The input directory is listed a second time here, this time via the standard library.
import os
print(os.listdir("../input"))

In [None]:
# Importing data
# Read the raw Pokemon table once. `copied_data` stays an untouched snapshot
# (it keeps the "#" column), while `data` is the independent working frame
# that the following cells modify.
copied_data = pd.read_csv('../input/pokemon.csv')
data = copied_data.copy()

In [None]:
# Summary of the frame: column names, non-null counts and dtypes.
data.info()

In [None]:
# Number of missing values in each column.
data.isnull().sum()

* As you can see above, the columns are almost completely filled; only "Type 2" has 386 null values, so we will have to deal with it later.

In [None]:
# Peek at 10 random rows. The fixed seed makes the displayed sample identical
# on every "Restart & Run All", so the narrative always matches the output.
data.sample(10, random_state=42)

* Before checking the correlations between the features and making the data more interpretable with some plots, let's first drop the "#" column and, in its place, **make the index start from 1**.

In [None]:
# Dropping the "#" column.
# Rebinding `data` (instead of `inplace=True`) together with `errors='ignore'`
# makes this cell safe to re-run: a second execution no longer raises KeyError
# because the column is already gone.
data = data.drop(columns=['#'], errors='ignore')

In [None]:
# Start the index at 1 and give it a name.
# The upper bound comes from the frame itself, so the cell keeps working if
# the number of rows is not exactly 800.
data.index = range(1, len(data) + 1)
data.index.name = "New Index"
data.head(10)  # let's check it now.

# A possible alternative (instead of dropping "#" in the previous cell) is to
# reuse that column as the index.
# NOTE(review): this is only equivalent if "#" runs 1..N without gaps or
# repeated values — verify against the dataset before switching.
#data.set_index('#', inplace = True)

* With the heatmap of Seaborn visualization library, let's see the correlations among features.
* Since correlation is about quantitative values, you can only see correlations of numerical features.
* At first sight, the features seem to have non-linear relationships with each other. Therefore, I'd rather use **"spearman"** than **"pearson"** as the correlation coefficient, also because this is a small dataset.

In [None]:
# Correlation map through heatmap (Spearman rank correlation).
# Only numeric and boolean columns are passed to `corr`: recent pandas versions
# raise on string columns such as "Name" instead of silently skipping them.
numeric_data = data.select_dtypes(include=['number', 'bool'])

f, ax = plt.subplots(figsize=(10, 10))
sns.heatmap(
    numeric_data.corr(method='spearman'),
    linewidths=1,
    linecolor='black',
    cmap='Reds',
    annot=True,   # write the coefficient inside each cell
    fmt='.2f',
    ax=ax,
)
plt.xticks(rotation=45)
plt.yticks(rotation=45)
# Not strictly required, but without it the text repr of the last call is
# printed above the figure and clutters the output.
plt.show()

Now let's see how "Defense" and "Speed" change with respect to "Attack"

In [None]:
# Scatter plot of Defense and Speed (y-axis) against Attack (x-axis).
# Each trace is named after the stat on its y-axis so the legend is correct
# (previously the Defense trace was labelled "Attack" and the Speed trace
# was labelled "Defense").
trace1 = go.Scatter(
    x=data.Attack,
    y=data.Defense,
    mode="markers",
    name="Defense",
    marker=dict(color='rgba(255, 128, 255, 0.8)'),
    text=data['Type 1'],  # hover text: primary type of each Pokemon
)

trace2 = go.Scatter(
    x=data.Attack,
    y=data.Speed,
    mode="markers",
    name="Speed",
    marker=dict(color='rgba(15, 200, 30, 0.4)'),
    text=data['Type 1'],
)

data2 = [trace1, trace2]
layout = dict(
    title='Defense and Speed values with respect to Attack',
    # ticklen: length of the tick marks drawn on the axis
    xaxis=dict(title='Attack', ticklen=5, zeroline=False),
    yaxis=dict(title='Defense & Speed', ticklen=5, zeroline=False),
)
fig = dict(data=data2, layout=layout)
iplot(fig)

* It's time to see how many Pokemon there are of each "Type 1" value, using seaborn's barplot.

In [None]:
# Static bar chart: number of Pokemon per primary type.
# The counts are computed once and reused for both axes.
type1_counts = data['Type 1'].value_counts()

plt.figure(figsize=(10, 10))
bar_ax = sns.barplot(x=type1_counts.index, y=type1_counts.values)
# Title and axis labels so the figure can be read on its own.
bar_ax.set(title='Number of Pokemons per Type 1', xlabel='Type 1', ylabel='Counts')
plt.xticks(rotation=45)
plt.show()

* If you don't have a ruler close at hand, it's better to plot with Plotly, whose interactive hover labels show the exact number of Pokemon in each bar.

In [None]:
# Interactive bar chart: number of Pokemon per primary type.
# `value_counts()` is evaluated once instead of three times.
type1_counts = data['Type 1'].value_counts()

bar = go.Bar(
    x=type1_counts.index,
    y=type1_counts.values,
    marker=dict(
        color='rgba(21, 180, 255, 0.7)',
        line=dict(color='rgb(104,32,0)', width=1.5),  # bar outline
    ),
    text=type1_counts.index,  # hover text
)
databar = [bar]
layout = dict(
    title='Value Counts of Type 1 Pokemons',
    # The x-axis shows the "Type 1" categories, not species.
    xaxis=dict(title='Type 1'),
    yaxis=dict(title='Counts'),
)
fig = go.Figure(data=databar, layout=layout)
iplot(fig)

* Now let's drop NaN values of "Type 2" column.

**When dropping, specify the column through `subset` so that only rows with a NaN in "Type 2" are removed; without `subset`, `dropna` drops every row that has a NaN in any column.**

In [None]:
# Keep only the Pokemon that have a secondary type.
# `dropna` returns a new frame, so `data` itself is left untouched.
data_new = data.dropna(subset=['Type 2'])
# Re-number from 1; the length is derived from the result rather than
# hard-coding the expected 414 rows.
data_new.index = range(1, len(data_new) + 1)
data_new.index.name = 'Dropped'
data_new.head()

* Plotting a pie chart which shows the percentage of each "Type 2" value among the Pokemon that have a secondary type.

**Pie charts are usually not preferred since they're less visually informative.**

In [None]:
# Pie chart of the "Type 2" distribution among Pokemon with a secondary type.
# The counts are computed once and reused for values and labels.
type2_counts = data_new['Type 2'].value_counts()

fig = {
    "data": [
        {
            "values": type2_counts.values,
            "labels": type2_counts.index,
            "type": "pie",
        }
    ],
    "layout": {
        # The slices are "Type 2" shares; the previous title mentioned
        # Legendary Pokemon, which this chart does not show.
        "title": "Percentages of Pokemons w.r.t. Type 2"
    },
}
iplot(fig)

* Using a "bubble plot" we can visualize multiple dimensions of our data. 
* Here, the 50 Pokemon with the highest Attack are shown in descending order; the size of each bubble represents its Defense and its color corresponds to its Speed on a colorscale.

**If the indices of dataframes which will be concatenated don't pair off each other,  concatenated dataframe will not be sorted as you want, so watch out!**

In [None]:
TOP_N = 50  # how many of the strongest attackers to keep for the bubble plot

# Order every Pokemon by Attack, strongest first. `data` stays sorted for the
# cells below, exactly as before, but without an in-place mutation.
data = data.sort_values('Attack', ascending=False)

# Positional 0..N-1 index (derived from the frame, not hard-coded to 800) so
# that the rows line up with `nums` when the two are concatenated.
datarank = data.reset_index(drop=True)

# "#" is the rank by Attack (1 = highest). Previously it was filled with the
# first 50 "#" values of the unsorted frame, which have no relation to the
# sorted rows they were attached to.
n_top = min(TOP_N, len(datarank))
nums = pd.Series(range(1, n_top + 1), name='#')

sorted_data = pd.concat([datarank, nums], axis=1).iloc[:n_top]

In [None]:
# Quick look at the first rows of the Attack-sorted frame.
sorted_data.head()

In [None]:
# Bubble plot of the strongest attackers: y = Attack, bubble size = Defense,
# bubble colour = Speed.
# Size and colour are taken from `sorted_data`, the same rows that supply x
# and y, instead of the full `datarank` frame, so every marker attribute has
# one value per plotted point.
data_bubble = [
    dict(
        x=sorted_data['#'],
        y=sorted_data['Attack'],
        mode='markers',
        marker=dict(
            size=sorted_data.Defense / 3,  # scaled down to keep bubbles readable
            color=sorted_data.Speed,
            showscale=True,                # draw the colour bar
        ),
        text=sorted_data.Name,             # hover text
    )
]
bubble_layout = dict(
    title='Highest Attack values (bubble size: Defense, color: Speed)',
    xaxis=dict(title='#'),
    yaxis=dict(title='Attack'),
)
iplot(dict(data=data_bubble, layout=bubble_layout))

Lastly, we visualize the Attack statistics of the different Pokemon types with a rainbow violin plot, ordered from the most to the least common type.

* **The biggest advantage of using Plotly for violin plots with inner boxes is that its interactive features let you observe both the estimates of location and the distribution.**

In [None]:
# One violin of Attack per primary type, ordered from the most to the least
# frequent type.
# The ordering is computed once (it was re-evaluated twice per iteration) and
# the number of violins/colours follows the data instead of a hard-coded 18.
type_order = data['Type 1'].value_counts(ascending=False).index
n_types = len(type_order)

# Evenly spaced hues give each type its own colour.
c = ['hsl(' + str(h) + ',50%,50%)' for h in np.linspace(0, 255, n_types)]

types = []
for type_name, colour in zip(type_order, c):
    types.append({
        "type": 'violin',
        "y": data.Attack[data['Type 1'] == type_name],
        "name": type_name,
        "marker": {"color": colour},
        "box": {"visible": True},        # inner box plot
        "meanline": {"visible": True},   # mark the mean
    })
iplot(types)

**BONUS**: Box plots of Attack for five Pokemon types

In [None]:
# Splitting Data
data_water = data[data['Type 1']=='Water']
data_grass = data[data['Type 1']=='Grass']
data_fire = data[data['Type 1']=='Fire']
data_bug = data[data['Type 1']=='Bug']
data_psychic = data[data['Type 1']=='Psychic']

box1 = go.Box(
                y= data_water.Attack,
                name= 'Water Pokemons',
                marker = dict(color = 'rgb(12, 128, 128)'),
                boxmean='sd',
                boxpoints='all')
box2 = go.Box(
                y= data_grass.Attack,
                name= 'Grass Pokemons',
                marker = dict(color = 'rgb(100, 12, 38)'),
                boxmean='sd',
                boxpoints='all')
box3 = go.Box(
                y= data_fire.Attack,
                name= 'Fire Pokemons',
                marker = dict(color = 'rgb(12, 128, 128)'),
                boxmean='sd',
                boxpoints='all')
box4 = go.Box(
                y= data_bug.Attack,
                name= 'Bug Pokemons',
                marker = dict(color = 'rgb(50, 40, 100)'),
                boxmean='sd',
                boxpoints='all')
box5 = go.Box(
                y= data_psychic.Attack,
                name= 'Psychic Pokemons',
                marker = dict(color = 'rgb(45, 179, 66)'),
                boxmean='sd',
                boxpoints='all')

data_boxes = [box1,box2,box3,box4,box5]
iplot(data_boxes)

* I'll be thankful if you upvote this kernel in case you like it, thanks in advance.


# **END**