In [None]:
!pip install dataprep

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import warnings
warnings.filterwarnings('ignore')
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import dataprep
from dataprep.eda import create_report
from dataprep.eda import plot
from dataprep.eda import plot_correlation
from dataprep.eda import plot_missing

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Data Preparation

In [None]:
# Load the Pokemon CSV from the Kaggle input directory and preview the first 10 rows.
df = pd.read_csv('/kaggle/input/pokemon/Pokemon.csv')
df.head(10)

In [None]:
df.describe()

In [None]:
df.info()

In [None]:
df.isnull().sum()

In [None]:
# Replace missing 'Type 2' values with the placeholder 'none'.
# The result is assigned back to the column: calling fillna(inplace=True) on
# df['Type 2'] is a chained in-place operation that pandas warns about and that
# does not modify df when Copy-on-Write is enabled.
df['Type 2'] = df['Type 2'].fillna('none')

In [None]:
df.isnull().sum()

In [None]:
# True if at least one value in the 'Name' column occurs more than once.
pokemon_duplicate_check = df['Name'].duplicated().any()
pokemon_duplicate_check

In [None]:
df.head()

## Pokemon Distribution with Pie Charts

In [None]:
def f(data, a, b, title):
    """Draw a pie chart showing how the rows of ``data`` split across groups.

    NOTE: a later cell in this notebook defines another function that is also
    named ``f``. Once that cell has run, this definition has to be executed
    again before any of the pie-chart cells can be re-run.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to summarise.
    a : str
        Column to group by; each distinct value becomes one pie slice.
    b : str
        Column whose non-null entries are counted inside each group.
    title : str
        Title displayed above the chart.
    """
    pokemon_dist = data.groupby(a)[b].count()
    # Draw on the Axes created here instead of relying on pyplot's "current axes".
    fig, ax = plt.subplots(figsize=[10, 10])
    ax.pie(
        x=pokemon_dist,
        labels=pokemon_dist.index,
        autopct='%.1f%%',
        # One offset per slice, sized from the series that is actually plotted.
        explode=[0.05] * len(pokemon_dist),
        pctdistance=0.5,
    )
    ax.set_title(title)
    plt.show()

In [None]:
f(data = df, a = 'Generation', b = 'Name', title = 'Distribution of Pokemon for each Generation')

In [None]:
f(data = df, a = 'Type 1', b = 'Name', title = 'Pokemon Type 1 Distribution')

In [None]:
# 'Type 1' distribution for each generation (1-6): one pie chart per generation,
# produced by a loop instead of six copy-pasted cells.
for generation in range(1, 7):
    f(
        data=df[df['Generation'] == generation],
        a='Type 1',
        b='Name',
        title=f'Generation {generation} Pokemon Type 1 Distribution',
    )

In [None]:
f(data = df, a = 'Type 2', b = 'Name', title = 'Pokemon Type 2 Distribution')

In [None]:
# 'Type 2' distribution for each generation (1-6): one pie chart per generation.
# Looping also removes the Generation 3 chart that was plotted twice.
for generation in range(1, 7):
    f(
        data=df[df['Generation'] == generation],
        a='Type 2',
        b='Name',
        title=f'Generation {generation} Pokemon Type 2 Distribution',
    )

In [None]:
df.head()

## Max Status of Each Pokemon Generation

In [None]:
def f(generation):
    """Print which Pokemon hold the maximum of each stat within one generation.

    NOTE: this reuses the name ``f`` and therefore replaces the pie-chart
    helper defined earlier in the notebook as soon as this cell has run.

    Reads the module-level DataFrame ``df``.

    Parameters
    ----------
    generation
        Value of the 'Generation' column used to select the rows to inspect.
    """
    # 'HP' is listed so the report covers the same stats as the distribution
    # and pair-plot sections of this notebook.
    status = ['Total', 'HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']
    df_gen = df[df['Generation'] == generation]
    print('Generation ', generation, ' Pokemon Max Status')
    for i in status:
        stat_max = df_gen[i].max()  # computed once and reused below
        # Every Pokemon tied for the maximum is reported, not just the first.
        maxstats = df_gen.loc[df_gen[i] == stat_max, 'Name']
        print('Max ', i, ' : ', maxstats.values, stat_max)

In [None]:
f(1)

In [None]:
f(2)

In [None]:
f(3)

In [None]:
f(4)

In [None]:
f(5)

In [None]:
f(6)

In [None]:
df.head()

In [None]:
# Concatenate the two type columns, separated by a single space, into one combined label column.
df['Type 1 + Type 2'] = df['Type 1'] +' '+ df['Type 2']
df.head()

In [None]:
# 'Average' is 'Total' divided by 6, rounded to one decimal place.
# NOTE(review): presumably 6 is the number of base stats summed into 'Total'
# (HP, Attack, Defense, Sp. Atk, Sp. Def, Speed) — confirm against the dataset.
df['Average'] = (df['Total']/6).round(1)
df.head()

## Pokemon Status Distribution (Density Chart)

In [None]:
def a(generation, color):
    """Plot the distribution of each stat for one generation on a 4x2 grid.

    Reads the module-level DataFrame ``df`` (including its 'Average' column).

    Parameters
    ----------
    generation
        Value of the 'Generation' column used to select the rows to plot.
    color : str
        Colour name that is prefixed with 'tab:' to build the matplotlib
        colour, e.g. 'red' -> 'tab:red'.
    """
    stat_columns = ['Total', 'HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed', 'Average']
    df_gen = df[df['Generation'] == generation]
    c = 'tab:' + color
    print('Generation ', generation, ' Pokemon Status Distribution')
    # Build the whole grid in one call. Creating a single full-figure Axes first
    # and then adding plt.subplot(4, 2, n) panels leaves that first Axes behind
    # the grid.
    fig, axes = plt.subplots(4, 2, figsize=(20, 20))
    for ax, column in zip(axes.flat, stat_columns):
        # histplot(kde=True, stat='density') is the replacement for the
        # deprecated distplot; cut=3 lets the KDE curve extend past the data
        # range as distplot's did.
        sns.histplot(
            df_gen[column],
            bins=10,
            color=c,
            kde=True,
            stat='density',
            kde_kws={'cut': 3},
            ax=ax,
        )

In [None]:
a(1,color = 'red')

In [None]:
a(2,'blue')

In [None]:
a(3,'green')

In [None]:
a(4,'purple')

In [None]:
a(5,'orange')

In [None]:
a(6,'grey')

In [None]:
df.head()

## Boxplot for Pokemon Status

In [None]:
# Overall boxplot: remove the listed aggregate/identifier columns, then plot
# what is left of the frame in wide form.
excluded_columns = ['Total', 'Legendary', 'Generation', '#', 'Average']
stats_df = df.drop(columns=excluded_columns)
plt.figure(figsize=(15, 10))
sns.boxplot(data=stats_df)

In [None]:
# One boxplot per generation, stacked vertically in a single figure.
fig, axes = plt.subplots(6, 1, figsize=(20, 80))
for ax, i in zip(axes, [1, 2, 3, 4, 5, 6]):
    df_gen = df[df['Generation'] == i]
    # 'Average' is dropped too, so each panel shows the same columns as the
    # overall boxplot of the whole dataset.
    stats_df = df_gen.drop(['Total', 'Legendary', 'Generation', '#', 'Average'], axis=1)
    # Draw on the explicit Axes rather than on pyplot's current subplot.
    sns.boxplot(data=stats_df, ax=ax)
    ax.set_title('Generation ' + str(i) + ' Pokemon Status Boxplot', size=14)

## Pokemon Type Population grouped by Generation

In [None]:
df.head()

In [None]:
# Row count for every (Generation, Type 1, Type 2) combination, flattened into
# a regular table with a 'count' column.
group_keys = ["Generation", "Type 1", 'Type 2']
group_sizes = df.groupby(group_keys).size()
type1_2 = pd.DataFrame({'count': group_sizes}).reset_index()
type1_2.head()

In [None]:
fig = px.treemap(type1_2, names = 'Type 2', values = 'count', path = ['Generation', 'Type 1', 'Type 2'], 
                 color_discrete_sequence = px.colors.qualitative.Set1, width = 1500, height = 1000)
fig.show()

## Pokemon Type Countplot

In [None]:
plt.figure(figsize=(15,10))
sns.countplot(x='Type 1', data=df,order = df['Type 1'].value_counts().index)
plt.xticks(rotation = 45)
plt.show()

In [None]:
plt.figure(figsize=(15,10))
sns.countplot(x='Type 2', data=df,order = df['Type 2'].value_counts().index)
plt.xticks(rotation = 45)
plt.show()

## Linear Regression Between Pokemon Stats

In [None]:
# Regression plot for each pair of stats, split by the 'Legendary' flag.
# The fit-line colour is no longer forced to red: with hue set, that override
# painted both groups' regression lines and confidence bands red, so they could
# not be told apart. Each line now takes the colour of its hue group.
stat_pairs = [
    ('Attack', 'Defense'),
    ('Attack', 'Sp. Atk'),
    ('Defense', 'Sp. Def'),
    ('Attack', 'HP'),
    ('Defense', 'HP'),
    ('Attack', 'Speed'),
    ('Defense', 'Speed'),
    ('HP', 'Speed'),
]
for x_stat, y_stat in stat_pairs:
    sns.lmplot(x=x_stat, y=y_stat, data=df, hue='Legendary')

In [None]:
sns.pairplot(df, vars = ['HP','Attack','Defense','Sp. Atk','Sp. Def','Speed'],kind = 'reg',hue='Legendary')

## Per-Column Statistics with DataPrep

In [None]:
plot(df,'Total')

In [None]:
plot(df,'HP')

In [None]:
plot(df,'Attack')

In [None]:
plot(df,'Defense')

In [None]:
plot(df,'Sp. Atk')

In [None]:
plot(df,'Sp. Def')

In [None]:
plot(df,'Speed')

In [None]:
plot(df,'Average')

In [None]:
plot(df,'Type 1')

In [None]:
plot(df, "Type 2")

In [None]:
plot(df, 'Type 1 + Type 2')