In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt #plotting, data viz

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily build the full path of every file found under the Kaggle input directory,
# then print the paths one per line in the order os.walk yields them.
kaggle_input_files = (
    os.path.join(folder, leaf)
    for folder, _subdirs, leaves in os.walk('/kaggle/input')
    for leaf in leaves
)
for input_file in kaggle_input_files:
    print(input_file)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Load the Pokemon dataset from the Kaggle input directory into a DataFrame.
pokemon_csv_path = '/kaggle/input/pokemon/pokemon.csv'
pokemon_table = pd.read_csv(pokemon_csv_path)

In [None]:
# Average each base stat per (type1, type2) combination, then rank the combinations
# by the sum of those six averages (stats_total), highest first.
#
# The stat columns are selected BEFORE aggregating: pokemon_table also carries text
# columns (e.g. 'name'), and calling .mean() on a groupby that still contains them
# raises a TypeError on pandas >= 2.0.
#
# NOTE: groupby drops rows whose grouping key is missing by default, so any Pokemon
# without a type2 value is not represented in this table.
pokestats = (
    pokemon_table
    .groupby(['type1', 'type2'])[['hp', 'speed', 'attack', 'defense', 'sp_attack', 'sp_defense']]
    .mean()
)
win_types = (
    pokestats
    .assign(stats_total=pokestats.sum(axis=1))  # total of the six averaged stats
    .sort_values(by='stats_total', ascending=False)
    .reset_index()  # turn type1/type2 back into regular columns
)
win_types

In [None]:
# Bar chart of the ten type combinations with the highest stats_total.
win_types_top_10 = win_types[:10]
top_10_totals = win_types_top_10.get('stats_total')
top_10_mean = top_10_totals.mean()

# Build the tick labels from the data itself so they always match the bars,
# rather than relying on a hand-typed list that goes stale if the ranking changes.
top_10_labels = (win_types_top_10.get('type1') + ' / ' + win_types_top_10.get('type2')).tolist()

fig, ax = plt.subplots(figsize=(18, 12))
ax.barh(win_types_top_10.index, top_10_totals)
ax.set_yticks(win_types_top_10.index)
ax.set_yticklabels(top_10_labels)
ax.set_xlabel('stats_total', size=15)
ax.set_ylabel('Pokemon types', size=15)

# Red vertical line: mean stats_total across the ten plotted combinations.
ax.axvline(x=top_10_mean, color='red')

# Caption for the mean line, positioned above the topmost bar (bars sit at y = 0..9).
ax.text(top_10_mean - 120, 10.2, 'average stats_total of top 10 pokemon types', size=15)

In [None]:
# Type combinations whose stats_total is at least 700.
# Conclusion: Kyurem, Latias/Latios, and Tyranitar are overpowered // each of these
# type combinations has only 1 pokemon associated (verified with pokedex)
over_700_mask = win_types.get('stats_total') >= 700
win_types[over_700_mask]

In [None]:
# Show every type combination that has 'dragon' as either its primary or secondary type.
# Overall, dragons are pretty powerful, appearing 7 times in the top 10 as both
# primary and secondary types.
dragon_as_type1 = win_types.get('type1').str.contains('dragon')
dragon_as_type2 = win_types.get('type2').str.contains('dragon')
win_types[dragon_as_type1 | dragon_as_type2]


In [None]:
# Mean stats_total over all type-combination rows that include 'dragon' in type1 or type2.
# Dragon types overall (including both type1 and type2 dragon types) had a
# stats_total average of ~567.
# NOTE(review): win_types has one row per type combination, so this is an average of
# per-combination averages, not an average over individual Pokemon.
dragon_combo_mask = (
    win_types.get('type1').str.contains('dragon')
    | win_types.get('type2').str.contains('dragon')
)
dragon_combos = win_types[dragon_combo_mask]
drag_mean = dragon_combos.get('stats_total').mean()
drag_mean

In [None]:
# Rank primary types by the sum of their six average base stats.
#
# The stat columns are selected BEFORE aggregating: pokemon_table also carries text
# columns (e.g. 'name'), and calling .mean() on a groupby that still contains them
# raises a TypeError on pandas >= 2.0.
type1_stat_means = (
    pokemon_table
    .groupby('type1')[['hp', 'speed', 'attack', 'defense', 'sp_attack', 'sp_defense']]
    .mean()
)
type1_consideration = (
    type1_stat_means
    .assign(stats_total=type1_stat_means.sum(axis=1))
    .sort_values(by='stats_total', ascending=False)
    .get(['stats_total'])  # keep only the ranking column, indexed by type1
)
print('conclusion: if considering only primary types, dragon is the best with a stats_total lead of about 31 points')
type1_consideration

In [None]:
# Horizontal bar chart of stats_total for every primary type, with the mean marked.
type1_totals = type1_consideration.get('stats_total')
type1_mean_total = type1_totals.mean()

fig, ax = plt.subplots(figsize=(18, 12))
ax.barh(type1_consideration.index, type1_totals)
ax.set_xlabel('stats_total', size=15)
ax.set_ylabel('primary pokemon types', size=15)

# Red vertical line: average stats_total across primary types.
ax.axvline(x=type1_mean_total, color='red')

# Caption for the average line.
ax.text(type1_mean_total - 40, 19, 'average stats_total', size=15)

print('dragons sit clearly at the top of primary types, standing at about an 86 stat point gap above the average!')

In [None]:
# Rank secondary types by the sum of their six average base stats.
#
# The stat columns are selected BEFORE aggregating: pokemon_table also carries text
# columns (e.g. 'name'), and calling .mean() on a groupby that still contains them
# raises a TypeError on pandas >= 2.0.
#
# NOTE: groupby drops rows whose grouping key is missing by default, so any Pokemon
# without a type2 value does not contribute to this table.
type2_stat_means = (
    pokemon_table
    .groupby('type2')[['hp', 'speed', 'attack', 'defense', 'sp_attack', 'sp_defense']]
    .mean()
)
type2_consideration = (
    type2_stat_means
    .assign(stats_total=type2_stat_means.sum(axis=1))
    .sort_values(by='stats_total', ascending=False)
    .get(['stats_total'])  # keep only the ranking column, indexed by type2
)
print('Conclusion: if considering only secondary types, fighting is the best, but Dragon is second(lagging by about 6 stat points)!')
type2_consideration

In [None]:
# Average stats_total across all secondary types.
type2_average_total = type2_consideration.get('stats_total').mean()
type2_average_total

In [None]:
# Horizontal bar chart of stats_total for every secondary type, with the mean marked.
type2_totals = type2_consideration.get('stats_total')
type2_mean_total = type2_totals.mean()

fig, ax = plt.subplots(figsize=(18, 12))
ax.barh(type2_consideration.index, type2_totals)
ax.set_xlabel('stats_total', size=15)
ax.set_ylabel('secondary pokemon types', size=15)

# Red vertical line: average stats_total across secondary types.
ax.axvline(x=type2_mean_total, color='red')

# Caption for the average line.
ax.text(type2_mean_total - 40, 19, 'average stats_total', size=15)
print("Though it's not the top secondary type, dragon is still above the average stats_total for secondary type, leading by about 52 stat points!")

In [None]:
# Headline takeaway of the descriptive analysis above.
final_conclusion = 'Final conclusion: Dragons are by far the strongest type, showing insanely high average stat points compared to other pokemon types.'
print(final_conclusion)

In [None]:
# Was it just a coincidence? Time for a permutation test!
# Build the per-Pokemon table used by the test: the six base stats, their row-wise
# sum (stats_total), and the type1 / type2 / name columns copied from pokemon_table.
base_stats = pokemon_table.get(['hp', 'speed', 'attack', 'defense', 'sp_attack', 'sp_defense'])
permute_data = base_stats.assign(
    stats_total=base_stats.sum(axis=1),
    type1=pokemon_table.get('type1'),
    type2=pokemon_table.get('type2'),
    name=pokemon_table.get('name'),
)
permute_data

In [None]:
# Permutation test.
# Test statistic: average stats_total of the Pokemon labelled as dragon (type1 or type2).
# Under the null hypothesis the stat totals are unrelated to type, so we shuffle the
# totals across Pokemon and recompute the dragon average many times.
N_PERMUTATIONS = 5000
rng = np.random.default_rng(42)  # fixed seed so Restart & Run All reproduces the same result

# The dragon labels never change between repetitions, so compute the mask once.
# na=False makes a missing type2 count explicitly as "not dragon".
is_dragon = (
    permute_data.get('type1').str.contains('dragon', na=False)
    | permute_data.get('type2').str.contains('dragon', na=False)
).to_numpy()
stats_totals = permute_data.get('stats_total').to_numpy()

# Preallocate the results instead of growing an array with np.append on every pass.
statistic = np.empty(N_PERMUTATIONS)
for i in range(N_PERMUTATIONS):
    shuffled_totals = rng.permutation(stats_totals)
    statistic[i] = shuffled_totals[is_dragon].mean()

In [None]:
# Histogram of the simulated dragon averages, with the observed dragon average
# (drag_mean) marked as a red dot near the x-axis.
fig, ax = plt.subplots(figsize=(20, 14))
ax.hist(statistic, density=True)
ax.set_xlabel('stats_total', size=15)
ax.set_ylabel('frequency', size=15)
ax.set_title('distribution of total dragon stat points after a permutation test', size=15)

# Observed value and its caption.
ax.scatter(drag_mean, 0.0001, color='red')
ax.text(drag_mean - 20, 0.001, 'dragon average stats_total', size=14)

In [None]:
# Observed test statistic, computed exactly the way each simulated value in
# `statistic` is computed: the mean stats_total over individual Pokemon that have
# 'dragon' as type1 or type2. (drag_mean is an average of per-type-combination
# averages taken from win_types, so it is not comparable to the simulated values.)
dragon_mask = (
    permute_data.get('type1').str.contains('dragon', na=False)
    | permute_data.get('type2').str.contains('dragon', na=False)
)
observed_dragon_mean = permute_data[dragon_mask].get('stats_total').mean()

# p-value: share of shuffled runs whose dragon average is at least the observed one.
p_value = np.mean(statistic >= observed_dragon_mean)

# State the conclusion that the p-value actually supports (5% significance level)
# instead of asserting significance unconditionally.
if p_value < 0.05:
    verdict = ' , meaning that it was definitely not coincidence that dragons were as strong as they were.'
else:
    verdict = ' , meaning that we cannot rule out that dragons were as strong as they were by coincidence.'
'after our permutation test, we saw a p-value of ' + str(p_value) + verdict