<p align="center">
  <img src="assets/pokemon-center.gif" alt="PKCenter" width="300"/>
</p>

In [1]:
# Checkpoint to begin loading the Showdown data set

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats as sp

# Tables read from the notebook's working directory.
showdown_df = pd.read_csv("showdown.csv")
unique_pokemons_df = pd.read_csv("unique_pokemons.csv")
gen5ou_usage_df = pd.read_csv("gen5ou_usage.csv")

# Generation 5 reference tables kept under dataset/
# (types and stats, judging by the file names).
gen5_types_df = pd.read_csv("./dataset/pokemon-fandom/pokemon_gen5_types_fixed.csv")
gen5_stats_df = pd.read_csv(r"dataset/bulbagarden/pokemon_gen5_stats.csv")


In [2]:
# Work on an independent copy: a plain assignment would only alias
# unique_pokemons_df, so the columns added below would silently appear on the
# frame loaded from unique_pokemons.csv as well.
usage_elo_df = unique_pokemons_df.copy()
elo_ranges = ["1000-1290", "1291-1390", "1391-1490", "1491-2000"]

# Seed a zeroed "<range> raw" and "<range> percent" column for every Elo
# bracket; later cells fill them in.
for elo_range in elo_ranges:
    usage_elo_df[elo_range + " raw"] = 0
    usage_elo_df[elo_range + " percent"] = 0
usage_elo_df

Unnamed: 0,pokemon,1000-1290 raw,1000-1290 percent,1291-1390 raw,1291-1390 percent,1391-1490 raw,1391-1490 percent,1491-2000 raw,1491-2000 percent
0,Amoonguss,0,0,0,0,0,0,0,0
1,Sableye,0,0,0,0,0,0,0,0
2,Sawsbuck,0,0,0,0,0,0,0,0
3,Honchkrow,0,0,0,0,0,0,0,0
4,Ferrothorn,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...
199,Electrode,0,0,0,0,0,0,0,0
200,Nidoqueen,0,0,0,0,0,0,0,0
201,Sigilyph,0,0,0,0,0,0,0,0
202,Umbreon,0,0,0,0,0,0,0,0


In [3]:
# Count how often each Pokemon is picked within each Elo bracket.
#
# The counts are computed in one vectorised pass and *assigned* to the
# "<bracket> raw" columns. The earlier row-by-row `+= 1` loop accumulated on
# top of whatever was already stored, so re-running the cell doubled every
# count; it also rescanned usage_elo_df once per team slot of every battle.

# Bracket of every battle. Conditions are tried in order, and anything that
# matches none of them (including a missing Elo) falls into the lowest
# bracket, exactly like the former if/elif/else chain.
battle_elo = showdown_df["Elo"].to_numpy()
battle_bracket = np.select(
    [battle_elo > 1490, battle_elo > 1390, battle_elo > 1290],
    ["1491-2000", "1391-1490", "1291-1390"],
    default="1000-1290",
)

# One row per (battle, team slot): the Pokemon picked and the battle's bracket.
team_slots = [f"Pokemon {slot}" for slot in range(1, 7)]
picks = (
    showdown_df[team_slots]
    .assign(bracket=battle_bracket)
    .melt(id_vars="bracket", value_name="pokemon")
)

# groupby drops missing Pokemon names, which matches the old loop where an
# empty slot never compared equal to any row of usage_elo_df.
pick_counts = (
    picks.groupby(["pokemon", "bracket"])
    .size()
    .unstack("bracket", fill_value=0)
    .reindex(columns=["1000-1290", "1291-1390", "1391-1490", "1491-2000"], fill_value=0)
)

# Align the counts with usage_elo_df by Pokemon name; names that never appear
# in showdown_df get 0.
for bracket in pick_counts.columns:
    usage_elo_df[bracket + " raw"] = (
        usage_elo_df["pokemon"].map(pick_counts[bracket]).fillna(0).astype("int64")
    )
usage_elo_df

Unnamed: 0,pokemon,1000-1290 raw,1000-1290 percent,1291-1390 raw,1291-1390 percent,1391-1490 raw,1391-1490 percent,1491-2000 raw,1491-2000 percent
0,Amoonguss,3,0,41,0,25,0,17,0
1,Sableye,2,0,5,0,8,0,0,0
2,Sawsbuck,0,0,0,0,1,0,0,0
3,Honchkrow,1,0,2,0,3,0,0,0
4,Ferrothorn,25,0,287,0,160,0,50,0
...,...,...,...,...,...,...,...,...,...
199,Electrode,0,0,1,0,0,0,0,0
200,Nidoqueen,0,0,2,0,0,0,0,0
201,Sigilyph,0,0,5,0,0,0,1,0
202,Umbreon,2,0,5,0,2,0,0,0


In [None]:
# Turn each bracket's raw pick counts into a percentage of all picks
# recorded in that bracket.
for elo_range in elo_ranges:
    raw_col, percent_col = f"{elo_range} raw", f"{elo_range} percent"
    total = usage_elo_df[raw_col].sum()
    usage_elo_df[percent_col] = usage_elo_df[raw_col].div(total).mul(100)
usage_elo_df

Unnamed: 0,pokemon,1000-1290 raw,1000-1290 percent,1291-1390 raw,1291-1390 percent,1391-1490 raw,1391-1490 percent,1491-2000 raw,1491-2000 percent
0,Amoonguss,3,0.454545,41,0.694444,25,0.805932,17,1.582868
1,Sableye,2,0.303030,5,0.084688,8,0.257898,0,0.000000
2,Sawsbuck,0,0.000000,0,0.000000,1,0.032237,0,0.000000
3,Honchkrow,1,0.151515,2,0.033875,3,0.096712,0,0.000000
4,Ferrothorn,25,3.787879,287,4.861111,160,5.157963,50,4.655493
...,...,...,...,...,...,...,...,...,...
199,Electrode,0,0.000000,1,0.016938,0,0.000000,0,0.000000
200,Nidoqueen,0,0.000000,2,0.033875,0,0.000000,0,0.000000
201,Sigilyph,0,0.000000,5,0.084688,0,0.000000,1,0.093110
202,Umbreon,2,0.303030,5,0.084688,2,0.064475,0,0.000000


In [None]:
# Shapiro-Wilk normality check on the usage percentages of each Elo bracket,
# read at the 0.05 level.
for elo_range in elo_ranges:
    stat, p = sp.shapiro(usage_elo_df[elo_range + " percent"])
    print(f"Shapiro-Wilk test {elo_range}: stat={stat}, p={p}")
    print("Likely normal" if p > 0.05 else "Likely not normal")

Shapiro-Wilk test 1000-1290: stat=0.5600170552018435, p=1.8970652961682416e-22
Likely not normal
Shapiro-Wilk test 1291-1390: stat=0.5379253711143102, p=6.254748790470104e-23
Likely not normal
Shapiro-Wilk test 1391-1490: stat=0.5273620837701019, p=3.7328310262572893e-23
Likely not normal
Shapiro-Wilk test 1491-2000: stat=0.5138679525923513, p=1.9553226049823525e-23
Likely not normal


In [10]:
# One Series of usage percentages per Elo bracket, in elo_ranges order.
usage_percent_elo = [usage_elo_df[f"{elo_range} percent"] for elo_range in elo_ranges]

# Levene's test for equal variances across the four brackets (0.05 level).
stat, p = sp.levene(*usage_percent_elo)
print(f"Levene's test: stat={stat}, p={p}")
print("Likely have equal variances" if p > 0.05 else "Likely do not have equal variances")

Levene's test: stat=0.01791569872172893, p=0.9967361166053423
Likely have equal variances


In [11]:
# Kruskal-Wallis H-test across the four Elo brackets (0.05 level).
stat, p = sp.kruskal(usage_percent_elo[0], usage_percent_elo[1], usage_percent_elo[2], usage_percent_elo[3])
# The label used to read "Levene's test" (copied from the previous cell) even
# though the statistic printed here comes from sp.kruskal.
print(f"Kruskal-Wallis test: stat={stat}, p={p}")
if p > 0.05:
    print("Likely have equal medians")
else:
    print("Likely do not have equal medians")

Levene's test: stat=19.658833296264717, p=0.00019974100624276498
Likely do not have equal medians


In [19]:
import scikit_posthocs as scik

# Pairwise Dunn post-hoc comparisons between the four Elo brackets, with
# Holm-adjusted p-values.
data = [usage_percent_elo[k] for k in (0, 1, 2, 3)]
labels = ['1000-1290', '1291-1390', '1391-1490', '1491-2000']

p_values = scik.posthoc_dunn(data, p_adjust='holm')
# Replace the default row/column labels with the bracket names.
p_values.index = labels
p_values.columns = labels

# Adjusted p-values, a blank line, then which pairs fall below 0.05.
print(p_values, "", p_values < 0.05, sep="\n")

           1000-1290  1291-1390  1391-1490  1491-2000
1000-1290   1.000000   0.095792   0.992621   0.107916
1291-1390   0.095792   1.000000   0.095792   0.000056
1391-1490   0.992621   0.095792   1.000000   0.107916
1491-2000   0.107916   0.000056   0.107916   1.000000

           1000-1290  1291-1390  1391-1490  1491-2000
1000-1290      False      False      False      False
1291-1390      False      False      False       True
1391-1490      False      False      False      False
1491-2000      False       True      False      False
