# Project 3

In [None]:
import pokebase as pb
pb.cache.API_CACHE

import json
import requests
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

from scipy.stats import pearsonr

## Gathering all Generation 1 Pokemon from [PokeAPI](https://pokeapi.co/)

*Note: pinging PokeAPI for each pokemon object in generation 1 is a slow process. Expect this to take around 15-30 seconds.*

In [None]:
# Download the generation 1 resource and then one pokemon object per species.
# A single Session is shared by every request so the connection to PokeAPI is
# reused instead of being re-opened for each pokemon.
with requests.Session() as session:
    gen1Response = session.get('https://pokeapi.co/api/v2/generation/1', timeout=30)
    # Nothing below can work without the generation listing, so fail loudly here
    gen1Response.raise_for_status()
    gen1 = gen1Response.json()

    gen1Moves = gen1['moves']   # a list of moves included in the gen 1 games
    gen1Types = gen1['types']   # a list of valid pokemon types in the gen 1
    gen1Species = gen1['pokemon_species']   # a list of pokemon species introduced in gen 1

    # Get pokemon objects for each of the pokemon from gen 1
    gen1Pokemon = []
    for species in gen1Species:
        pokemonResponse = session.get(
            f"https://pokeapi.co/api/v2/pokemon/{species['name']}", timeout=30
        )
        # Report and skip a pokemon that cannot be retrieved rather than
        # crashing later while decoding an error page as JSON
        if pokemonResponse.status_code != 200:
            print(f"Error retrieving pokemon: {species['name']}")
            continue
        gen1Pokemon.append(pokemonResponse.json())

### Gather remaining Generation 2-9 Pokemon (Optional - to get the full dataset for some analyses)
This takes about 8 minutes. Not needed for most of the analysis but useful for some of our graphs and analyses.

In [None]:
# # Optionally get the rest of the generations (takes about 8 minutes)
# for i in range(2, 10): 
#     gen = requests.get(f'https://pokeapi.co/api/v2/generation/{i}')
#     if gen.status_code == 200:
#         try:
#             gen = json.loads(gen.content)
#             genSpecies = gen['pokemon_species']
#             genPokemon = []
#             for species in genSpecies:
#                 pokemon = requests.get(f"https://pokeapi.co/api/v2/pokemon/{species['name']}")
#                 # Some Pokemon throw errors, probably because they have unique naming in the API
#                 if pokemon.status_code == 200:
#                     try:
#                         pokemon = json.loads(pokemon.content)
#                         genPokemon.append(pokemon)
#                     except json.JSONDecodeError:
#                         print(f"Error decoding JSON for pokemon: {species['name']}")
#                 else:
#                     print(f"Error retrieving pokemon: {species['name']}")
#             # add to gen1Pokemon list
#             gen1Pokemon += genPokemon
#         except json.JSONDecodeError:
#             print(f"Error decoding JSON for generation {i}")
#     else:
#         print(f"Error retrieving generation {i}")

## Converting Pokemon JSON data into usable dataframes

In [None]:
# Build a one-column table holding the name of every move in the gen 1 games.
# It is used later to validate the moves listed for each pokemon.
movesDF = pd.DataFrame(gen1Moves)[['name']].rename(columns={'name': 'move'})
movesDF

*Note: pinging PokeAPI for each move object is a slow process. Expect this to take around 15-30 seconds.*

In [None]:
# Add more info about each move
for index, row in movesDF.iterrows():
    moveDetails = requests.get(f'https://pokeapi.co/api/v2/move/{row["move"]}')
    moveDetails = json.loads(moveDetails.content)
    movesDF.at[index, 'power'] = moveDetails['power']
    movesDF.at[index, 'damage_class'] = moveDetails['damage_class']['name']
    movesDF.at[index, 'type'] = moveDetails['type']['name']

movesDF

In [None]:
# Build two dataframes from the raw pokemon objects in gen1Pokemon:
#   pokemonMovesDF  one row per (pokemon, valid gen 1 move) pair
#   pokemonDF       one row per (pokemon, type) pair with base stats, a count of
#                   valid gen 1 moves ('moves') and the sum of base stats ('stat-total')

# Exclusive upper bound on the pokemon id for generations 1 through 8.
# Ids at or above the last bound are treated as generation 9.
generationIdLimits = [152, 252, 387, 494, 650, 722, 810, 906]

# NOTE(review): 'red-blue' and 'yellow' are taken to be the PokeAPI version-group
# names of the gen 1 games - confirm against the API documentation.
gen1VersionGroups = ('red-blue', 'yellow')


def generationForId(pokemonId):
    """Return the generation a pokemon was introduced in, based on its id."""
    for generation, limit in enumerate(generationIdLimits, 1):
        if pokemonId < limit:
            return generation
    return len(generationIdLimits) + 1


def levelLearnedAt(move):
    """Return the level at which a pokemon learns `move`.

    The entry for a gen 1 version group is preferred. If the move has no such
    entry the first listed entry is used, and None is returned when the move
    has no version group details at all.
    """
    details = move['version_group_details']
    for detail in details:
        if detail['version_group']['name'] in gen1VersionGroups:
            return detail['level_learned_at']
    return details[0]['level_learned_at'] if details else None


# Column accumulators for the pokemon stats dataframe
pokemonColumns = {
    'name': [],
    'generation': [],   # the generation the pokemon was introduced in
    'base_experience': [],  # experience gained from defeating this pokemon
    'height': [],   # in decimetres
    'weight': [],   # in hectograms
    'abilities': [],    # total number of abilities
    'type': [],
    'type-slot': [],    # denotes whether the recorded type is the pokemon's primary or secondary type
    # All stat values represent base stats
    'hp': [],
    'attack': [],
    'defense': [],
    'special-attack': [],
    'special-defense': [],
    'speed': [],
}

# Column accumulators for the moves listed on each pokemon
pokemonMoveColumns = {
    'name': [],
    'move': [],
    'level_learned_at': [],
}

for pokemon in gen1Pokemon:
    # Typings for some pokemon have changed since gen 1
    if pokemon['past_types']:
        typeList = pokemon['past_types'][0]['types']
    else:
        typeList = pokemon['types']

    # These values do not depend on the type, so compute them once per pokemon
    generation = generationForId(pokemon['id'])
    abilityCount = len(pokemon['abilities'])

    # One row per type, which means a dual-typed pokemon is recorded twice
    for typeEntry in typeList:
        pokemonColumns['type-slot'].append(typeEntry['slot'])
        pokemonColumns['type'].append(typeEntry['type']['name'])

        pokemonColumns['name'].append(pokemon['name'])
        pokemonColumns['generation'].append(generation)
        pokemonColumns['base_experience'].append(pokemon['base_experience'])
        pokemonColumns['height'].append(pokemon['height'])
        pokemonColumns['weight'].append(pokemon['weight'])
        pokemonColumns['abilities'].append(abilityCount)

        # The stat names reported by the API double as the column names
        for stat in pokemon['stats']:
            pokemonColumns[stat['stat']['name']].append(stat['base_stat'])

    # Record every move for now.
    # Validity of these moves and a move count for each pokemon are handled below
    for move in pokemon['moves']:
        pokemonMoveColumns['name'].append(pokemon['name'])
        pokemonMoveColumns['move'].append(move['move']['name'])
        pokemonMoveColumns['level_learned_at'].append(levelLearnedAt(move))

# Create a dataframe from the moves gathered from the pokemon
pokemonMovesDF = pd.DataFrame(pokemonMoveColumns)
# Purge the overall moves list of non-gen-1 moves
pokemonMovesDF = pd.merge(pokemonMovesDF, movesDF, how='right', on='move')
# Interesting story: struggle is a move used by pokemon that have run out of all other moves,
# but is not itself a learnable move, thus the below is necessary
pokemonMovesDF = pokemonMovesDF.dropna(subset=['name'])
display(pokemonMovesDF)

# Count the number of valid gen 1 moves learnable by each pokemon
pokemonMoveCountsDF = pokemonMovesDF.groupby('name')['move'].count().reset_index()
pokemonMoveCountsDF.columns = ['name', 'moves']

# Create a dataframe of the pokemon stats
pokemonDF = pd.DataFrame(pokemonColumns)
# Utilize a gen 1 types dataframe to get rid of any invalid types
typesDF = pd.DataFrame(gen1Types)
typesDF = typesDF[['name']]
typesDF.columns = ['type']
pokemonDF = pd.merge(pokemonDF, typesDF, how='right', on='type')

# Finally, add the move counts of each pokemon
pokemonDF = pd.merge(pokemonDF, pokemonMoveCountsDF, on='name')
pokemonDF['stat-total'] = (
    pokemonDF['hp']
    + pokemonDF['attack']
    + pokemonDF['defense']
    + pokemonDF['special-attack']
    + pokemonDF['special-defense']
    + pokemonDF['speed']
)
pokemonDF

*Keep in mind that a pokemon with two types appears twice in this dataset, so duplicates may need to be dropped for analyses that do not involve typing.*

In [None]:
# Mean of each numeric attribute per pokemon type.
# pokemonDF has one row per (pokemon, type) pair, so a dual-typed pokemon
# contributes to the averages of both of its types.
typeStatColumns = [
    'base_experience',
    'height',
    'weight',
    'abilities',
    'hp',
    'attack',
    'defense',
    'special-attack',
    'special-defense',
    'speed',
    'moves',
    'stat-total',
]

# Each column is listed exactly once (the aggregation dict previously
# repeated the 'base_experience' key)
typeAveragesDF = pokemonDF.groupby('type').agg(dict.fromkeys(typeStatColumns, 'mean')).reset_index()

typeAveragesDF

## Base experience of pokemon by type

In [None]:
# Bar chart of the mean base experience per type, highest average first
experienceRankedDF = typeAveragesDF.sort_values(by='base_experience', ascending=False)
sns.barplot(data=experienceRankedDF, x='type', y='base_experience')

# Widen the figure so all of the type labels fit along the x axis
fig = plt.gcf()
fig.set_size_inches(10, 5)

In [None]:
# Compare the base experience distributions of bug and ice type pokemon:
# one KDE curve per type, a dashed line at each type's mean, and a t-test.
comparedTypes = ['bug', 'ice']

# Fix the colour of each type explicitly so the dashed mean lines always match
# their curve, instead of relying on the order seaborn happens to pick for hue
typeColors = dict(zip(comparedTypes, sns.color_palette(n_colors=len(comparedTypes))))

sns.displot(
    data=pokemonDF[pokemonDF['type'].isin(comparedTypes)],
    x='base_experience',
    hue='type',
    hue_order=comparedTypes,
    palette=typeColors,
    kind='kde',
    common_norm=False,  # normalise each type's curve on its own
)

plt.title('Base Experience Distribution of Ice and Bug Type Gen 1 Pokemon')
plt.xlabel('Base Experience')

# Mark the mean base experience of each type
for typeName in comparedTypes:
    mean = typeAveragesDF.loc[typeAveragesDF['type'] == typeName, 'base_experience'].astype(float).iloc[0]
    plt.axvline(
        mean,
        color=typeColors[typeName],
        linestyle='dashed',
        linewidth=2,
        label=f'{typeName.capitalize()} mean',
    )

fig = plt.gcf()
fig.set_size_inches(10, 5)

# Independent two-sample t-test on base experience, ice vs bug
display(stats.ttest_ind(pokemonDF[pokemonDF['type'] == 'ice']['base_experience'], pokemonDF[pokemonDF['type'] == 'bug']['base_experience']))

In [None]:
# Minimum of each numeric attribute per pokemon type.
typeStatColumns = [
    'base_experience',
    'height',
    'weight',
    'abilities',
    'hp',
    'attack',
    'defense',
    'special-attack',
    'special-defense',
    'speed',
    'moves',
    'stat-total',
]

# Each column is listed exactly once (the aggregation dict previously
# repeated the 'base_experience' key)
lowestTypeStatsDF = pokemonDF.groupby('type').agg(dict.fromkeys(typeStatColumns, 'min')).reset_index()

# Show the types ordered by their lowest base experience, smallest first
lowestTypeStatsDF.sort_values(by='base_experience')

In [None]:
# Maximum of each numeric attribute per pokemon type.
typeStatColumns = [
    'base_experience',
    'height',
    'weight',
    'abilities',
    'hp',
    'attack',
    'defense',
    'special-attack',
    'special-defense',
    'speed',
    'moves',
    'stat-total',
]

# Each column is listed exactly once (the aggregation dict previously
# repeated the 'base_experience' key)
highestTypeStatsDF = pokemonDF.groupby('type').agg(dict.fromkeys(typeStatColumns, 'max')).reset_index()

# Show the types ordered by their highest base experience, largest first
highestTypeStatsDF.sort_values(by='base_experience', ascending=False)

## Stats of single-typed pokemon vs dual-typed pokemon

In [None]:
# A row recorded in type slot 2 is a secondary type, so every such row
# identifies a dual-typed pokemon
isSecondaryTypeRow = pokemonDF['type-slot'] == 2
dualTypedDF = pokemonDF[isSecondaryTypeRow]
display(dualTypedDF)

# Any pokemon whose name never appears among the slot-2 rows is single-typed
appearsAsDualTyped = pokemonDF['name'].isin(dualTypedDF['name'])
singleTypedDF = pokemonDF[~appearsAsDualTyped]
singleTypedDF

In [None]:
# Average each base stat separately for dual-typed and single-typed pokemon,
# then plot the two groups side by side.
baseStatColumns = ['hp', 'attack', 'defense', 'special-attack', 'special-defense', 'speed']

# Long format: one row per stat with its average and the typing group it belongs to
dualTypedAveragesDF = (
    dualTypedDF[baseStatColumns]
    .mean()
    .rename_axis('stat')
    .reset_index(name='average')
)
dualTypedAveragesDF['typing'] = 'dual'
display(dualTypedAveragesDF)

singleTypedAveragesDF = (
    singleTypedDF[baseStatColumns]
    .mean()
    .rename_axis('stat')
    .reset_index(name='average')
)
singleTypedAveragesDF['typing'] = 'single'
display(singleTypedAveragesDF)

# Stack both groups so seaborn can split the bars by 'typing'
typedAveragesDF = pd.concat([dualTypedAveragesDF, singleTypedAveragesDF]).reset_index()

sns.barplot(data=typedAveragesDF, x='stat', y='average', hue='typing')
fig = plt.gcf()
fig.set_size_inches(10, 5)

In [None]:
# Sum of the absolute gaps between the dual-typed and single-typed averages,
# taken stat by stat (both tables list the stats in the same order)
statDifferentialsTotal = 0
for dualAverage, singleAverage in zip(dualTypedAveragesDF['average'], singleTypedAveragesDF['average']):
    statDifferentialsTotal += abs(dualAverage - singleAverage)
print(f'Total of individual stat average differentials: {statDifferentialsTotal}')

# Signed gap between the average stat totals of the two groups (dual minus single)
dualTypedStatTotalAverage = dualTypedDF[['stat-total']].mean().reset_index()
singleTypedStatTotalAverage = singleTypedDF[['stat-total']].mean().reset_index()
# Row 0, column 1 holds the mean itself; column 0 holds the label 'stat-total'
dualTotalMean = dualTypedStatTotalAverage.iloc[0, 1]
singleTotalMean = singleTypedStatTotalAverage.iloc[0, 1]
statTotalDifferential = dualTotalMean - singleTotalMean
print(f'Difference between average stat totals: {statTotalDifferential}')

In [None]:
# Independent two-sample t-test of dual-typed vs single-typed pokemon,
# run for each base stat in turn and finally for the stat total
testedColumns = [
    'hp',
    'attack',
    'defense',
    'special-attack',
    'special-defense',
    'speed',
    'stat-total',
]

for columnName in testedColumns:
    display(stats.ttest_ind(dualTypedDF[columnName], singleTypedDF[columnName]))

## Pokemon Speed Stat by Type
Let's look at the Speed stat on the pokemon and see if there are any significant differences between the types.

In [None]:
# Speed by type: a bar chart of the per-type averages followed by a box plot of
# the underlying values, both ordered from the fastest type to the slowest.
speedRankedDF = typeAveragesDF.sort_values(ascending=False, by='speed')
# order from highest to lowest speed avg
order = speedRankedDF['type']

sns.barplot(x='type', y='speed', data=speedRankedDF, order=order.tolist())
for position, value in enumerate(speedRankedDF['speed']):  # Speed values on the top
    plt.text(position, value, str(round(value, 2)), ha='center', va='bottom')
fig = plt.gcf()
fig.set_size_inches(10, 5)
plt.show()

# box plot of speed
# The ordering is passed to seaborn so that the boxes themselves are arranged
# by average speed. Relabelling the ticks afterwards with plt.xticks only
# changes the text under each box and leaves the labels not matching the data.
sns.boxplot(x='type', y='speed', data=pokemonDF, order=order.tolist())

fig = plt.gcf()
fig.set_size_inches(10, 5)
plt.show()


Now let's look at the speed stat over generations to see if there is stat inflation. We can look at the correlation between a stat and generation to see if there is a relationship.

*(This requires pulling the full dataset of Pokemon. If run on only the gen 1 data, every pokemon has the same generation, so the correlation is undefined.)*

In [None]:
# Calculate the Pearson correlation coefficient
correlation_coefficient, p_value = pearsonr(pokemonDF['generation'], pokemonDF['speed'])

# Create a scatter plot with a regression line
plt.figure(figsize=(10, 6))
sns.regplot(x='generation', y='speed', data=pokemonDF, ci=None, scatter_kws={'alpha':0.5})
plt.title('Speed vs. Generation for Pokémon')
plt.xlabel('Generation')
plt.ylabel('Speed')
plt.show()

print(f"Pearson Correlation Coefficient: {correlation_coefficient}")
print(f"P-Value: {p_value}")
