# Who is the best pokemon?

Import the necessary libraries.

In [32]:
from determine_job import DetermineJob
from generate_data import generate_random_pokemon
from simulate import EloAlgo
import pandas as pd
import numpy as np
import requests
import os
import random

import warnings
warnings.filterwarnings("ignore")

### Getting the data from API

In [33]:
def parse_json(url, isInfo, timeout=10):
    """Fetch one PokeAPI resource and flatten it into a plain dict.

    Parameters
    ----------
    url : str
        Full URL of the resource to request.
    isInfo : bool
        True when ``url`` points at a Pokemon resource (name, types, stats,
        moves); False when it points at a type resource (damage relations).
    timeout : float, optional
        Seconds to wait for the server before giving up. Defaults to 10 so a
        stalled connection cannot hang the notebook indefinitely.

    Returns
    -------
    dict
        For a Pokemon: one flat record keyed by ``NAME``, ``TYPE``, the six
        base stats, ``TOTAL`` (sum of the base stats) and ``MOVESET``; types
        and moves are joined into comma-separated strings.
        For a type: ``{type_name: {relation: [type names, ...]}}``.

    Raises
    ------
    requests.exceptions.RequestException
        On connection errors, timeouts, or a non-success HTTP status.
    """
    response = requests.get(url, timeout=timeout)
    # Surface 404/5xx explicitly instead of relying on the JSON decoder
    # failing on the error page.
    response.raise_for_status()
    content = response.json()

    if isInfo:
        # Assumes the API lists base stats in the order used below
        # (HP, attack, defense, special-attack, special-defense, speed).
        stats = [stat['base_stat'] for stat in content['stats']]
        return {
            'NAME': content['name'],
            'TYPE': ', '.join(p_type['type']['name'] for p_type in content['types']),
            'HP': stats[0],
            'ATTACK': stats[1],
            'DEFENSE': stats[2],
            'SPECIAL-ATTACK': stats[3],
            'SPECIAL-DEFENSE': stats[4],
            'SPEED': stats[5],
            'TOTAL': sum(stats),
            'MOVESET': ', '.join(move['move']['name'] for move in content['moves']),
        }

    damage_relations = content['damage_relations']
    return {
        content['name']: {
            relation: [entry['name'] for entry in targets]
            for relation, targets in damage_relations.items()
        }
    }
        

### Storing Pokemon Info in Excel File

Iterate over every Pokémon in the PokéAPI and store the results in a DataFrame called `poke_df`.

The DataFrame is cached in an Excel file so that later runs load it from disk instead of re-querying the API, which shortens debugging time and avoids repeatedly hitting API timeouts.

In [34]:
# Build the Pokemon table once and cache it; every run then loads the cache so
# the fresh-download path and the cached path yield exactly the same frame.
POKE_DB_PATH = 'excel_files/poke_db.xlsx'
POKE_COLUMNS = ['NAME', 'TYPE', 'HP', 'ATTACK', 'DEFENSE',
                'SPECIAL-ATTACK', 'SPECIAL-DEFENSE', 'SPEED',
                'TOTAL', 'MOVESET']
LAST_PRIMARY_ID = 1025      # ids above this switch to the secondary id range
FIRST_SECONDARY_ID = 10001  # first id of the secondary range

if not os.path.isfile(POKE_DB_PATH):
    poke_records = []
    poke_id = 1
    poke_id_secondary = FIRST_SECONDARY_ID

    while True:
        in_secondary_range = poke_id > LAST_PRIMARY_ID
        pID = poke_id_secondary if in_secondary_range else poke_id
        try:
            poke_records.append(parse_json(f'https://pokeapi.co/api/v2/pokemon/{pID}/', True))
        except (requests.exceptions.RequestException, ValueError):
            # A failed request or an undecodable body marks the end of the id
            # range. Anything else (KeyboardInterrupt, programming errors) is
            # allowed to propagate instead of silently truncating the data.
            break

        if in_secondary_range:
            poke_id_secondary += 1
        else:
            poke_id += 1

    if not poke_records:
        # Do not cache an empty table: later runs would load it forever.
        raise RuntimeError('No Pokemon could be downloaded; cache file was not written.')

    os.makedirs(os.path.dirname(POKE_DB_PATH), exist_ok=True)
    # Build the frame in one go rather than concatenating row by row.
    pd.DataFrame(poke_records, columns=POKE_COLUMNS).to_excel(POKE_DB_PATH)

# to_excel stored the index as an unnamed first column; drop it after loading.
poke_df = pd.read_excel(POKE_DB_PATH).drop('Unnamed: 0', axis=1)
poke_df.head()

Unnamed: 0,NAME,TYPE,HP,ATTACK,DEFENSE,SPECIAL-ATTACK,SPECIAL-DEFENSE,SPEED,TOTAL,MOVESET
0,bulbasaur,"grass, poison",45,49,49,65,65,45,318,"razor-wind, swords-dance, cut, bind, vine-whip..."
1,ivysaur,"grass, poison",60,62,63,80,80,60,405,"swords-dance, cut, bind, vine-whip, headbutt, ..."
2,venusaur,"grass, poison",80,82,83,100,100,80,525,"swords-dance, cut, bind, vine-whip, headbutt, ..."
3,charmander,fire,39,52,43,60,50,65,309,"mega-punch, fire-punch, thunder-punch, scratch..."
4,charmeleon,fire,58,64,58,80,65,80,405,"mega-punch, fire-punch, thunder-punch, scratch..."


### Storing Type Advantages in Excel File

Fetch the damage relations (type advantages) for every type and store them in a dictionary and an Excel file; they are used later when simulating matches.

In [35]:
# Build the type-advantage table once and cache it; every run then loads the
# cache so the fresh-download path and the cached path yield the same frame
# (the displayed cached frame holds the relation lists as strings and has a
# plain integer index).
TYPE_ADV_PATH = 'excel_files/type_advantages.xlsx'

if not os.path.isfile(TYPE_ADV_PATH):
    type_id = 1
    types = dict()

    while True:
        try:
            types.update(parse_json(f"https://pokeapi.co/api/v2/type/{type_id}", False))
        except (requests.exceptions.RequestException, ValueError):
            # A failed request or an undecodable body marks the end of the id
            # range; other exceptions propagate instead of being swallowed.
            break
        type_id += 1

    if not types:
        # Do not cache an empty table: later runs would load it forever.
        raise RuntimeError('No type data could be downloaded; cache file was not written.')

    os.makedirs(os.path.dirname(TYPE_ADV_PATH), exist_ok=True)
    pd.DataFrame.from_dict(types).to_excel(TYPE_ADV_PATH)

# to_excel stored the index as an unnamed first column; drop it after loading.
move_adv_df = pd.read_excel(TYPE_ADV_PATH).drop('Unnamed: 0', axis=1)
move_adv_df.head()

Unnamed: 0,normal,fighting,flying,poison,ground,rock,bug,ghost,steel,fire,water,grass,electric,psychic,ice,dragon,dark,fairy
0,['fighting'],"['flying', 'psychic', 'fairy']","['rock', 'electric', 'ice']","['ground', 'psychic']","['water', 'grass', 'ice']","['fighting', 'ground', 'steel', 'water', 'grass']","['flying', 'rock', 'fire']","['ghost', 'dark']","['fighting', 'ground', 'fire']","['ground', 'rock', 'water']","['grass', 'electric']","['flying', 'poison', 'bug', 'fire', 'ice']",['ground'],"['bug', 'ghost', 'dark']","['fighting', 'rock', 'steel', 'fire']","['ice', 'dragon', 'fairy']","['fighting', 'bug', 'fairy']","['poison', 'steel']"
1,[],"['normal', 'rock', 'steel', 'ice', 'dark']","['fighting', 'bug', 'grass']","['grass', 'fairy']","['poison', 'rock', 'steel', 'fire', 'electric']","['flying', 'bug', 'fire', 'ice']","['grass', 'psychic', 'dark']","['ghost', 'psychic']","['rock', 'ice', 'fairy']","['bug', 'steel', 'grass', 'ice']","['ground', 'rock', 'fire']","['ground', 'rock', 'water']","['flying', 'water']","['fighting', 'poison']","['flying', 'ground', 'grass', 'dragon']",['dragon'],"['ghost', 'psychic']","['fighting', 'dragon', 'dark']"
2,[],"['rock', 'bug', 'dark']","['fighting', 'bug', 'grass']","['fighting', 'poison', 'bug', 'grass', 'fairy']","['poison', 'rock']","['normal', 'flying', 'poison', 'fire']","['fighting', 'ground', 'grass']","['poison', 'bug']","['normal', 'flying', 'rock', 'bug', 'steel', '...","['bug', 'steel', 'fire', 'grass', 'ice', 'fairy']","['steel', 'fire', 'water', 'ice']","['ground', 'water', 'grass', 'electric']","['flying', 'steel', 'electric']","['fighting', 'psychic']",['ice'],"['fire', 'water', 'grass', 'electric']","['ghost', 'dark']","['fighting', 'bug', 'dark']"
3,"['rock', 'steel']","['flying', 'poison', 'bug', 'psychic', 'fairy']","['rock', 'steel', 'electric']","['poison', 'ground', 'rock', 'ghost']","['bug', 'grass']","['fighting', 'ground', 'steel']","['fighting', 'flying', 'poison', 'ghost', 'ste...",['dark'],"['steel', 'fire', 'water', 'electric']","['rock', 'fire', 'water', 'dragon']","['water', 'grass', 'dragon']","['flying', 'poison', 'bug', 'steel', 'fire', '...","['grass', 'electric', 'dragon']","['steel', 'psychic']","['steel', 'fire', 'water', 'ice']",['steel'],"['fighting', 'dark', 'fairy']","['poison', 'steel', 'fire']"
4,['ghost'],[],['ground'],[],['electric'],[],[],"['normal', 'fighting']",['poison'],[],[],[],[],[],[],[],['psychic'],['dragon']


### Finding Stat Spread of all Pokemons

The averages below serve as baseline values: when jobs are determined later, any stat that a job places no specific requirement on can simply be assumed to sit at the average base stat.

Example: by common competitive standards, a physical sweeper needs at least 101 Speed and 110 Attack to be considered strong; its remaining stats are irrelevant to the job, so they can be left at the baseline averages.

In [36]:
stat_names = ['HP', 'ATTACK', 'DEFENSE', 'SPECIAL-ATTACK', 'SPECIAL-DEFENSE', 'SPEED']

# One summary line per base stat: the column mean rounded to a whole number.
average_lines = [
    f"Average {stat} is {round(np.mean(poke_df[stat]))}"
    for stat in stat_names
]
print(*average_lines, sep="\n")
    

Average HP is 71
Average ATTACK is 82
Average DEFENSE is 75
Average SPECIAL-ATTACK is 74
Average SPECIAL-DEFENSE is 73
Average SPEED is 71


### Testing accuracy of DetermineJob Class

The `DetermineJob` class has multiple methods; the two used here are `get_jobs_primary` and `get_jobs_secondary`.
`get_jobs_primary` takes a list of a Pokémon's six base stats and returns its primary job, while `get_jobs_secondary` takes its moveset and returns a list of secondary jobs.

This cell simply checks how accurate the job-assignment system is. For the first 100 Pokémon,
the assigned roles look almost perfectly accurate.

In [37]:
job_determine = DetermineJob()

# Spot-check the first 100 rows: print each name with its stat-based primary
# job, followed by the moveset-based secondary jobs on the next line.
for position in range(100):
    entry = poke_df.iloc[position]
    base_stats = [
        entry['HP'],
        entry['ATTACK'],
        entry['DEFENSE'],
        entry['SPECIAL-ATTACK'],
        entry['SPECIAL-DEFENSE'],
        entry['SPEED'],
    ]
    print(entry['NAME'], job_determine.get_jobs_primary(base_stats))
    print(job_determine.get_jobs_secondary(entry['MOVESET']))

bulbasaur Jack-Of-All-Trades
['Staller', 'Toxi-Shufflers']
ivysaur Jack-Of-All-Trades
['Staller', 'Toxi-Shufflers']
venusaur Jack-Of-All-Trades
['Pseudo-Hazer', 'Staller', 'Toxi-Shufflers', 'Para-Shufflers']
charmander Special Sweeper
['Pseudo-Hazer', 'Staller', 'Toxi-Shufflers', 'Para-Shufflers']
charmeleon Special Sweeper
['Pseudo-Hazer', 'Staller', 'Toxi-Shufflers', 'Para-Shufflers']
charizard Special Sweeper
['Pseudo-Hazer', 'Staller', 'Toxi-Shufflers', 'Para-Shufflers']
squirtle Wall
['Hazer', 'Staller', 'Toxi-Shufflers']
wartortle Wall
['Staller', 'Toxi-Shufflers']
blastoise Jack-Of-All-Trades
['Pseudo-Hazer', 'Staller', 'Toxi-Shufflers', 'Para-Shufflers']
caterpie Jack-Of-All-Trades
[]
metapod Physical Tank
[]
butterfree Jack-Of-All-Trades
['Pseudo-Hazer', 'Staller', 'Toxi-Shufflers', 'Para-Shufflers', 'Pivots']
weedle Physical Sweeper
[]
kakuna Physical Tank
[]
beedrill Physical Sweeper
['Staller', 'Toxi-Shufflers', 'Pivots']
pidgey Physical Sweeper
['Pseudo-Hazer', 'Staller', 

### Creating Synthetic Data to predict actual Pokemons

Create a dataset of 100,000 random Pokémon and assign each one a job; the dataset is then used to train a model that predicts the jobs of the actual Pokémon.

The idea is to generate completely random Pokémon with random stats and label each one with a job using `DetermineJob`'s `get_jobs_primary` method. That method is used because it depends only on stats, whereas `get_jobs_secondary` depends only on the moveset, which requires no prediction.

In [38]:
# Seed the global RNGs so the synthetic dataset (and everything trained on it)
# can be reproduced on a fresh kernel.
# NOTE(review): generate_random_pokemon's RNG source is not visible from this
# notebook; seeding both the stdlib and NumPy global generators is assumed to
# cover it -- confirm against generate_data.py.
SYNTHETIC_SEED = 42
random.seed(SYNTHETIC_SEED)
np.random.seed(SYNTHETIC_SEED)

pokemon_df = generate_random_pokemon(100000)
# Fixed random_state so the displayed sample is stable across re-runs.
pokemon_df.sample(n=10, random_state=SYNTHETIC_SEED)

Unnamed: 0,HP,Attack,Defense,Special-Attack,Special-Defense,Speed,Job
11657,53,142,55,127,198,16,Special Tank
10227,217,49,89,14,135,15,Wall
94663,181,148,204,119,125,57,Physical Tank
15886,85,65,64,82,71,76,Jack-Of-All-Trades
12499,250,106,173,107,229,177,Wall
75584,2,46,139,113,103,33,Wall
36884,166,83,22,121,144,142,Jack-Of-All-Trades
17759,237,185,214,18,131,74,Wall
96026,182,20,58,172,94,94,Jack-Of-All-Trades
95383,30,118,54,173,63,78,Special Sweeper


### Creating Model for Predictions

Use the synthetic `pokemon_df` dataset to train a model that predicts the jobs of actual Pokémon.

A random forest was chosen because jobs form clusters in stat space rather than following a linear relationship; Pokémon with the same job can vary a lot in their stat spread.

In [39]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

# Treat the job label as a categorical target.
pokemon_df['Job'] = pokemon_df['Job'].astype('category')

# Hold out 20% of the synthetic Pokemon to measure accuracy.
X_train, X_test, y_train, y_test = train_test_split(
    pokemon_df.drop(['Job'], axis='columns'),
    pokemon_df.Job,
    test_size=0.2,
    random_state=42,
)

# Seed the forest as well: the split was already seeded, but an unseeded
# classifier makes the reported score change from run to run.
clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)
clf.score(X_test, y_test)

0.92545

### Using Model to Predict Pokemon Jobs

Assign each Pokémon a primary job, which is predicted from its stats alone.

Also assign secondary jobs, which are based on movesets.

In [40]:
# Primary job: predict every row in a single call. The stat columns are passed
# under the training frame's column names (same order as stat_names) so the
# model receives the feature names it was fitted with, and each cell receives
# a scalar label rather than a length-1 array.
stat_features = pd.DataFrame(poke_df[stat_names].to_numpy(), columns=X_train.columns)
poke_df['PRIMARY JOB'] = clf.predict(stat_features)

# Secondary job: derived from the moveset. Rows without a moveset (a missing
# value rather than a string) keep None.
poke_df['SECONDARY JOB'] = [
    ', '.join(job_determine.get_jobs_secondary(moveset)).upper()
    if isinstance(moveset, str) else None
    for moveset in poke_df['MOVESET']
]

poke_df.sample(n=10)

Unnamed: 0,NAME,TYPE,HP,ATTACK,DEFENSE,SPECIAL-ATTACK,SPECIAL-DEFENSE,SPEED,TOTAL,MOVESET,PRIMARY JOB,SECONDARY JOB
182,marill,"water, fairy",70,20,50,20,50,40,250,"mega-punch, ice-punch, slam, mega-kick, headbu...",Wall,"STALLER, TOXI-SHUFFLERS"
882,arctovish,"water, ice",90,90,100,80,90,55,505,"body-slam, bite, water-gun, hydro-pump, surf, ...",Wall,STALLER
552,krookodile,"ground, dark",95,117,80,65,70,92,519,"mega-punch, cut, mega-kick, sand-attack, body-...",Jack-Of-All-Trades,"PSEUDO-HAZER, STALLER, TOXI-SHUFFLERS, PARA-SH..."
671,skiddo,grass,66,65,48,62,57,52,350,"vine-whip, tackle, body-slam, take-down, doubl...",Jack-Of-All-Trades,"PSEUDO-HAZER, STALLER, TOXI-SHUFFLERS, PARA-SH..."
532,gurdurr,fighting,85,105,85,40,50,40,405,"pound, mega-punch, fire-punch, ice-punch, thun...",Physical Tank,"STALLER, TOXI-SHUFFLERS"
271,ludicolo,"water, grass",80,70,70,90,100,70,480,"mega-punch, fire-punch, ice-punch, thunder-pun...",Jack-Of-All-Trades,"STALLER, TOXI-SHUFFLERS"
892,zarude,"dark, grass",105,120,105,70,95,105,600,"mega-punch, scratch, swords-dance, bind, vine-...",Jack-Of-All-Trades,"PSEUDO-HAZER, STALLER, TOXI-SHUFFLERS, PARA-SH..."
157,totodile,water,50,65,64,44,48,43,314,"mega-punch, ice-punch, scratch, razor-wind, sw...",Physical Sweeper,"STALLER, TOXI-SHUFFLERS"
753,lurantis,grass,70,105,90,80,90,45,480,"swords-dance, take-down, hyper-beam, growth, r...",Jack-Of-All-Trades,"STALLER, TOXI-SHUFFLERS"
327,trapinch,ground,45,100,45,45,45,10,290,"gust, sand-attack, headbutt, body-slam, double...",Physical Sweeper,"STALLER, TOXI-SHUFFLERS"


### Elo Algorithm on Pokemons

Every Pokémon enters the round-robin with the same starting Elo (1500). Its total base stats (`TOTAL`), a general measure of how strong a Pokémon is, are passed to the simulator for each match.


In [41]:
# Shared EloAlgo instance; handle_elo calls its simulate_round for every pairing.
simulator = EloAlgo()

def handle_elo(df, newcol_name, from_col, initial_elo=1500):
    """Run a round-robin of simulated matches and store the final ratings.

    Every row of ``df`` is matched once against every later row. Each match
    is delegated to the module-level ``simulator`` (``simulate_round``), which
    also receives the module-level ``move_adv_df`` type table.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``NAME``, ``TYPE`` (comma-separated string), the six base
        stat columns and ``from_col``. Modified in place.
    newcol_name : str
        Column that receives the final rating of each row.
    from_col : str
        Column forwarded to the simulator; ``newcol_name`` is first filled
        with a copy of it so the per-row records handed to the simulator
        contain both keys.
    initial_elo : number, optional
        Rating every row starts from (default 1500).
        NOTE(review): the notebook text says the starting rating is based on
        total base stats, but ratings have always started from this constant
        -- confirm which is intended.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with ``newcol_name`` holding the final ratings.
    """
    from itertools import combinations

    df[newcol_name] = df[from_col]

    pokemons = [
        {
            'index': index,
            'name': row['NAME'],
            'stats': [row[stat] for stat in ['HP', 'ATTACK', 'DEFENSE', 'SPECIAL-ATTACK', 'SPECIAL-DEFENSE', 'SPEED']],
            'type': [t.strip() for t in row['TYPE'].split(',')],
            newcol_name: row[newcol_name],
            from_col: row[from_col]
        } for index, row in df.iterrows()
    ]

    elo_changes = {poke['index']: initial_elo for poke in pokemons}

    # combinations() yields (i, j) pairs with i < j in the same order as the
    # equivalent nested index loops, so ratings evolve in the same sequence.
    for poke1, poke2 in combinations(pokemons, 2):
        key1, key2 = poke1['index'], poke2['index']
        elo_changes[key1], elo_changes[key2] = simulator.simulate_round(
            poke1, poke2, move_adv_df, from_col,
            elo_changes[key1], elo_changes[key2])

    # Replace the whole column at once so its dtype follows the ratings,
    # instead of writing (possibly fractional) ratings one cell at a time into
    # a column that still has from_col's dtype.
    df[newcol_name] = [elo_changes[label] for label in df.index]

    return df

In [42]:
# Rate every Pokemon with handle_elo: results go into a new 'STATS ELO' column,
# with 'TOTAL' as the source column forwarded to the simulator.
poke_df = handle_elo(poke_df, 'STATS ELO', 'TOTAL')
# Write the rated table to 'ratings.xlsx' (relative path, so it lands in the
# current working directory).
poke_df.to_excel('ratings.xlsx')