In [1]:
# dependencies
import json
import pandas as pd
import re

# Load the Pokémon records from the JSON dump.
# The encoding is given explicitly so the read does not depend on the
# platform's default text encoding.
with open('../API_Data/pkmn.json', 'r', encoding='utf-8') as in_file:
    pkmn = json.load(in_file)

In [2]:
# Collect the raw 'name' field of every record, in file order.
pkmn_names_raw = pd.Series([entry['name'] for entry in pkmn])

Next, convert the raw form names into something more readable (e.g. `raichu-alola` -> `Alolan Raichu`).

In [3]:
# Substrings marking entries to discard (gigantimaxes, pikachu and eevee
# costumes, totems, plus a handful of other per-species variants).
_DROP_MARKERS = ('gmax', 'pikachu-', 'eevee-', 'totem', 'floette-', 'greninja-',
                 'magearna-', 'cramorant-', 'zarude-')

# Region stems; the adjective ending is added in clean_names.
_REGIONS = ('alol', 'galar', 'hisui')

# (species prefixes, output template) pairs, tried in this order. A name of
# the shape '<prefix>-<rest>' is rendered through the template, with the
# dashes inside <rest> turned into spaces.
_PREFIX_RULES = (
    (('deoxys', 'dialga', 'palkia', 'giratina', 'shaymin', 'tornadus',
      'thundurus', 'landorus', 'meloetta', 'aegislash', 'zygarde', 'enamorus'),
     '{base} ({form} forme)'),
    (('castform', 'basculin', 'keldeo', 'lycanroc', 'wishiwashi', 'toxtricity'),
     '{base} ({form} form)'),
    (('oricorio', 'urshifu'),
     '{base} ({form} style)'),
    (('tapu',),
     '{base} {form}'),
    (('mimikyu', 'eiscue', 'kyurem', 'kyogre', 'groudon', 'hoopa', 'rockruff',
      'necrozma', 'eternatus', 'darmanitan', 'zacian', 'zamazenta', 'calyrex',
      'morpeko', 'pumpkaboo', 'gourgeist'),
     '{base} ({form})'),
)

# (trailing tokens, output template) pairs, tried in this order. A name of
# the shape '<base>-<token>' is rendered through the template.
_SUFFIX_RULES = (
    (('m', 'male', 'f', 'female'), '{base} ({form})'),
    (('plant', 'sandy', 'trash'), '{base} ({form} cloak)'),
)


def clean_names(name):
    """Convert a raw, dash-separated Pokémon name into a display name.

    Rules are tried in a fixed order and the first one that applies decides
    the result: drop markers, regional forms, minior, species-prefix rules
    (forme / form / style / tapu / bare modifier), gender and cloak suffixes,
    rotom, the mime family, type-null, and finally mega evolutions. Names
    that match no rule are simply title-cased.

    Parameters
    ----------
    name : str
        Raw name, e.g. ``'raichu-alola'``. Letter case is ignored.

    Returns
    -------
    str
        Title-cased display name (e.g. ``'Alolan Raichu'``), or the sentinel
        ``'Drop'`` for entries that are marked for removal.
    """
    # All rules are spelled in lowercase. Normalising once keeps detection
    # and extraction in agreement; previously detection ignored case while
    # extraction did not, so mixed-case input could crash or be left
    # uncleaned. The final .title() makes the output case-independent anyway.
    name = name.lower()

    if any(marker in name for marker in _DROP_MARKERS):
        return 'Drop'

    # Regional forms: '<base>-<region>[-<extra>]'.
    for region in _REGIONS:
        if region not in name:
            continue
        found = re.search(f'(.*)-{region}-?(.*)', name)
        if found:
            base = found.group(1).replace('-', ' ')
            extra = found.group(2).replace('-', ' ')
            if region == 'galar':
                name = f'{region}ian {base}'
                if extra:
                    name += f' ({extra})'
            else:
                # For 'alol' and 'hisui' anything after the region is ignored.
                name = f'{region}an {base}'
        # A name that mentions a region always stops here, reshaped or not.
        return name.title()

    # Minior: keep one colour only (red) for the core and meteor forms.
    if name.startswith('minior-'):
        colour = name[len('minior-'):]
        kept = {
            'red': 'minior (core form)',
            'red-meteor': 'minior (meteor form)',
        }
        return kept.get(colour, 'DROP').title()

    for prefixes, template in _PREFIX_RULES:
        for prefix in prefixes:
            if name.startswith(prefix + '-'):
                form = name[len(prefix) + 1:].replace('-', ' ')
                return template.format(base=prefix, form=form).title()

    for suffixes, template in _SUFFIX_RULES:
        for suffix in suffixes:
            if name.endswith('-' + suffix):
                base = name[:-(len(suffix) + 1)]
                return template.format(base=base, form=suffix).title()

    # Rotom: the appliance goes in front ('rotom-heat' -> 'Heat Rotom').
    if name.startswith('rotom-'):
        return f"{name[len('rotom-'):]} rotom".title()

    # mr-mime / mime-jr / mr-rime: dashes become spaces, any suffix is cut.
    for mime in ('mr-mime', 'mime-jr', 'mr-rime'):
        if name.startswith(mime):
            return mime.replace('-', ' ').title()

    # type-null: the last dash becomes ': '.
    if name.startswith('type-null'):
        head, _, tail = name.rpartition('-')
        return f'{head}: {tail}'.title()

    # Mega evolutions: '<base>-mega' or '<base>-mega-x' / '<base>-mega-y'.
    if name.endswith('-mega'):
        name = 'mega ' + name[:-len('-mega')]
    else:
        found = re.search(r'(.*)-mega-([xy])', name)
        if found:
            name = f'mega {found.group(1)} {found.group(2)}'

    return name.title()

In [4]:
# Run every raw name through clean_names to get its display name.
pkmn_names = pkmn_names_raw.map(clean_names)

## Build Final DF

In [5]:
# Build one {"type_1", "type_2"} record per entry. "type_2" is only filled
# when the entry lists exactly two types; otherwise it is None.
pkmn_types_raw = []

for entry in pkmn:
    slots = entry['types']
    primary = slots[0]['type']['name']
    secondary = slots[1]['type']['name'] if len(slots) == 2 else None
    pkmn_types_raw.append({"type_1": primary, "type_2": secondary})

pkmn_types_df = pd.DataFrame(pkmn_types_raw)

In [6]:
# Two-column frame: the raw name next to its cleaned display name.
pkmn_names_df = pd.DataFrame(dict(name_raw=pkmn_names_raw, name=pkmn_names))

In [10]:
# Line the name and type frames up on their row index, then key the result
# by the cleaned display name.
# NOTE(review): rows whose cleaned name is 'Drop' are still present at this
# point — confirm whether they should be filtered out before export.
pkmn_final_df = (
    pkmn_names_df
    .merge(pkmn_types_df, left_index=True, right_index=True)
    .set_index("name")
)

pkmn_final_df

Unnamed: 0_level_0,name_raw,type_1,type_2
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Bulbasaur,bulbasaur,grass,poison
Ivysaur,ivysaur,grass,poison
Venusaur,venusaur,grass,poison
Charmander,charmander,fire,
Charmeleon,charmeleon,fire,
...,...,...,...
Dialga (Origin Forme),dialga-origin,steel,dragon
Palkia (Origin Forme),palkia-origin,water,dragon
Basculin (White Striped Form),basculin-white-striped,water,
Basculegion (Female),basculegion-female,water,ghost


In [None]:
# To test for a monotype, check whether pkmn_final_df.loc['Bulbasaur', 'type_2'] is missing (None); a truthy value means the Pokémon has a second type

## Send to sqlite

In [24]:
from sqlalchemy import create_engine

In [26]:
# SQLAlchemy engine for the SQLite file at the relative path ../pkmn.sqlite
engine = create_engine("sqlite:///../pkmn.sqlite")

# Write the final frame to table 'pkmn_name_type', replacing the table if it
# already exists; the index is stored in a column labelled "name".
pkmn_final_df.to_sql(
    name='pkmn_name_type',
    con=engine,
    if_exists='replace',
    index_label="name",
)

1154