#### Importação dos dados

In [1]:
import ast

import pandas as pd

In [2]:
# Load the raw Pokémon table and the type lookup table,
# keeping only the 'identifier' and 'color' columns of the latter.
pokemon = pd.read_csv('./data/pokemon.csv')
types = pd.read_csv('./data/types.csv').loc[:, ['identifier', 'color']]

#### Tratamento de nulos

In [3]:
# Rows with a missing 'type2' receive their 'type1' value.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the selected Series: that chained in-place form
# operates on a temporary object and does not update the DataFrame when
# pandas Copy-on-Write is active.
pokemon['type2'] = pokemon['type2'].fillna(pokemon['type1'])

#### Mescla de tabelas

In [4]:
# Left-join the lookup table twice (once per type column) so every row gets
# the color of its type1 and of its type2. Because 'identifier' is still
# present after the first join, the second join suffixes the clashing
# columns as 'identifier_x' / 'identifier_y'; both are dropped at the end.
merged_df = (
    pokemon
    .merge(types, how='left', left_on='type1', right_on='identifier')
    .rename(columns={'color': 'type1_color'})
    .merge(types, how='left', left_on='type2', right_on='identifier')
    .rename(columns={'color': 'type2_color'})
    .drop(columns=['identifier_x', 'identifier_y'])
)

pokemon = merged_df

#### Transformação dos hex codes em cores RGB

In [5]:
# Convert a HEX color code into its RGB components
def hex_to_rgb(hex_code):
    """Return the (r, g, b) integer tuple encoded by a hex color string.

    Leading '#' characters are removed, then the first three pairs of
    hexadecimal digits are read as the red, green and blue channels.
    Raises ValueError when a pair is not valid hexadecimal.
    """
    digits = hex_code.lstrip('#')
    red, green, blue = (int(digits[start:start + 2], 16) for start in (0, 2, 4))
    return red, green, blue

In [6]:
# Create one new column per type slot holding the (r, g, b) tuple of its hex color
rgb_sources = {
    'type1_color_rgb': 'type1_color',
    'type2_color_rgb': 'type2_color',
}
for rgb_col, hex_col in rgb_sources.items():
    pokemon[rgb_col] = pokemon[hex_col].apply(hex_to_rgb)

In [7]:
# Split each RGB tuple column into separate red / green / blue columns.
# Targets are listed in tuple order, so their position is the tuple index.
channel_columns = {
    'type1_color_rgb': ('type1_color_red', 'type1_color_green', 'type1_color_blue'),
    'type2_color_rgb': ('type2_color_red', 'type2_color_green', 'type2_color_blue'),
}
for rgb_col, targets in channel_columns.items():
    for position, target in enumerate(targets):
        # bind the index as a default argument so each lambda keeps its own value
        pokemon[target] = pokemon[rgb_col].apply(lambda rgb, i=position: rgb[i])

In [8]:
# Remove the hex and tuple color columns now that the channels are split out
obsolete_color_columns = ['type1_color', 'type2_color', 'type1_color_rgb', 'type2_color_rgb']
pokemon = pokemon.drop(columns=obsolete_color_columns)

#### Tratamento dos tipos de pokémon

In [9]:
# Collect every distinct value that appears in either type column
unique_types = set(pokemon['type1']) | set(pokemon['type2'])

In [10]:
# One-hot encode the types: one 0/1 column per type, set to 1 when the type
# appears in either 'type1' or 'type2'.
# Iterate in sorted order: the iteration order of a set of strings can change
# between kernel sessions, which would make the resulting column order
# non-reproducible. key=str keeps the sort from failing if a non-string
# value (e.g. NaN) ended up in the set.
for type_col in sorted(unique_types, key=str):
    has_type = (pokemon['type1'] == type_col) | (pokemon['type2'] == type_col)
    pokemon[type_col] = has_type.astype(int)

In [11]:
# Remove the original type1 / type2 columns (the frame is modified in place)
pokemon.drop(columns=['type1', 'type2'], inplace=True)

#### Tratamento das habilidades

In [12]:
# Parse each 'abilities' cell from its string form into a Python object
# (assumes every cell holds a list literal of ability names — TODO confirm).
# ast.literal_eval only accepts Python literals, so unlike eval() it cannot
# execute arbitrary code that might be embedded in the CSV.
pokemon['abilities'] = pokemon['abilities'].apply(ast.literal_eval)

In [13]:
# One-hot encode the abilities column with MultiLabelBinarizer
from sklearn.preprocessing import MultiLabelBinarizer

mlb = MultiLabelBinarizer()
ability_flags = mlb.fit_transform(pokemon['abilities'])
abilities_encoded = pd.DataFrame(ability_flags, index=pokemon.index, columns=mlb.classes_)

# Append the indicator columns, then discard the original 'abilities' column
pokemon = pd.concat([pokemon, abilities_encoded], axis=1).drop('abilities', axis=1)

# Normalize every column name: lower case, spaces replaced by underscores
lowercased = pokemon.columns.str.lower()
pokemon.columns = lowercased.str.replace(' ', '_')

In [21]:
# Preview the first three rows of the transformed table
pokemon.head(3)

Unnamed: 0,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,against_grass,...,water_absorb,water_bubble,water_compaction,water_veil,weak_armor,white_smoke,wimp_out,wonder_guard,wonder_skin_,zen_mode
0,1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,0.25,...,0,0,0,0,0,0,0,0,0,0
1,1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,0.25,...,0,0,0,0,0,0,0,0,0,0
2,1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,0.25,...,0,0,0,0,0,0,0,0,0,0
