#### Importação dos dados

In [56]:
import ast

import pandas as pd

In [57]:
# Load the Pokémon table in full.
pokemon = pd.read_csv('./data/pokemon.csv')

# From the colour table keep only the join key and the three hex colour columns.
colors = pd.read_csv('./data/colors.csv').loc[:, ['name', 'hex1', 'hex2', 'hex3']]

#### Tratamento de nulos

In [58]:
# A missing secondary type is replaced by the primary type
# (presumably these are single-type Pokémon — verify against the data).
# Assigning the result back avoids `inplace=True` on a column selection,
# which is deprecated and has no effect under pandas Copy-on-Write.
pokemon['type2'] = pokemon['type2'].fillna(pokemon['type1'])

# Impute every numeric column with its OWN mean. The previous version filled
# 'percentage_male' and 'weight_kg' with the mean of 'height_m' (copy-paste slip).
# NOTE(review): a missing 'percentage_male' may carry meaning of its own
# (e.g. genderless) — confirm that mean imputation is the desired treatment.
for numeric_col in ('height_m', 'percentage_male', 'weight_kg'):
    pokemon[numeric_col] = pokemon[numeric_col].fillna(pokemon[numeric_col].mean())

#### Mescla de tabelas

In [59]:
# Left join on the shared 'name' column: every row of `pokemon` is kept and
# receives the hex colour columns of the matching row in `colors`.
merged_df = pokemon.merge(colors, how='left', on='name')

# The drop of 'Unnamed: 0' is currently disabled, so that column stays in the frame.
# merged_df = merged_df.drop(['Unnamed: 0'], axis=1)

pokemon = merged_df

#### Transformação dos hex codes em cores rgb

In [60]:
# Converts a HEX colour code into its RGB components
def hex_to_rgb(hex_code):
    """Convert a hexadecimal colour string into an ``(r, g, b)`` tuple.

    Parameters
    ----------
    hex_code : str or None or float
        Colour such as ``'#399494'`` or ``'399494'``. Leading ``'#'``
        characters are optional. The 3-digit shorthand (``'#fff'``) is
        expanded to its 6-digit form. Characters after the sixth digit
        (e.g. an alpha channel) are ignored. ``None`` / ``NaN`` mark a
        missing colour.

    Returns
    -------
    tuple
        ``(r, g, b)`` with each channel an ``int`` in ``0..255``; for a
        missing colour, a tuple of three ``NaN`` values so that callers can
        still index the result.

    Raises
    ------
    ValueError
        If fewer than 6 hex digits are supplied (and not exactly 3), or if
        a channel is not valid hexadecimal.
    """
    # A left merge leaves NaN where no colour matched; propagate the missing
    # value instead of failing with "'float' object has no attribute 'lstrip'".
    if hex_code is None or (isinstance(hex_code, float) and hex_code != hex_code):
        missing = float('nan')
        return missing, missing, missing

    digits = hex_code.lstrip('#')

    # Expand shorthand: 'abc' -> 'aabbcc'.
    if len(digits) == 3:
        digits = ''.join(ch * 2 for ch in digits)

    # Without this check a 5-digit code would silently yield a wrong blue channel.
    if len(digits) < 6:
        raise ValueError(
            f"invalid hex colour {hex_code!r}: expected 3 or at least 6 hex digits"
        )

    r, g, b = (int(digits[start:start + 2], 16) for start in (0, 2, 4))
    return r, g, b

In [61]:
# Build one RGB-tuple column ('rgb1'..'rgb3') from each hex column ('hex1'..'hex3')
for slot in ('1', '2', '3'):
    pokemon['rgb' + slot] = pokemon['hex' + slot].apply(hex_to_rgb)

In [62]:
# Preview the first three rows to check the newly added rgb1/rgb2/rgb3 columns
pokemon.head(3)

Unnamed: 0,abilities,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,...,type2,weight_kg,generation,is_legendary,hex1,hex2,hex3,rgb1,rgb2,rgb3
0,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,poison,6.9,1,0,#399494,#62D5B4,#83EEC5,"(57, 148, 148)","(98, 213, 180)","(131, 238, 197)"
1,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,poison,13.0,1,0,#5ACDBD,#209483,#317B52,"(90, 205, 189)","(32, 148, 131)","(49, 123, 82)"
2,"['Overgrow', 'Chlorophyll']",1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,...,poison,100.0,1,0,#20B49C,#5AD5C5,#FF7B73,"(32, 180, 156)","(90, 213, 197)","(255, 123, 115)"


In [63]:
# Remove the hex columns now that their RGB tuples exist
hex_columns = ['hex1', 'hex2', 'hex3']
pokemon = pokemon.drop(columns=hex_columns)

In [64]:
# Split every RGB tuple into separate red/green/blue columns:
# red1, green1, blue1, red2, green2, blue2, red3, green3, blue3 (in that order)
for slot in (1, 2, 3):
    rgb_tuples = pokemon[f'rgb{slot}']
    for position, channel in enumerate(('red', 'green', 'blue')):
        # `i=position` binds the current index at lambda creation time
        pokemon[f'{channel}{slot}'] = rgb_tuples.apply(lambda rgb, i=position: rgb[i])

In [65]:
# Remove the tuple columns; the per-channel columns replace them
rgb_columns = ['rgb1', 'rgb2', 'rgb3']
pokemon = pokemon.drop(columns=rgb_columns)

#### Tratamento dos tipos de pokémon

In [66]:
# Collect every distinct type that appears as either primary or secondary type
primary_types = set(pokemon['type1'])
secondary_types = set(pokemon['type2'])
unique_types = primary_types | secondary_types

In [67]:
# One-hot encode the types: one 0/1 column per type, set to 1 when the type
# appears in either 'type1' or 'type2'.
# Iterating over sorted(...) makes the resulting column order reproducible;
# plain set iteration order for strings can change between kernel sessions
# because of hash randomisation.
# Assumes every entry of unique_types is a string (no NaN) — TODO confirm.
for type_col in sorted(unique_types):
    has_type = (pokemon['type1'] == type_col) | (pokemon['type2'] == type_col)
    pokemon[type_col] = has_type.astype(int)

In [68]:
# The original type columns are no longer needed after one-hot encoding
pokemon.drop(columns=['type1', 'type2'], inplace=True)

#### Tratamento das habilidades

In [69]:
# Parse the stringified list in 'abilities' (e.g. "['Overgrow', 'Chlorophyll']")
# into a real Python list. ast.literal_eval only accepts Python literals, so
# unlike eval() it cannot execute code embedded in the CSV content.
pokemon['abilities'] = pokemon['abilities'].apply(ast.literal_eval)

In [70]:
# One-hot encode the 'abilities' lists with scikit-learn's MultiLabelBinarizer
from sklearn.preprocessing import MultiLabelBinarizer

mlb = MultiLabelBinarizer()
ability_matrix = mlb.fit_transform(pokemon['abilities'])

# One indicator column per ability class, aligned to the rows of `pokemon`
abilities_encoded = pd.DataFrame(
    ability_matrix,
    columns=mlb.classes_,
    index=pokemon.index,
)

# Append the indicator columns, then discard the original list column
pokemon = pd.concat([pokemon, abilities_encoded], axis=1)
pokemon.drop(columns='abilities', inplace=True)

# Normalise column names: lower case, spaces replaced by underscores
lowered_names = pokemon.columns.str.lower()
pokemon.columns = lowered_names.str.replace(' ', '_')