#### Importação dos dados

In [1]:
import pandas as pd

In [2]:
pokemon = pd.read_csv('./data/pokemon.csv')
colors = pd.read_csv('./data/colors.csv')[['name', 'hex1', 'hex2', 'hex3']]

#### Tratamento de nulos

In [3]:
pokemon['type2'].fillna(pokemon['type1'], inplace=True)
pokemon['height_m'].fillna(pokemon['height_m'].median(), inplace=True)
pokemon['percentage_male'].fillna(pokemon['percentage_male'].median(), inplace=True)
pokemon['weight_kg'].fillna(pokemon['weight_kg'].median(), inplace=True)

#### Mescla de tabelas

In [4]:
merged_df = pd.merge(pokemon, colors, left_on='name', right_on='name', how='left')
pokemon = merged_df

#### Tranformação dos hex codes em cores rgb

In [5]:
# Função que transforma HEX em RGB
def hex_to_rgb(hex_code):
    hex_code = hex_code.lstrip('#')
    r = int(hex_code[0:2], 16)
    g = int(hex_code[2:4], 16)
    b = int(hex_code[4:6], 16)
    return r, g, b

In [6]:
# Criação de novas colunas para as cores RGB
pokemon['rgb1'] = pokemon['hex1'].apply(hex_to_rgb)
pokemon['rgb2'] = pokemon['hex2'].apply(hex_to_rgb)
pokemon['rgb3'] = pokemon['hex3'].apply(hex_to_rgb)

In [7]:
# Drop das colunas de HEX
pokemon = pokemon.drop(['hex1', 'hex2', 'hex3'], axis=1)

In [8]:
# Criação de novas colunas(Red, Green, Blue) para cada cor
pokemon['red1'] = pokemon['rgb1'].apply(lambda x: x[0])
pokemon['green1'] = pokemon['rgb1'].apply(lambda x: x[1])
pokemon['blue1'] = pokemon['rgb1'].apply(lambda x: x[2])

pokemon['red2'] = pokemon['rgb2'].apply(lambda x: x[0])
pokemon['green2'] = pokemon['rgb2'].apply(lambda x: x[1])
pokemon['blue2'] = pokemon['rgb2'].apply(lambda x: x[2])

pokemon['red3'] = pokemon['rgb3'].apply(lambda x: x[0])
pokemon['green3'] = pokemon['rgb3'].apply(lambda x: x[1])
pokemon['blue3'] = pokemon['rgb3'].apply(lambda x: x[2])

In [9]:
# Drop das colunas antigas de RGB
pokemon = pokemon.drop(['rgb1','rgb2','rgb3'], axis=1)

In [10]:
# Tratamento da coluna capture_rate
pokemon['capture_rate'] = pd.to_numeric(pokemon['capture_rate'], errors='coerce')

#### Coisas úteis para visualização de dados

In [11]:
# import matplotlib.pyplot as plt

# # pokemon[(pokemon['generation'] == 6)]['type1'].value_counts().plot(kind='pie')
# pokemon.groupby('type1')['sp_defense'].mean().plot(kind='bar')

In [12]:
# pokemon.groupby('type2')['sp_defense'].mean().plot(kind='bar')

#### Tratamento dos tipos de pokémon

In [13]:
# Criando união dos tipos únicos
unique_types = set(pokemon['type1']).union(set(pokemon['type2']))

In [14]:
# One-hot encoding da coluna de tipos
for type_col in unique_types:
    pokemon[type_col] = (pokemon['type1'] == type_col) | (pokemon['type2'] == type_col)

for type_col in unique_types:
    pokemon[type_col] = pokemon[type_col].astype(int)

In [15]:
# Drop das antigas colunas de tipos 1 e 2
pokemon.drop(['type1', 'type2'], axis=1, inplace=True)

#### Tratamento das habilidades

In [16]:
# # Converter 'abilities' em uma lista de strings
# pokemon['abilities'] = pokemon['abilities'].apply(lambda x: eval(x))

In [17]:
# # Usar MLB para fazer one-hot encoding na coluna de habilidades
# from sklearn.preprocessing import MultiLabelBinarizer

# mlb = MultiLabelBinarizer()
# abilities_encoded = pd.DataFrame(mlb.fit_transform(pokemon['abilities']), columns=mlb.classes_, index=pokemon.index)

# # Concatenar as tabelas
# pokemon = pd.concat([pokemon, abilities_encoded], axis=1)

# # Drop da coluna original
# pokemon.drop('abilities', axis=1, inplace=True)

# # Renomeando as colunas
# pokemon.columns = pokemon.columns.str.lower().str.replace(' ', '_')

In [18]:
pokemon.drop(['height_m', 'name', 'japanese_name', 'pokedex_number', 'percentage_male',
              'classfication', 'base_egg_steps', 'base_happiness', 'abilities',
              'experience_growth', 'defense', 'base_total', 'speed'], axis=1, inplace=True)

In [19]:
for column in pokemon.columns:
    pokemon[column] = pd.to_numeric(pokemon[column], errors='coerce')

In [20]:
pokemon['capture_rate'].fillna(pokemon['capture_rate'].median(), inplace=True)

In [21]:
not 0 in pokemon.isna().sum().tolist()

False

In [22]:
X = pokemon.drop(['grass', 'rock', 'psychic', 'normal', 'bug', 'fire', 'flying', 'electric', 'dark', 'dragon', 'poison', 'water', 'fighting', 'fairy', 'ice', 'ground', 'ghost', 'steel'], axis=1)

In [23]:
y = pokemon[['grass', 'rock', 'psychic', 'normal', 'bug', 'fire', 'flying', 'electric', 'dark', 'dragon', 'poison', 'water', 'fighting', 'fairy', 'ice', 'ground', 'ghost', 'steel']]

In [24]:
X.head()

Unnamed: 0,against_bug,against_dark,against_dragon,against_electric,against_fairy,against_fight,against_fire,against_flying,against_ghost,against_grass,...,is_legendary,red1,green1,blue1,red2,green2,blue2,red3,green3,blue3
0,1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,0.25,...,0,57,148,148,98,213,180,131,238,197
1,1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,0.25,...,0,90,205,189,32,148,131,49,123,82
2,1.0,1.0,1.0,0.5,0.5,0.5,2.0,2.0,1.0,0.25,...,0,32,180,156,90,213,197,255,123,115
3,0.5,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,0.5,...,0,255,148,65,222,82,57,255,213,123
4,0.5,1.0,1.0,1.0,0.5,1.0,0.5,1.0,1.0,0.5,...,0,205,57,57,255,82,74,255,65,0


In [25]:
pd.set_option('display.max_columns', None)

In [26]:
from sklearn.preprocessing import Normalizer

norm_X = Normalizer().fit_transform(X)
norm_y = Normalizer().fit_transform(y)

In [27]:
from sklearn.model_selection import train_test_split

In [28]:
X_train, X_test, y_train, y_test = train_test_split(norm_X, norm_y, test_size=0.20, random_state=30)

In [29]:
from sklearn.linear_model import LinearRegression

In [30]:
reg = LinearRegression().fit(X_train, y_train)

In [31]:
y_pred = reg.predict(X_test)

In [32]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [33]:
mean_absolute_error(y_test, y_pred)

0.07321913760773027

In [34]:
mean_squared_error(y_test, y_pred)

0.011587340736029881

In [35]:
mean_squared_error(y_test, y_pred) ** (1/2)

0.10764451094240654

In [36]:
r2_score(y_test, y_pred)

0.7226884738488828