<a href="https://colab.research.google.com/github/leadeev/Machine-learning/blob/main/2.2%20ML%20Classification%20-%20KNN%20distances%20with%20Pokemons.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.neighbors import NearestNeighbors

# On importe notre BDD des pokemons
link = "https://raw.githubusercontent.com/murpi/wilddata/master/pokemon.csv"
df_pokemon = pd.read_csv(link)
# On utilise la colonne # comme index
df_pokemon.set_index('#', inplace=True)
df_pokemon

# FYI : Dans ce challenge, nous aurons un usage très particulier du KNN. Nous allons uniquement utiliser la première étape
# => 1. Trouver les plus proches voisins, c'est-à-dire les voisins avec la distance la plus faible.

Unnamed: 0_level_0,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
4,Mega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
5,Charmander,Fire,,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...
796,Diancie,Rock,Fairy,50,100,150,100,150,50,6,True
797,Mega Diancie,Rock,Fairy,50,160,110,160,110,110,6,True
798,Hoopa Confined,Psychic,Ghost,80,110,60,150,130,70,6,True
799,Hoopa Unbound,Psychic,Dark,80,160,60,170,130,80,6,True


In [None]:
df_pokemon.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 800 entries, 1 to 800
Data columns (total 11 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Name        800 non-null    object
 1   Type 1      800 non-null    object
 2   Type 2      414 non-null    object
 3   HP          800 non-null    int64 
 4   Attack      800 non-null    int64 
 5   Defense     800 non-null    int64 
 6   Sp. Atk     800 non-null    int64 
 7   Sp. Def     800 non-null    int64 
 8   Speed       800 non-null    int64 
 9   Generation  800 non-null    int64 
 10  Legendary   800 non-null    bool  
dtypes: bool(1), int64(7), object(3)
memory usage: 69.5+ KB


In [None]:
# 386 rows 'Type 2' sont vides (sur 800) => on "drop" cette colonne.
df_pokemon.drop('Type 2', axis=1, inplace=True)
df_pokemon.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 800 entries, 1 to 800
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Name        800 non-null    object
 1   Type 1      800 non-null    object
 2   HP          800 non-null    int64 
 3   Attack      800 non-null    int64 
 4   Defense     800 non-null    int64 
 5   Sp. Atk     800 non-null    int64 
 6   Sp. Def     800 non-null    int64 
 7   Speed       800 non-null    int64 
 8   Generation  800 non-null    int64 
 9   Legendary   800 non-null    bool  
dtypes: bool(1), int64(7), object(2)
memory usage: 63.3+ KB


In [None]:
# On transforme "Type 1" en valeurs numériques avec la méthode 'get_dummies()'
# Car ML ne comprend pas des strings
type1_int = pd.get_dummies(df_pokemon['Type 1'])
df_pokemon = pd.concat([df_pokemon, type1_int], axis=1).drop('Type 1', axis=1)
df_pokemon

Unnamed: 0_level_0,Name,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Bug,...,Ghost,Grass,Ground,Ice,Normal,Poison,Psychic,Rock,Steel,Water
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,Bulbasaur,45,49,49,65,65,45,1,False,0,...,0,1,0,0,0,0,0,0,0,0
2,Ivysaur,60,62,63,80,80,60,1,False,0,...,0,1,0,0,0,0,0,0,0,0
3,Venusaur,80,82,83,100,100,80,1,False,0,...,0,1,0,0,0,0,0,0,0,0
4,Mega Venusaur,80,100,123,122,120,80,1,False,0,...,0,1,0,0,0,0,0,0,0,0
5,Charmander,39,52,43,60,50,65,1,False,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
796,Diancie,50,100,150,100,150,50,6,True,0,...,0,0,0,0,0,0,0,1,0,0
797,Mega Diancie,50,160,110,160,110,110,6,True,0,...,0,0,0,0,0,0,0,1,0,0
798,Hoopa Confined,80,110,60,150,130,70,6,True,0,...,0,0,0,0,0,0,1,0,0,0
799,Hoopa Unbound,80,160,60,170,130,80,6,True,0,...,0,0,0,0,0,0,1,0,0,0


In [None]:
# FYI : Entraine l'algorithme NN en utilisant l'ensemble des colonnes numériques en variables explicatives "X" :

# On utilise uniquement les pokemons non légendaires, ctd 'Legendary' = False
X = df_pokemon[df_pokemon['Legendary'] == False]

# On "drop" les colonnes "Name" et "Legendary" pour avoir uniquement les colonnes numériques
X = X.drop(['Name', 'Legendary'], axis=1)

# On "fit" le modèle pour avoir les 3 pokemons voisins plus proches
distanceKNN = NearestNeighbors(n_neighbors=3).fit(X)

In [None]:
# Caractéristiques des 6 pokemons habituels du champion
Mewtwo = df_pokemon[df_pokemon['Name'] == "Mewtwo"].drop(['Name', 'Legendary'], axis=1)
Lugia = df_pokemon[df_pokemon['Name'] == "Lugia"].drop(['Name', 'Legendary'], axis=1)
Rayquaza = df_pokemon[df_pokemon['Name'] == "Rayquaza"].drop(['Name', 'Legendary'], axis=1)
Giratina = df_pokemon[df_pokemon['Name'] == "Giratina Origin Forme"].drop(['Name', 'Legendary'], axis=1)
Dialga = df_pokemon[df_pokemon['Name'] == "Dialga"].drop(['Name', 'Legendary'], axis=1)
Palkia = df_pokemon[df_pokemon['Name'] == "Palkia"].drop(['Name', 'Legendary'], axis=1)

# Exemple de "Mewtwo"
display(Mewtwo)

Unnamed: 0_level_0,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Bug,Dark,Dragon,...,Ghost,Grass,Ground,Ice,Normal,Poison,Psychic,Rock,Steel,Water
#,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
163,106,110,90,154,90,130,1,0,0,0,...,0,0,0,0,0,0,1,0,0,0


In [None]:
# Les 3 voisins les plus proches non légendaires (numéro du pokemon)
Mewtwo_voisins = distanceKNN.kneighbors(Mewtwo)[1][0]
Lugia_voisins = distanceKNN.kneighbors(Lugia)[1][0]
Rayquaza_voisins = distanceKNN.kneighbors(Rayquaza)[1][0]
Giratina_voisins = distanceKNN.kneighbors(Giratina)[1][0]
Dialga_voisins = distanceKNN.kneighbors(Dialga)[1][0]
Palkia_voisins = distanceKNN.kneighbors(Palkia)[1][0]

# DataFrame des résultats
result = pd.DataFrame({'Mewtwo': Mewtwo_voisins,
                       'Lugia': Lugia_voisins,
                       'Rayquaza': Rayquaza_voisins,
                       'Giratina': Giratina_voisins,
                       'Dialga': Dialga_voisins,
                       'Palkia': Palkia_voisins,
                       })
display(result)

Unnamed: 0,Mewtwo,Lugia,Rayquaza,Giratina,Dialga,Palkia
0,242,508,268,510,12,8
1,264,3,465,260,7,12
2,23,260,7,159,653,653


In [None]:
# On remplace le numéro par le nom du pokemon pour être plus clair
result['Mewtwo'] = result['Mewtwo'].apply(lambda x: df_pokemon['Name'][x])
result['Lugia'] = result['Lugia'].apply(lambda x: df_pokemon['Name'][x])
result['Rayquaza'] = result['Rayquaza'].apply(lambda x: df_pokemon['Name'][x])
result['Giratina'] = result['Giratina'].apply(lambda x: df_pokemon['Name'][x])
result['Dialga'] = result['Dialga'].apply(lambda x: df_pokemon['Name'][x])
result['Palkia'] = result['Palkia'].apply(lambda x: df_pokemon['Name'][x])

In [None]:
# On affiche les résultats : 3 pokemons les plus proches de chacun des 6 pokemons habituels
display(result)

Unnamed: 0,Mewtwo,Lugia,Rayquaza,Giratina,Dialga,Palkia
0,Remoraid,Lumineon,Tyranitar,Snover,Blastoise,Mega Charizard X
1,Entei,Venusaur,Pachirisu,Magby,Charizard,Blastoise
2,Pidgeot,Magby,Charizard,Moltres,Amoonguss,Amoonguss
