# Pokemon Analysis (Similarity Matrix + Network Graphing)
Keon Feizy-Marandy & Lexin Deang

In [15]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics.pairwise import cosine_similarity

In [16]:
# Read the Pokedex dataset from disk and preview its first five rows.
df = pd.read_csv('pokedex.csv')
df.head(5)

Unnamed: 0,id,name,height,weight,hp,attack,defense,s_attack,s_defense,speed,type,evo_set,info
0,1,bulbasaur,7,69,45,49,49,65,65,45,"{grass,poison}",1,A strange seed was planted on its back at birt...
1,2,ivysaur,10,130,60,62,63,80,80,60,"{grass,poison}",1,"When the bulb on its back grows large, it appe..."
2,3,venusaur,20,1000,80,82,83,100,100,80,"{grass,poison}",1,The plant blooms when it is absorbing solar en...
3,4,charmander,6,85,39,52,43,60,50,65,{fire},2,"Obviously prefers hot places. When it rains, s..."
4,5,charmeleon,11,190,58,64,58,80,65,80,{fire},2,"When it swings its burning tail, it elevates t..."


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,id,height,weight,hp,attack,defense,s_attack,s_defense,speed,evo_set
count,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0
mean,513.0,12.116098,669.865366,70.18439,77.521951,72.507317,70.080976,70.205854,67.186341,253.195122
std,296.036315,12.481673,1212.731138,26.631054,29.782541,29.286972,29.658378,26.639329,28.717227,159.505305
min,1.0,1.0,1.0,1.0,5.0,5.0,10.0,20.0,5.0,1.0
25%,257.0,5.0,85.0,50.0,55.0,50.0,47.0,50.0,45.0,110.0
50%,513.0,10.0,280.0,68.0,75.0,70.0,65.0,67.0,65.0,257.0
75%,769.0,15.0,700.0,85.0,100.0,90.0,90.0,86.0,88.0,387.0
max,1025.0,200.0,9999.0,255.0,181.0,230.0,173.0,230.0,200.0,549.0


In [18]:
# Column names, non-null counts and dtypes -- a quick check for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1025 entries, 0 to 1024
Data columns (total 13 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   id         1025 non-null   int64 
 1   name       1025 non-null   object
 2   height     1025 non-null   int64 
 3   weight     1025 non-null   int64 
 4   hp         1025 non-null   int64 
 5   attack     1025 non-null   int64 
 6   defense    1025 non-null   int64 
 7   s_attack   1025 non-null   int64 
 8   s_defense  1025 non-null   int64 
 9   speed      1025 non-null   int64 
 10  type       1025 non-null   object
 11  evo_set    1025 non-null   int64 
 12  info       1025 non-null   object
dtypes: int64(10), object(3)
memory usage: 104.2+ KB


### Selecting Features, Normalizing, and Scaling

In [21]:
# Numeric columns used to describe each Pokemon: six battle stats plus body size.
features = ['hp', 'attack', 'defense', 's_attack', 's_defense', 'speed', 'height', 'weight']

# Coerce to numeric in a separate frame so re-running this cell never alters `df`;
# any value that cannot be parsed becomes NaN.
feature_values = df[features].apply(pd.to_numeric, errors='coerce')

# Fail fast on missing values: the scaler lets NaN pass through, and the
# cosine-similarity step that follows rejects NaN input with a much less helpful error.
nan_counts = feature_values.isna().sum()
if nan_counts.any():
    raise ValueError(
        "Non-numeric or missing feature values found:\n"
        f"{nan_counts[nan_counts > 0]}"
    )

# Rescale every feature to [0, 1] so wide-ranged columns (e.g. weight) do not dominate.
scaler = MinMaxScaler()
X = scaler.fit_transform(feature_values)

### Cosine Similarity 
Cosine similarity is used here because it measures the angle between two feature vectors rather than the distance between them. This suits our analysis because we want Pokemon whose features are distributed in similar proportions to count as similar, even when the overall magnitude of their features differs.

In [24]:
# Pairwise cosine similarity between every pair of rows of the scaled feature matrix.
similarity = cosine_similarity(X)

# Label both axes with Pokemon names so scores can be looked up by name.
similarity_df = pd.DataFrame(
    data=similarity,
    index=df['name'],
    columns=df['name'],
)

# Preview the first five rows of the matrix.
similarity_df.head(5)

name,bulbasaur,ivysaur,venusaur,charmander,charmeleon,charizard,squirtle,wartortle,blastoise,caterpie,...,fezandipiti,ogerpon,archaludon,hydrapple,gouging-fire,raging-bolt,iron-boulder,iron-crown,terapagos,pecharunt
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
bulbasaur,1.0,0.999604,0.994229,0.972736,0.984294,0.98767,0.979483,0.98496,0.984973,0.793602,...,0.960517,0.928723,0.972157,0.967361,0.835231,0.901861,0.929352,0.984979,0.970857,0.946709
ivysaur,0.999604,1.0,0.995791,0.974976,0.986288,0.989238,0.98166,0.98743,0.987954,0.803514,...,0.966738,0.93483,0.970644,0.964172,0.842431,0.903184,0.936515,0.986688,0.974058,0.949058
venusaur,0.994229,0.995791,1.0,0.972153,0.984108,0.991686,0.979441,0.987025,0.992425,0.808329,...,0.968978,0.939849,0.969792,0.963824,0.884084,0.930729,0.949614,0.991599,0.970574,0.945539
charmander,0.972736,0.974976,0.972153,1.0,0.997384,0.99116,0.951511,0.959494,0.959629,0.866727,...,0.951465,0.953542,0.96193,0.911847,0.833491,0.868682,0.954577,0.964341,0.939758,0.929783
charmeleon,0.984294,0.986288,0.984108,0.997384,1.0,0.997054,0.96154,0.969704,0.970736,0.856104,...,0.957024,0.946487,0.969281,0.933601,0.841191,0.892729,0.950288,0.978699,0.952633,0.936918


In [31]:
# Top 5 pokemon with most similar features to Pikachu.
# Drop Pikachu's own entry by label instead of assuming it sorts to position 0:
# another Pokemon can tie at the maximum score, and the sort gives no guarantee on tie order.
similarity_df['pikachu'].drop('pikachu').nlargest(5)


name
swellow       0.994558
persian       0.994126
elekid        0.993550
poliwag       0.992038
talonflame    0.991366
Name: pikachu, dtype: float64

In [32]:
# Top 5 pokemon with least similar features to Pikachu (lowest scores first).
similarity_df['pikachu'].sort_values().head(5)

name
shuckle      0.356563
cosmoem      0.405249
chansey      0.476473
happiny      0.502610
stakataka    0.542217
Name: pikachu, dtype: float64