In [1]:
import pandas as pd
from sklearn.manifold import TSNE, MDS
import altair as alt

In [2]:
# Load the raw table from a CSV in the notebook's working directory.
pokemon_csv_path = 'pokemon.csv'
df_pokemon = pd.read_csv(pokemon_csv_path)

# Fix data

## Non-numeric capture_rate

In [3]:
# Display the capture_rate entry for the row(s) named 'Minior' to see the
# value that is about to be corrected.
minior_rows = df_pokemon['name'] == 'Minior'
df_pokemon.loc[minior_rows, 'capture_rate']

773    30 (Meteorite)255 (Core)
Name: capture_rate, dtype: object

In [4]:
# Minior's capture_rate holds the text '30 (Meteorite)255 (Core)'; replace it
# with a single number (30, the Meteorite value).
df_pokemon.loc[df_pokemon['name'] == 'Minior', 'capture_rate'] = 30

# Overwriting one cell does not change the column's dtype: it is still
# `object` and now mixes the int above with whatever the other rows contain.
# Convert the whole column so capture_rate is genuinely numeric for plotting
# and export. pd.to_numeric raises if any other entry is non-numeric, which
# surfaces further bad rows instead of hiding them.
df_pokemon['capture_rate'] = pd.to_numeric(df_pokemon['capture_rate'])

## Fix typo in column name

In [5]:
# The CSV header spells this column 'classfication'; map it to the correct
# spelling.
column_name_fixes = {'classfication': 'classification'}
df_pokemon = df_pokemon.rename(columns=column_name_fixes)

# Use base stats for t-SNE

In [6]:
# Feature matrix for the embedding: the six base-stat columns as a plain
# array, one row per row of df_pokemon.
base_stat_columns = ['attack', 'defense', 'sp_attack', 'sp_defense', 'speed', 'hp']
X = df_pokemon[base_stat_columns].to_numpy()

In [7]:
# Centre and rescale X. mean() and std() are called without an axis, so they
# are computed over every entry of the array: all six stat columns share one
# scalar offset and one scalar scale (this is NOT per-column standardisation).
overall_mean = X.mean()
overall_std = X.std()
X = (X - overall_mean) / overall_std

In [8]:
# Reduce the scaled stats to two dimensions with t-SNE; random_state is
# pinned to 801. MDS(n_components=2).fit_transform(X) is a drop-in
# alternative, kept here as a note for reference.
tsne = TSNE(n_components=2, random_state=801)
X_embedded = tsne.fit_transform(X)

In [9]:
# Preview the embedding: a two-column array (n_components=2), one row per
# input row of X.
X_embedded

array([[  1.7333766, -23.538902 ],
       [  7.7839303,  -8.066467 ],
       [ 15.713701 ,  29.887104 ],
       ...,
       [ 33.595757 ,  10.708869 ],
       [ 12.350531 ,  27.207962 ],
       [ 16.80037  ,  29.456789 ]], dtype=float32)

In [10]:
# Store the embedding on the frame: column 0 of X_embedded becomes 'x' and
# column 1 becomes 'y'.
for embed_axis, coord_name in enumerate(('x', 'y')):
    df_pokemon[coord_name] = X_embedded[:, embed_axis]

# Plot t-SNE result against base stats

In [None]:
# Columns shown in the hover tooltip of every panel.
TOOLTIP_FIELDS = ['attack', 'defense', 'sp_attack', 'sp_defense', 'speed', 'hp', 'is_legendary', 'base_total']

# Panel layout: each inner tuple is one row of the grid, read left to right.
STAT_GRID = (
    ('attack', 'defense'),
    ('sp_attack', 'sp_defense'),
    ('hp', 'speed'),
)


def stat_panel(data, stat):
    """Return one point-mark scatter of the embedding coloured by ``stat``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding the 'x' and 'y' embedding columns, the ``stat`` column
        and every column listed in ``TOOLTIP_FIELDS``.
    stat : str
        Name of the column used for both the colour encoding and the title.

    Returns
    -------
    alt.Chart
        A single-panel chart; combine panels with ``alt.hconcat`` / ``alt.vconcat``.
    """
    return alt.Chart(data, title=stat).mark_point().encode(
        x='x',
        y='y',
        color=stat,
        tooltip=TOOLTIP_FIELDS,
    )


# Build each row as a horizontal concatenation of its panels, then stack the
# rows vertically: the same (a | b) & (c | d) & (e | f) composition, without
# six hand-copied chart definitions.
chart = alt.vconcat(*(
    alt.hconcat(*(stat_panel(df_pokemon, stat) for stat in row))
    for row in STAT_GRID
))
chart

## Legendary and base total

In [None]:
# Two panels side by side over the same embedding: is_legendary as a nominal
# colour with point marks (left) and base_total with circle marks (right).
tooltip_fields = ['attack', 'defense', 'sp_attack', 'sp_defense', 'speed', 'hp', 'is_legendary', 'base_total']

legendary_panel = (
    alt.Chart(df_pokemon, title='is_legendary')
    .mark_point()
    .encode(x='x', y='y', color='is_legendary:N', tooltip=tooltip_fields)
)
base_total_panel = (
    alt.Chart(df_pokemon, title='base_total')
    .mark_circle()
    .encode(x='x', y='y', color='base_total', tooltip=tooltip_fields)
)

chart = legendary_panel | base_total_panel
chart

## Capture rate

In [None]:
# Embedding coloured by capture_rate, drawn with point marks.
tooltip_fields = ['attack', 'defense', 'sp_attack', 'sp_defense', 'speed', 'hp', 'is_legendary', 'base_total']

chart = (
    alt.Chart(df_pokemon, title='capture_rate')
    .mark_point()
    .encode(x='x', y='y', color='capture_rate', tooltip=tooltip_fields)
)
chart

## Experience growth

In [None]:
# Embedding coloured by experience_growth, drawn with circle marks. The chart
# is the cell's final expression, so it is displayed without being assigned.
tooltip_fields = ['attack', 'defense', 'sp_attack', 'sp_defense', 'speed', 'hp', 'is_legendary', 'base_total']

(
    alt.Chart(df_pokemon, title='experience_growth')
    .mark_circle()
    .encode(x='x', y='y', color='experience_growth', tooltip=tooltip_fields)
)

In [None]:
# Export the embedding together with the identifying and descriptive columns.
# The 'classfication' -> 'classification' rename is already applied in the
# typo-fix cell near the top of the notebook, so it is not repeated here.
attrs = [
    # identity
    'pokedex_number',
    'name',
    'japanese_name',
    # base stats
    'base_total',
    'attack',
    'defense',
    'sp_attack',
    'sp_defense',
    'speed',
    'hp',
    # other attributes
    'capture_rate',
    'experience_growth',
    'type1',
    'type2',
    'classification',
    'generation',
    'is_legendary',
    # t-SNE coordinates
    'x',
    'y',
]

# index=False: write only the selected columns, not the frame's row index.
df_pokemon[attrs].to_csv('pokemon_tsne.csv', index=False)