Mostly familiarizing myself with Kaggle-hosted notebooks and playing around with Pokemon stat distributions.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.decomposition import PCA

sns.set_style('whitegrid')

% matplotlib inline


In [None]:
# Read the Pokemon table from the dataset's input directory.
D = pd.read_csv('../input/Pokemon.csv')

# Flag every row whose name contains the substring 'Mega'.
D['Mega'] = D['Name'].apply(lambda name: 'Mega' in name)

# Rows without a secondary type reuse their primary type, so that 'Type 2'
# is populated for every row.
nt2 = D['Type 2'].isnull()
D['Type 2'] = D['Type 2'].where(~nt2, D['Type 1'])

D.head()

In [None]:
# Standardise the six base stats, fit a PCA on the standardised values, and
# store both the z-scores and the PCA projection back on D as new columns.
stat_fields = ['HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed']
zscore_fields = [f + ' Z' for f in stat_fields]
component_fields = ['C' + str(i) for i in range(len(stat_fields))]

stats = D[stat_fields].values
# Column-wise z-score: subtract each stat's mean and divide by its standard
# deviation (numpy's default std, i.e. ddof=0).
zscores = (stats - stats.mean(axis=0)) / stats.std(axis=0)

# Add one ' Z' column per stat. assign() creates the columns directly instead
# of row-concatenating empty placeholder frames, so D's existing column order
# and dtypes are left alone, and re-running the cell simply overwrites the
# columns rather than depending on concat's handling of empty frames.
D = D.assign(**dict(zip(zscore_fields, zscores.T)))

pca = PCA()
pca.fit(zscores)
transformed = pca.transform(zscores)

# One column per principal component (C0, C1, ...), in component order.
D = D.assign(**dict(zip(component_fields, transformed.T)))
print(pca.explained_variance_ratio_)

In [None]:
# Reduced SVD (full_matrices=False) of the raw `stats` matrix; U is shown as
# the cell output.
# NOTE(review): the PCA earlier in the notebook was fit on `zscores`, while
# this decomposes the un-standardised `stats` -- confirm that is intended if
# the two results are meant to be compared.
# NOTE(review): numpy documents the third return value as Vh (already
# transposed); it is bound to `V` here -- confirm before using it as V.
U, S, V = np.linalg.svd(stats, full_matrices=False)
U

In [None]:
# Display the fitted PCA's component matrix as the cell output.
pca.components_

In [None]:
# Mean standardised HP for every (Type 1, Type 2) combination: primary type on
# the rows, secondary type on the columns.
HP = pd.pivot_table(
    D,
    values='HP Z',
    index='Type 1',
    columns='Type 2',
    aggfunc='mean',
)
# Where a combination has no data, borrow the value of the mirrored
# combination (types swapped) if that one exists.
HP = HP.fillna(value=HP.transpose())

In [None]:
# Mask for the heatmap: 1 strictly above the main diagonal, 0 on and below it,
# so only the diagonal and the lower triangle are drawn.
mask = np.triu(np.ones_like(HP.values), k=1)

sns.heatmap(HP, mask=mask, square=True)
sns.despine()
plt.tight_layout()

In [None]:
# Reshape D to long form: one row per (Pokemon, type slot), with the type
# itself in a single 'Type' column and every other column carried along.
value_vars = ['Type 1', 'Type 2']
id_vars = [c for c in D.columns if c not in value_vars]

Dm = pd.melt(D, id_vars=id_vars, value_vars=value_vars, value_name='Type')

# A missing 'Type 2' was filled with 'Type 1' when the data was loaded, so a
# single-type Pokemon would otherwise contribute two identical rows under the
# same type and be counted twice in every per-type plot. Keep one row per
# (Pokemon, Type); rows that differ only in which slot the type came from are
# collapsed.
Dm = Dm.drop_duplicates(subset=id_vars + ['Type'])

In [None]:
# Types to compare against each other.
probe = ['Steel', 'Psychic', 'Normal', 'Fighting', 'Electric']

# Keep only the long-form rows whose type is one of the probed types.
df = Dm[Dm['Type'].isin(probe)]

# Pairwise scatter plots of the standardised stats, coloured by type, with a
# KDE of each stat on the diagonal.
sns.pairplot(
    df.loc[:, ['Type'] + zscore_fields],
    hue='Type',
    diag_kind='kde',
    plot_kws=dict(alpha=.8),
)
sns.despine()