In [None]:
import numpy as np 
import pandas as pd
import os
from matplotlib import pyplot as plt
from matplotlib import image
from sklearn.model_selection import train_test_split
from sklearn import tree, metrics
import graphviz 
from sklearn.cluster import KMeans
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
import random
from sklearn.decomposition import PCA
from matplotlib.colors import ListedColormap

# Pokemon stats

In [None]:
# Load the Pokémon stats table from the Kaggle input directory and show its
# (rows, columns) shape.
POKEMON_CSV = '/kaggle/input/pokemon/pokemon.csv'
pokemon_stats = pd.read_csv(POKEMON_CSV)
pokemon_stats.shape

In [None]:
# Number of missing values in each column.
pokemon_stats.isnull().sum()

## Pokemon numerical stats

In [None]:
# Keep only the descriptive columns and the numerical stats.
# .copy() makes this an independent frame: a later cell assigns to its 'name'
# column, which on a plain column selection triggers SettingWithCopyWarning.
pokemon_stats_small = pokemon_stats[[
    'name', 'type1', 'type2', 'classfication',
    'base_total', 'attack', 'defense', 'hp',
    'sp_attack', 'sp_defense', 'speed',
    'height_m', 'weight_kg',
]].copy()
pokemon_stats_small.head()

In [None]:
# One histogram per numerical stat column.
pokemon_stats_small.hist(
    column=['base_total', 'attack', 'defense', 'hp', 'sp_attack',
            'sp_defense', 'speed', 'height_m', 'weight_kg'],
    figsize=(12, 8),
)

In [None]:
# Bar chart of how many Pokémon have each primary type.
pokemon_stats_small['type1'].value_counts().plot(kind='bar')
plt.title('primary type')

In [None]:
# Bar chart of how many Pokémon have each secondary type
# (value_counts skips rows where type2 is missing).
pokemon_stats_small['type2'].value_counts().plot(kind='bar')
plt.title('secondary type')

In [None]:
# The 25 most frequent values of the 'classfication' column (the column name
# is spelled this way in the dataset).
# Idea: cluster some of these classifications? There aren't that many, though.
pokemon_stats_small['classfication'].value_counts().iloc[:25]

## Strength against other pokemon

In [None]:
# Select 'name', 'type1' and every column whose name contains 'against'.
against_cols = ['name', 'type1'] + [col for col in pokemon_stats.columns if 'against' in col]

# .copy() gives an independent frame: later cells assign to its 'name' column
# and pop 'type1' from it, which should not act on a selection of pokemon_stats.
pokemon_against = pokemon_stats[against_cols].copy()

# Display only: set_index returns a new frame, pokemon_against keeps its index.
pokemon_against.set_index('name')

In [None]:
# One histogram per selected column of pokemon_against.
pokemon_against.hist(column=against_cols, figsize=(16, 12))

# Join with image dataset

In [None]:
# Build a (name, filepath) table from the image directory listing.
image_directory = "../input/pokemon-images-and-types/images/images/"
imgs = os.listdir(image_directory)

# The name is the part of the file name before the first '.', cut again at the
# first '-'. Hyphenated names are therefore truncated here and repaired in the
# following cells.
pokemon_filenames = pd.DataFrame({
    'name': [fname.partition('.')[0].partition('-')[0] for fname in imgs],
    'filepath': imgs,
})

# Display only: set_index returns a new frame.
pokemon_filenames.set_index('name')

## make names consistent between datasets

In [None]:
# Map the names derived from the image file names onto the spelling used in
# the stats table.
filename_name_fixes = {
    'farfetchd': "farfetch'd",
    'flabebe': 'flabébé',
    'hakamo': 'hakamo-o',
    'ho': 'ho-oh',
    'jangmo': 'jangmo-o',
    'kommo': 'kommo-o',
    'mr': 'mr. mime',
    'mime': 'mime jr.',
    'type': 'type: null',
}

# Assign the result back instead of calling replace(..., inplace=True) on the
# selected column: the chained in-place form acts on a temporary object and
# leaves the DataFrame unchanged when pandas Copy-on-Write is active.
pokemon_filenames['name'] = pokemon_filenames['name'].replace(filename_name_fixes)

In [None]:
# Names that depend on the part of the file name after the '-', so they are
# set from the full file name instead of the truncated name.
special_case_names = {
    'nidoran-f.png': 'nidoran (f)',
    'nidoran-m.png': 'nidoran (m)',
    'tapu-fini.jpg': 'tapu fini',
    'tapu-koko.jpg': 'tapu koko',
    'tapu-bulu.jpg': 'tapu bulu',
    'tapu-lele.jpg': 'tapu lele',
    'porygon-z.png': 'porygon-z',
}
for image_file, canonical_name in special_case_names.items():
    pokemon_filenames.loc[pokemon_filenames['filepath'] == image_file, 'name'] = canonical_name

In [None]:
# Lower-case the names in the stats table and replace the gender symbols with
# the '(f)' / '(m)' spelling used for the image names.
# The result is assigned back rather than using replace(..., inplace=True) on
# the selected column, which is a no-op when pandas Copy-on-Write is active.
pokemon_stats_small['name'] = (
    pokemon_stats_small['name']
    .str.lower()
    .replace({'nidoran♀': 'nidoran (f)',
              'nidoran♂': 'nidoran (m)'})
)

In [None]:
# Same name normalisation as for pokemon_stats_small: lower-case, then replace
# the gender symbols with '(f)' / '(m)'.
# The result is assigned back rather than using replace(..., inplace=True) on
# the selected column, which is a no-op when pandas Copy-on-Write is active.
pokemon_against['name'] = (
    pokemon_against['name']
    .str.lower()
    .replace({'nidoran♀': 'nidoran (f)',
              'nidoran♂': 'nidoran (m)'})
)

## join datasets

In [None]:
# Outer join of stats and image file names on their only shared column, 'name'.
# indicator=True adds a '_merge' column (left_only / right_only / both) that
# the next cells use to list unmatched rows.
pokemon_joined = pd.merge(
    pokemon_stats_small,
    pokemon_filenames,
    on='name',
    how='outer',
    indicator=True,
)
pokemon_joined

In [None]:
# Names that appear in the stats table but have no image file.
stats_without_image = pokemon_joined.loc[pokemon_joined['_merge'] == 'left_only', 'name']
print(sorted(stats_without_image))

In [None]:
# Names that have an image file but no row in the stats table (8 with only pics).
images_without_stats = pokemon_joined.loc[pokemon_joined['_merge'] == 'right_only', 'name']
print(sorted(images_without_stats))

In [None]:
# Keep only the rows present in both tables and drop the merge indicator.
matched_rows = pokemon_joined['_merge'] == 'both'
pokemon_data = pokemon_joined.loc[matched_rows].drop(columns='_merge')

# Display only: set_index returns a new frame.
pokemon_data.set_index('name')

In [None]:
# Inner-join the 'against' table with the image file names on 'name'.
#
# The plotting helpers further down look rows up in this frame using the row
# labels of the test split, which are the labels of pokemon_against. A plain
# merge renumbers the result 0..n-1, so the labels only line up if every
# Pokémon matches exactly one image. The original labels are therefore carried
# through the merge and restored as the index.
# drop_duplicates keeps a single image per name, so that each label selects
# exactly one row.
pokemon_against_joined = (
    pokemon_against
    .assign(_row_label=pokemon_against.index)
    .merge(pokemon_filenames.drop_duplicates(subset='name'), how='inner', on='name')
    .set_index('_row_label')
    .rename_axis(None)
)
pokemon_against_joined

# make an ml model?

In [None]:
# Target: primary type. Features: every remaining column except 'name'.
# NOTE: pop() removes 'type1' from pokemon_against in place, so running this
# cell a second time without rebuilding pokemon_against raises KeyError.
y = pokemon_against.pop('type1')
X = pokemon_against.drop(columns='name')

In [None]:
# Seed shared by the train/test splits, the decision tree and the neural net.
random_state=0

In [None]:
# Hold out 20% of the rows as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=random_state,
)
X_train.head()

In [None]:
# Same split (same seed and test size) applied to pokemon_against, which still
# contains the 'name' column.
X_train2, X_test2, y_train2, y_test2 = train_test_split(
    pokemon_against,
    y,
    test_size=0.2,
    random_state=random_state,
)
X_train2.head()

## decision tree

In [None]:
%%time
clf = tree.DecisionTreeClassifier(max_depth=100,random_state=random_state)
clf = clf.fit(X_train, y_train)

y_pred_dt = clf.predict(X_test) 

In [None]:
# Feature names and the class labels known to the fitted tree; both are passed
# to the tree export below.
features = X_train.columns.tolist()
classes = clf.classes_
classes

In [None]:
# Fraction of test rows whose predicted type equals the true type.
dt_hits = (y_pred_dt == y_test).sum()
dt_accuracy = dt_hits / y_test.shape[0]
dt_accuracy

In [None]:
# Export the fitted tree as DOT source (out_file=None returns it as a string)
# and render it inline with graphviz.
dot_data = tree.export_graphviz(
    clf,
    out_file=None,
    feature_names=features,
    class_names=classes,
)
graph = graphviz.Source(dot_data)
graph

## k-nearest neighbours (classification)

In [None]:
%%time
knn = KNeighborsClassifier()
knn.fit(X_train,y_train)

y_pred_knn = knn.predict(X_test)

In [None]:
# Test accuracy of the kNN classifier fitted on the full feature set.
knn_hits = (y_pred_knn == y_test).sum()
cluster_accuracy = knn_hits / y_test.shape[0]
cluster_accuracy

### with pca

In [None]:
%%time
pca = PCA(n_components=2)
reduced_data = pca.fit_transform(X_train)
knn_pca = KNeighborsClassifier()
knn_pca.fit(reduced_data,y_train)

reduced_data_test = pca.transform(X_test)
y_pred_knn_pca = knn_pca.predict(reduced_data_test)

In [None]:
# Test accuracy of the kNN classifier trained on the PCA projection.
# NOTE: this overwrites the cluster_accuracy computed for the plain kNN above.
knn_pca_hits = (y_pred_knn_pca == y_test).sum()
cluster_accuracy = knn_pca_hits / y_test.shape[0]
cluster_accuracy

## neural net

In [None]:
%%time
mlp = MLPClassifier(hidden_layer_sizes=(7,7),random_state=random_state)
mlp.fit(X_train, y_train)

y_pred_nn = mlp.predict(X_test)

In [None]:
# Test accuracy of the neural network.
nn_hits = (y_pred_nn == y_test).sum()
nn_accuracy = nn_hits / y_test.shape[0]
nn_accuracy

# Visualize correctness

In [None]:
# Pair every type, in the order it first appears in y_train, with one colour
# from the palette.
# NOTE(review): the pairing is purely positional, so it changes whenever the
# train/test split changes — confirm the palette order is still the intended one.
types = y_train.unique().tolist()
type_color = [
    "#66EBFF", "#8ED752", "#F95643", "#FB61B4", "#CDBD72", "#BBBDAF",
    "#F0CA42", "#8B76FF", "#F9AEFE", "#53AFFE", "#C3D221", "#A35449",
    "#8E6856", "#7673DA", "#AD5CA2", "#F8E64E", "#C3C1D7", "#75A4F9",
]
color_dict = {type_name: colour for type_name, colour in zip(types, type_color)}
color_dict

In [None]:
def display_by_type(pokemon_type,num):
    """Show test-set Pokémon of one type next to each model's picks for that type.

    The figure has four rows of up to ``num`` images:

    * row 0 – test Pokémon whose true primary type is ``pokemon_type``;
    * rows 1-3 – test Pokémon that the decision tree, the kNN classifier and
      the neural network predicted to be ``pokemon_type``.

    Every image is titled ``"<name>: <true type>"`` in the colour of its true
    type. Prediction rows get a green border when the true type equals
    ``pokemon_type`` and a red border otherwise; the ground-truth row is grey.

    Parameters
    ----------
    pokemon_type : str
        Type label to display.
    num : int
        Maximum number of images per row (also the number of grid columns).
    """
    # (row label, labels indexed like y_test) for each figure row.
    row_specs = [
        ('ground truth', y_test),
        ('decision tree', pd.Series(y_pred_dt, index=y_test.index)),
        ('knn', pd.Series(y_pred_knn, index=y_test.index)),
        ('neural network', pd.Series(y_pred_nn, index=y_test.index)),
    ]
    filepaths = [get_filepaths(labels, pokemon_type, num) for _, labels in row_specs]

    cols = num
    rows = len(row_specs)

    fig = plt.figure(figsize=(20, 12))
    # Row captions are placed in figure coordinates with fig.text, so no extra
    # Axes has to be created (and then hidden) just to carry them.
    for (label, _), y_pos in zip(row_specs, (0.8, 0.6, 0.4, 0.2)):
        fig.text(0.04, y_pos, label, fontsize=14)

    for i in range(rows):
        for j in range(cols):
            ax = fig.add_subplot(rows, cols, i*cols + j + 1)
            if j >= len(filepaths[i]):
                # Fewer matches than columns: leave an empty, frameless cell.
                ax.set_frame_on(False)
                ax.set_xticks([])
                ax.set_yticks([])
                continue
            filepath, name, correct = filepaths[i][j]
            img = image.imread(image_directory + filepath)
            ax.imshow(img)
            ax.set_title(name + ': ' + correct, color=color_dict[correct])
            ax.set_xticks([])
            ax.set_yticks([])

            if i == 0:
                spine_color = 'gray'
            elif correct == pokemon_type:
                spine_color = 'green'
            else:
                spine_color = 'red'
            for spine in ax.spines.values():
                spine.set_edgecolor(spine_color)
    plt.show()
    
def get_filepaths(dataset,pokemon_type,num):
    """Pick up to ``num`` rows labelled ``pokemon_type`` and describe each one.

    Parameters
    ----------
    dataset : pandas.Series
        Type labels (true or predicted) indexed like ``y_test``.
    pokemon_type : str
        Label to select.
    num : int
        Maximum number of rows; when more rows match, ``num`` of them are
        drawn with ``random.sample``.

    Returns
    -------
    list of (filepath, name, true_type) tuples, where filepath and name come
    from ``pokemon_against_joined`` and true_type from ``y_test``, all looked
    up by row label.
    """
    matching = dataset[dataset == pokemon_type].index.tolist()
    if len(matching) > num:
        matching = random.sample(matching, num)

    picked = pokemon_against_joined.loc[matching, ['filepath', 'name']]
    true_types = y_test.loc[matching]
    return [
        (image_file, pokemon_name, true_type)
        for image_file, pokemon_name, true_type
        in zip(picked['filepath'], picked['name'], true_types)
    ]

In [None]:
%%time
display_by_type('fire',10)

In [None]:
def display(num):
    """Show ``num`` random test-set Pokémon with their true and predicted types.

    The figure has four rows showing the same Pokémon in the same column order:
    ground truth, decision tree, kNN and neural network. Each image is titled
    with that row's type label, in the colour of that label. Prediction rows
    get a green border when the label equals the ground truth and a red border
    otherwise; the ground-truth row is grey.

    NOTE: inside this notebook the name ``display`` shadows IPython's built-in
    ``display`` helper.

    Parameters
    ----------
    num : int
        Number of Pokémon to draw. Values larger than the test set are reduced
        to the test-set size, because ``random.sample`` cannot draw more items
        than exist.
    """
    num = min(num, len(y_test))
    indices = random.sample(list(y_test.index), num)

    # (row label, labels indexed like y_test) for each figure row.
    row_specs = [
        ('ground truth', y_test),
        ('decision tree', pd.Series(y_pred_dt, index=y_test.index)),
        ('knn', pd.Series(y_pred_knn, index=y_test.index)),
        ('neural network', pd.Series(y_pred_nn, index=y_test.index)),
    ]
    filepaths = [get_filepaths_indices(labels, indices) for _, labels in row_specs]

    cols = num
    rows = len(row_specs)

    fig = plt.figure(figsize=(25, 10))
    # Row captions are placed in figure coordinates with fig.text, so no extra
    # Axes has to be created (and then hidden) just to carry them.
    for (label, _), y_pos in zip(row_specs, (0.8, 0.6, 0.4, 0.2)):
        fig.text(0.05, y_pos, label, fontsize=14)

    for i in range(rows):
        for j in range(cols):
            ax = fig.add_subplot(rows, cols, i*cols + j + 1)
            filepath, p_type = filepaths[i][j]
            img = image.imread(image_directory + filepath)
            ax.imshow(img)
            ax.set_title(p_type, color=color_dict[p_type])
            ax.set_xticks([])
            ax.set_yticks([])

            if i == 0:
                spine_color = 'gray'
            elif p_type == filepaths[0][j][1]:  # matches the ground-truth row
                spine_color = 'green'
            else:
                spine_color = 'red'
            for spine in ax.spines.values():
                spine.set_edgecolor(spine_color)
    plt.show()
    
def get_filepaths_indices(dataset,indices):
    """Return ``(filepath, label)`` pairs for the given row labels.

    Parameters
    ----------
    dataset : pandas.Series
        Type labels (true or predicted) indexed like ``y_test``.
    indices : list
        Row labels to look up, in the order the pairs should be returned.

    Returns
    -------
    list of (filepath, label) tuples; filepath comes from
    ``pokemon_against_joined`` and label from ``dataset``, both looked up by
    row label.
    """
    labels = dataset.loc[indices]
    image_files = pokemon_against_joined.loc[indices, 'filepath']
    return [(image_file, label) for image_file, label in zip(image_files, labels)]

In [None]:
%%time
display(10)

In [None]:
%%time
# Create color maps
h=0.1
cmap = ListedColormap(type_color)
X_reduced = PCA(n_components=2).fit_transform(X)

# calculate min, max and limits
x_min, x_max = X_reduced[:, 0].min() - 1, X_reduced[:, 0].max() + 1
y_min, y_max = X_reduced[:, 1].min() - 1, X_reduced[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h),np.arange(y_min, y_max, h))

# predict class using data and kNN classifier
Z = knn_pca.predict(np.c_[xx.ravel(), yy.ravel()])

# Put the result into a color plot
Z = Z.reshape(xx.shape)
Z_numeric = [[np.where(classes== i)[0][0] for i in row] for row in Z]#[[color_dict[i] for i in row] for row in Z]

plt.figure()
plt.pcolormesh(xx, yy, Z_numeric, cmap=cmap)

# # Plot also the training points
plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c=[color_dict[i] for i in y], cmap=cmap, alpha=1.0, edgecolor="black")
plt.xlim(xx.min(), xx.max())
plt.ylim(yy.min(), yy.max())
plt.show()