# Pokemon Convolution

This Jupyter Notebook will explore predicting information about a Pokemon based on their sprite. 

Steps:
1) WebScrape together a dataset of Pokemon images

2) Develop and Train a convolutional model to classify Pokemon by their type

3) Develop and Train a regression model to predict the stats of a Pokemon

4) Put it all together into a single function that plots the sprite and the model predictions against the ground truth




In [None]:
import os
import json
import requests
from bs4 import BeautifulSoup
import urllib3
import shutils
import tqdm
import random
import PIL
import pickle
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import tensorflow as tf
import tensorflow.keras.layers as tfl
from tensorflow.keras.preprocessing import image_dataset_from_directory




## Scrape and Store the Data

Define a buildDatabase function that collects every Pokemon's name, typing, and a link to download its sprite, and stores them in a Pandas DataFrame. Thanks to Pokemondb.net!

In [None]:
def buildDatabase(url):
    """Scrape Pokemon names, typings, sprite links and base stats into a DataFrame.

    Parameters
    ----------
    url : str
        Address of the listing page. Every ``div.infocard`` on it is treated
        as one Pokemon.

    Returns
    -------
    pandas.DataFrame
        One row per Pokemon with the columns ``Pokemon``, ``Type 1``,
        ``Type 2`` (the string ``"None"`` for single-typed Pokemon),
        ``ImageLink``, ``HP``, ``Attack``, ``Defense``, ``Special Attack``,
        ``Special Defense`` and ``Speed``. Stat values are kept as the scraped
        text (strings); convert them before numeric use.

    Raises
    ------
    requests.HTTPError
        If the listing page or any per-Pokemon page returns an error status.
    """
    info_columns = ["Pokemon", "Type 1", "Type 2", "ImageLink"]
    stat_columns = ["HP", "Attack", "Defense",
                    "Special Attack", "Special Defense", "Speed"]
    # Position of each stat among the page's "cell-num" cells: 0, 3, ..., 15.
    # NOTE(review): presumably the cells come in (base, min, max) triples - confirm.
    stat_cell_index = dict(zip(stat_columns, range(0, 18, 3)))
    request_timeout = 30  # seconds; without it a stalled connection hangs forever

    # Pull in the url's html and parse it with Beautiful Soup
    webpage = requests.get(url, timeout=request_timeout)
    webpage.raise_for_status()
    soup = BeautifulSoup(webpage.content, "html.parser")

    # Collect one dict per Pokemon; the DataFrame is built once at the end
    # (DataFrame.append was removed in pandas 2.0 and copied the frame per row).
    rows = []
    for entry in soup.find_all("div", class_="infocard"):

        # Name and typings are reset for every card so nothing leaks over
        # from the previous Pokemon.
        name = None
        typings = []
        for anchor in entry.find_all("a", class_=True):
            parts = anchor.get("href", "").split("/")
            if len(parts) < 3:
                continue
            if parts[1] == "pokedex":
                name = parts[2]
            elif parts[1] == "type":
                typings.append(parts[2])

        if name is None:
            continue

        # End Pokemon collection after this point due to change in sprite quality
        if name == "meltan":
            break

        if not typings:
            continue

        # Identify the Pokemon's sprite image link
        sprite = entry.find("span", class_="img-fixed img-sprite")

        rows.append({"Pokemon": name,
                     "Type 1": typings[0],
                     # Single-typed Pokemon get the literal string "None"
                     "Type 2": typings[1] if len(typings) > 1 else "None",
                     "ImageLink": sprite["data-src"]})

    pokemonDataFrame = pd.DataFrame(rows, columns=info_columns)

    # The stats live on a separate page per Pokemon
    stat_rows = []
    for pokemon in pokemonDataFrame["Pokemon"]:
        page = requests.get("https://pokemondb.net/pokedex/" + pokemon,
                            timeout=request_timeout)
        page.raise_for_status()
        cells = BeautifulSoup(page.content, "html.parser").find_all(
            "td", class_="cell-num")

        # One dict per Pokemon keeps each stat on its own row even if a page
        # is missing cells (those stats become None instead of shifting the
        # values of every later Pokemon).
        stat_rows.append({
            column: cells[position].text if position < len(cells) else None
            for column, position in stat_cell_index.items()
        })

    statsDataFrame = pd.DataFrame(stat_rows, columns=stat_columns)

    # Attach the stats to the existing Data Frame
    return pd.concat([pokemonDataFrame, statsDataFrame], axis=1)



Let's run the function to create our DataFrame and check out an entry.

In [None]:
# Scrape the national Pokedex listing into a DataFrame.
# buildDatabase issues one extra web request per Pokemon, so this cell is slow.
pokemonDataFrame = buildDatabase('https://pokemondb.net/pokedex/national')
# Spot-check a single row (positional index 156)
print(pokemonDataFrame.iloc[156])

# Cache the scraped table on disk so it can be reloaded without re-scraping
pokemonDataFrame.to_pickle("./PokemonDataFrame.pkl")

## Develop Image Dataset

Now that we have a Data Frame with all of the Pokemon we plan to train and test our model with, we need to go through and download the sprite image for each Pokemon and save it to a local folder. I started by web scraping all of the images from the most recent generation, which resulted in ~800 training examples for 20 different classes. The thinking was that higher-resolution training examples would provide better training data; however, that decision proved poor. It was challenging to train the model with that few training examples, so I went back and downloaded more from older generations to have closer to 6k training examples. To further augment the data, the convolutional model included a random horizontal flip and a random rotation. Images of the current training dataset can be seen below. Images are saved based on their classification, which allows TensorFlow to load the entire dataset, both input and classification, with one command.

In [None]:
def spriteDownloaderwithType(df):
    """Download every sprite into ``PokemonSprites/<primary type>/``.

    The folder layout (one sub-folder per class) is what directory-based
    image loaders expect.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide an ``ImageLink`` column (sprite URL) and a ``Type 1``
        column (primary typing, used as the folder name).

    Raises
    ------
    requests.HTTPError
        If a sprite URL returns an error status, so that an error page is
        never written to disk as if it were an image.
    """
    sprite_root = os.path.join(os.getcwd(), "PokemonSprites")

    # Go down each entry of the dataframe and save the sprite's image
    for url, typing in zip(df["ImageLink"], df["Type 1"]):

        # The file keeps the name it has on the server
        filename = url.split("/")[-1]

        # Only handling primary typing. makedirs also creates the
        # "PokemonSprites" parent on the first run and is a no-op afterwards.
        type_directory = os.path.join(sprite_root, typing)
        os.makedirs(type_directory, exist_ok=True)

        # Save photo
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        with open(os.path.join(type_directory, filename), 'wb') as local_file:
            local_file.write(response.content)


In [None]:
# Download one sprite per DataFrame row into a folder named after its primary type
spriteDownloaderwithType(pokemonDataFrame)

At this point, we have collected all of the data to train and test our model. All of the images are stored in a local folder. Let's start by defining the image size and datasets(train/dev/test).

In [None]:
# Define tf.Dataset parameters
BATCH_SIZE = 32
IMG_SIZE = (128, 128)
IMG_SHAPE = IMG_SIZE + (3,)  # (height, width, RGB channels)

# Define file path
trainDir = os.path.join(os.getcwd(), "cleaned_train")
testDir = os.path.join(os.getcwd(), "cleaned_test")
# Seed for shuffling and random augmentation so runs are repeatable
seed = 107

# Augmentation applied to the training images only. Pixel values are rescaled
# to [0, 1]; anything fed to the trained model later must use the same scale.
trainAugmenter = ImageDataGenerator(rescale=1.0 / 255.0,
                                    rotation_range=90,
                                    zoom_range=0.2,
                                    shear_range=0.2,
                                    horizontal_flip=True)

# Load in the training set; one sub-folder of trainDir per class
trainDataGenerator = trainAugmenter.flow_from_directory(trainDir,
                                                        target_size=IMG_SIZE,
                                                        class_mode="categorical",
                                                        batch_size=BATCH_SIZE,
                                                        seed=seed)

# Load in the test set (rescaled, but never augmented)
testRescaler = ImageDataGenerator(rescale=1.0 / 255.0)

testDataGenerator = testRescaler.flow_from_directory(testDir,
                                                     target_size=IMG_SIZE,
                                                     class_mode="categorical",
                                                     batch_size=BATCH_SIZE,
                                                     seed=seed)

# Class names in class-index order (same order as the softmax outputs)
classNames = list(trainDataGenerator.class_indices.keys())

## Model Optimization

The training data looks sufficient at this point. Now, we can look at developing the convolutional model. Given the size of the dataset, and that Pokémon type recognition seemed akin to classifying animals at a zoo, transfer learning from readily available pre-trained networks seemed like an auspicious path.

To speed up the model selection process, I opted to reduce the image size to 75x75 to weed out poorly performing models in favor of more auspicious architectures. Then, once I was happy with the model, I updated the image size to 128x128. Although it drastically increased the training time, the accuracy increased by around 20% with the increased resolution. Since all of the preliminary development was done on smaller files, I was able to consider more options more quickly. Ultimately, I arrived at a validation set accuracy of ~76%.

In [None]:
def buildModel():
    """Build the type classifier: a frozen InceptionV3 trunk plus a small head.

    The trunk is cut at the intermediate "mixed7" layer. On top of it sit a
    1x1 convolution, a flatten, two dropout + 512-unit ReLU dense stages and
    an 18-way softmax. The input shape comes from the global IMG_SHAPE.

    Returns
    -------
    tf.keras.Model
        The uncompiled classification model.
    """
    layers = tf.keras.layers

    # Pretrained InceptionV3 without its own classification top
    backbone = tf.keras.applications.inception_v3.InceptionV3(
        weights="imagenet",
        include_top=False,
        input_shape=IMG_SHAPE,
    )

    # Only the new head is trained; every pretrained layer stays fixed
    for pretrained_layer in backbone.layers:
        pretrained_layer.trainable = False

    # Branch off from the intermediate "mixed7" feature map
    features = backbone.get_layer("mixed7").output

    # Head, applied in order: (1,1) convolution, flatten, two
    # dropout + fully-connected stages, then the softmax output
    head = (
        layers.Conv2D(1024, 1),
        layers.Flatten(),
        layers.Dropout(0.15),
        layers.Dense(512, activation="relu"),
        layers.Dropout(0.15),
        layers.Dense(512, activation="relu"),
        layers.Dense(18, activation="softmax"),
    )
    for head_layer in head:
        features = head_layer(features)

    return tf.keras.Model(inputs=backbone.inputs, outputs=features)

In [None]:
# Build the classifier and show its architecture.
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (that only appended a stray "None" to the output).
TypeModel = buildModel()
TypeModel.summary()

# Very small learning rate for the Adam optimizer; categorical cross-entropy
# matches one-hot labels and a softmax output.
TypeModel.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.000005),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

## Model Training

In [None]:
# Early stopping: give up on inauspicious combinations before all epochs are
# used, and once overfitting sets in roll back to the best weights seen
# (judged by validation loss, with a patience of 100 epochs).
stopping = tf.keras.callbacks.EarlyStopping(
    restore_best_weights=True,
    patience=100,
    monitor='val_loss',
)

# Train the model
# NOTE(review): per the Keras fit() documentation, `shuffle` and
# `validation_batch_size` are not meant for generator inputs - confirm whether
# they have any effect here.
history = TypeModel.fit(
    trainDataGenerator,
    epochs=500,
    steps_per_epoch=128,
    validation_data=testDataGenerator,
    validation_steps=16,
    validation_batch_size=128,
    shuffle=True,
    callbacks=[stopping],
)

# Save the model weights
directory = f"{os.getcwd()}\\Model\\TunedModel"
TypeModel.save_weights(directory)

In [None]:
# Restore the tuned classifier weights saved by the training cell
directory = f"{os.getcwd()}\\Model\\TunedModel"
TypeModel.load_weights(directory)

Only 3 classes comprised 70% of the training data, which was one of the major issues. Another issue was the inconsistency in the training data: not all images were of similar quality or the same size. With more time, I would make a confusion matrix to better understand the error and work on image handling to improve the train and test datasets. Developing a framework to understand the error was challenging because, in theory, the Bayes error is 0%. There are people who know the typing of every Pokémon, but that's because there is a finite number of Pokémon and their typings can easily be memorized. On the other hand, the model is tasked with classifying Pokémon it hasn't seen before, which explains some of the avoidable bias. Although impossible to quantify, the error for experts completing the same task as the model would likely be greater than 0%. For such an arbitrary and meaningless task, it is challenging to put numbers to the type of error present in the model.

In [None]:
def history_plotter(history):
    """Plot training/validation accuracy and loss from a Keras History object.

    Two figures are created: one for accuracy and one for loss.

    Parameters
    ----------
    history : object
        Must expose a ``history`` dict with the list-valued keys
        ``accuracy``, ``val_accuracy``, ``loss`` and ``val_loss``
        (one value per epoch).
    """
    # Accuracy curves start from a 0.0 placeholder at x = 0 ("before
    # training"), so the first real epoch sits at x = 1.
    acc = [0.] + history.history['accuracy']
    val_acc = [0.] + history.history['val_accuracy']

    plt.figure(figsize=(8, 8))
    plt.subplot(2, 1, 1)
    plt.plot(acc, label='Training Accuracy')
    plt.plot(val_acc, label='Validation Accuracy')
    plt.legend(loc='lower right')
    plt.ylabel('Accuracy')
    plt.ylim([min(plt.ylim()), 1])
    plt.title('Training and Validation Accuracy')

    # No placeholder for the loss: a fabricated 0.0 would suggest a perfect
    # loss before training and drag the y-axis down to zero. The explicit
    # epoch axis keeps the x positions aligned with the accuracy figure.
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs = range(1, len(loss) + 1)

    plt.figure(figsize=(8, 8))
    plt.subplot(2, 1, 1)
    plt.plot(epochs, loss, label='Training Loss')
    plt.plot(epochs, val_loss, label='Validation Loss')
    plt.legend(loc='upper right')
    plt.ylabel('Loss')
    plt.title('Training and Validation Loss')


history_plotter(history)

In [None]:
# Score the classifier on the test generator (returns the loss and the compiled metrics)
TypeModel.evaluate(testDataGenerator)

## Regression Model Development

The next step is to develop a model that translates images into numerical values representing the stats of a Pokémon. Starting from scratch with another convolutional model would require more time spent on model development. Instead, the classification model can be turned into an image encoder by removing the softmax layer. The resulting image encodings are then used to train a simple sequential model. The intuition is that layers early in a convolutional network pick up small, individual edges, and as the information propagates, the network develops these into larger-scale or more meaningful pieces of information. So, the last dense layer in the classification model has taken the (128, 128, 3) image and distilled it into 512 factors that suffice to describe the original image.

In [None]:
def EncodingModel(TypeModel):
    """Turn the trained classifier into an image encoder.

    The saved weights are loaded into ``TypeModel`` (in place), then a new
    model is returned that shares its input but stops at the second-to-last
    layer, i.e. the softmax layer is dropped.

    Parameters
    ----------
    TypeModel : tf.keras.Model
        The classification model whose architecture matches the saved weights.

    Returns
    -------
    tf.keras.Model
        Model mapping an image to the output of the layer before the softmax.
    """
    # Restore the pre-trained weights
    weights_path = os.getcwd() + "\\Model\\TunedModel"
    TypeModel.load_weights(weights_path)

    # Everything up to (but excluding) the final softmax layer
    penultimate_layer = TypeModel.layers[-2]
    return tf.keras.Model(inputs=TypeModel.input,
                          outputs=penultimate_layer.output)


EncModel = EncodingModel(TypeModel)
EncModel.summary()

In [None]:
def spriteEncoder(pokemonDataFrame, EncModel, rescale=1.0):
    """Encode every Pokemon's sprite and pair the encodings with its stats.

    Sprites are read from ``PokemonSpritesalphabetical/<name>.png`` in the
    working directory and resized to the encoder's input size when needed.

    Parameters
    ----------
    pokemonDataFrame : pandas.DataFrame
        Needs a ``Pokemon`` column (sprite file stem) and the six stat columns.
    EncModel : tf.keras.Model
        Encoder producing one row vector per image.
    rescale : float, optional
        Factor applied to the raw 0-255 pixel values before encoding. The
        default of 1.0 keeps the historical behaviour (raw pixels).
        NOTE(review): the classifier's generators use ``rescale=1/255``, so
        ``rescale=1/255`` matches the training distribution. Whatever value is
        used here must also be used wherever encodings are fed to the stat
        model.

    Returns
    -------
    X_data : numpy.ndarray
        Stacked encodings, one row per Pokemon in DataFrame order.
    Y_data : pandas.DataFrame
        The HP / Attack / Defense / Special Attack / Special Defense / Speed
        columns of ``pokemonDataFrame``.
    """
    directory = os.path.join(os.getcwd(), "PokemonSpritesalphabetical")

    # Resize to the encoder's spatial input size when it is fixed
    # (load_img leaves images that already have that size untouched).
    input_size = tuple(EncModel.input_shape[1:3])
    target_size = input_size if all(input_size) else None

    # Cycle through each pokemon, pull its sprite, feed it to the encoding
    # model, and store the output. Encodings go into a list and are stacked
    # once at the end instead of re-copying a growing array every iteration.
    encodings = []
    for pokemon in pokemonDataFrame["Pokemon"]:
        filepath = os.path.join(directory, pokemon + ".png")

        # Load and format the image as a batch of one
        image = tf.keras.preprocessing.image.load_img(filepath,
                                                      color_mode='rgb',
                                                      target_size=target_size,
                                                      interpolation='nearest')
        input_arr = tf.keras.preprocessing.image.img_to_array(image)
        input_arr = np.expand_dims(input_arr, axis=0) * rescale

        # Process the image with a single forward propagation
        encodings.append(EncModel.predict(x=input_arr))

    if encodings:
        X_data = np.vstack(encodings)
    else:
        # Empty input: zero rows, encoder-width columns
        X_data = np.empty((0, EncModel.output_shape[-1]))

    Y_data = pokemonDataFrame[["HP", "Attack", "Defense",
                               "Special Attack", "Special Defense", "Speed"]]

    return X_data, Y_data


Now that we have a function to handle the encoding, we can build the input and output data for the stat predictions.

In [None]:
# Encode every sprite (inputs) and collect the matching stats (targets)
X_data, Y_data = spriteEncoder(pokemonDataFrame, EncModel)

# Sanity check: both shapes, one per line
print(X_data.shape, Y_data.shape, sep="\n")

# The scraped stats are text, so convert them to numbers first
Y_data = tf.strings.to_number(Y_data)

# Wrap everything in a batched TF Dataset -- disregarding good practice of
# train/dev/test splits
statDataset = tf.data.Dataset.from_tensor_slices((X_data, Y_data))
statDataset = statDataset.batch(32)

For the regression model that handles stat predictions, a simple model with 3 dense layers and dropout will suffice.


In [None]:
def buildStatModel():
    """Build the regression model that maps an image encoding to six stats.

    Architecture: a 512-wide input, three stages of Dense(100, ReLU) followed
    by Dropout(0.5), and a linear Dense(6) output multiplied by 10.

    Returns
    -------
    tf.keras.Model
        The uncompiled stat-prediction model.
    """
    # Shape must be a tuple; the original "(512)" was just the integer 512
    encoding_shape = (512,)
    input_encoding = tf.keras.Input(shape=encoding_shape)

    # Three identical Dense + Dropout stages
    x = input_encoding
    for _ in range(3):
        x = tf.keras.layers.Dense(100, activation="relu")(x)
        x = tf.keras.layers.Dropout(0.5)(x)

    # Linear output layer, one unit per stat
    predictions = tf.keras.layers.Dense(6)(x)

    # NN tend to prefer smaller numbers, so applying a scaling factor here
    # lets the dense output stay roughly 10x smaller than the stat targets
    predictions = tf.math.multiply(predictions, 10)

    return tf.keras.Model(inputs=input_encoding, outputs=predictions)

In [None]:
# Create the stat regression model
statModel = buildStatModel()

# Mean squared error is optimised; mean absolute error is reported alongside
statModel.compile(
    loss="mse",
    metrics=["mae"],
    optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
)

statModel.summary()

In [None]:
# Train the model
# NOTE: this rebinds the global `history`, replacing the classifier's training history
history = statModel.fit(statDataset, epochs=100)

Putting this all together,  we have a model that takes an image of a Pokémon and predicts its typing and stats. One model handles the image encoding and classification, and the second model leverages the image encoding of the first model to predict its stats.

In [None]:
def predictionChecker(number, pokemonDataFrame, statModel, TypeModel, X_data, Y_data, classNames):
    """Plot one Pokemon's sprite next to the type and stat predictions.

    Parameters
    ----------
    number : int
        Positional (0-based) row of the Pokemon in ``pokemonDataFrame``,
        ``X_data`` and ``Y_data``.
    pokemonDataFrame : pandas.DataFrame
        Provides the ``Pokemon`` and ``Type 1`` columns.
    statModel : tf.keras.Model
        Regression model fed with the stored encoding ``X_data[number]``.
    TypeModel : tf.keras.Model
        Classifier fed with the sprite image.
    X_data : numpy.ndarray
        Image encodings, one row per Pokemon.
    Y_data : array-like
        Ground-truth stats, one row of six values per Pokemon.
    classNames : iterable of str
        Class labels in the order of the classifier's outputs.

    Side effects: prints the raw class probabilities, draws two figures and
    saves the second one as "figure".
    """
    # Positional lookup, consistent with how X_data / Y_data are indexed
    record = pokemonDataFrame.iloc[number]
    name = record["Pokemon"]
    filepath = os.path.join(os.getcwd(), "PokemonSpritesalphabetical",
                            name + ".png")

    # Load the sprite, resized to the classifier's input size if needed
    input_size = tuple(TypeModel.input_shape[1:3])
    target_size = input_size if all(input_size) else None
    image = tf.keras.preprocessing.image.load_img(filepath,
                                                  color_mode='rgb',
                                                  target_size=target_size,
                                                  interpolation='nearest')
    input_arr = tf.keras.preprocessing.image.img_to_array(image)
    # The classifier's generators rescale pixels by 1/255, so the same
    # scaling is applied here before predicting.
    input_arr = np.expand_dims(input_arr, axis=0) / 255.0

    # Predict Type
    typePrediction = TypeModel.predict(input_arr, verbose=0)
    print(typePrediction)

    # Predict Stats from the pre-computed encoding of this Pokemon
    encoding = np.expand_dims(X_data[number, :], axis=0)
    statPrediction = statModel.predict(encoding)

    # Plot Sprite
    plt.figure(figsize=(7, 7))
    plt.imshow(image)
    plt.axis('off')
    plt.title(name + "'s Sprite")

    # Setup Plot
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
    fig.suptitle("Model Predictions")

    # Plot Type Prediction: the four most confident classes
    typePred = pd.DataFrame(data=typePrediction.reshape(-1),
                            index=list(classNames),
                            columns=["Percentage of Confidence"])
    typePred = typePred.sort_values(by="Percentage of Confidence", ascending=True)
    typePred = typePred.tail(4)
    typePred.plot.barh(ax=ax2)
    # Title goes on the type axes explicitly, not on pyplot's "current" axes
    ax2.set_title("Correct Type: " + record["Type 1"])

    # Plot Stat Prediction against the actual stats
    indices = ["HP", "Attack", "Defense", "Special Attack", "Special Defense", "Speed"]
    actualStats = np.asarray(Y_data, dtype=float)[number]
    stats = np.hstack((actualStats.reshape(6, 1), statPrediction.reshape(6, 1)))
    stats = pd.DataFrame(data=stats, index=indices,
                         columns=["Actual Stats", "Predicted Stats"])
    stats.plot.barh(ax=ax1)

    plt.savefig("figure")


predictionChecker is the bow on top of this project. It takes the Pokedex number of a Pokemon and shows the model input and outputs.

In [None]:
# Pick a Pokemon and compare the model predictions with its real type and stats
PokedexNumber = 4
predictionChecker(
    number=PokedexNumber,
    pokemonDataFrame=pokemonDataFrame,
    statModel=statModel,
    TypeModel=TypeModel,
    X_data=X_data,
    Y_data=Y_data,
    classNames=classNames,
)

## Test Myself

Now that the formalities are out of the way, I can use this program for its intended use. 

In [None]:
# Retrieve an image of me
filepath = os.path.join(os.getcwd(), "Headshot.jpg")

# Load and format the image. target_size is (height, width); the channel
# count is determined by color_mode, not by target_size.
image = tf.keras.preprocessing.image.load_img(filepath,
                                              color_mode='rgb',
                                              target_size=(128, 128),
                                              interpolation='nearest')
input_arr = tf.keras.preprocessing.image.img_to_array(image)
input_arr = np.expand_dims(input_arr, axis=0)

# Predict Type. The classifier's generators rescale pixels by 1/255, so the
# same scaling is applied here.
typePrediction = TypeModel.predict(input_arr / 255.0, verbose=0)

# Predict Stats. The encoding is computed from the unscaled pixels on purpose:
# that is how spriteEncoder produces the encodings the stat model is trained on.
encoding = EncModel.predict(x=input_arr)
statPrediction = statModel.predict(encoding)


# Plot Sprite
plt.figure(figsize=(7, 7))
plt.imshow(image)
plt.axis('off')
plt.title("Patrick's Sprite")

# Setup Plot
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(16, 6))
fig.suptitle("Model Predictions")

# Plot Type Prediction: the four most confident classes
typePred = pd.DataFrame(data=typePrediction.reshape(-1),
                        index=list(classNames),
                        columns=["Percentage of Confidence"])
typePred = typePred.sort_values(by="Percentage of Confidence", ascending=True)
typePred = typePred.tail(4)
typePred.plot.barh(ax=ax2)

# Plot Stat Prediction (no ground truth here, so the legend is blanked)
indices = ["HP", "Attack", "Defense", "Special Attack", "Special Defense", "Speed"]
stats = statPrediction.reshape(6, 1)
stats = pd.DataFrame(data=stats, index=indices, columns=["Predicted Stats"])
stats.plot.barh(ax=ax1)
ax1.legend([])


I think the psychic typing is a compliment, hahaha!

## Data Visualization



In [None]:
import sklearn
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import matplotlib.animation as animation

# Reload the scraped table from the pickle written by the scraping cell
# (a local file this notebook created; never unpickle data from an untrusted source)
pokemonDataFrame = pd.read_pickle("./PokemonDataFrame.pkl")

In [None]:
# Bar chart of how many Pokemon fall into each primary type across the whole
# dataset (train and test)
typeDistribution = pokemonDataFrame["Type 1"].value_counts()

fig = plt.figure(figsize=(20, 5))
plt.bar(typeDistribution.index, typeDistribution[:])
plt.title("Data Set Type Distribution")

# Write the chart to disk before displaying it
plt.savefig("typeDistribution")
plt.show()

In [None]:
# Select the classic 3: Water, Grass, and Fire.
# Exact comparison instead of substring matching: it yields plain booleans
# even when a type is missing.
indicesW = pokemonDataFrame['Type 1'] == 'water'
indicesG = pokemonDataFrame['Type 1'] == 'grass'
indicesF = pokemonDataFrame['Type 1'] == 'fire'

# Combine the masks with logical OR (not arithmetic +)
starterMask = indicesW | indicesG | indicesF

# Remove all other types to understand how accurate the type encodings are.
# X_data is a NumPy array with one row per DataFrame row, so it is filtered
# with the same mask converted to a plain boolean array.
pokemonDataFrameSimple = pokemonDataFrame[starterMask]
encodingSimple = X_data[starterMask.to_numpy(), :]

# Check sizes to ensure data was pruned correctly (row counts must match)
print(pokemonDataFrameSimple.shape)
print(encodingSimple.shape)

In [None]:
# Fit a full PCA on the encodings to see how the variance is spread over the
# components
pca = PCA()
pca.fit(encodingSimple)

# Cumulative share of variance retained versus number of components kept
fig = plt.figure(figsize=(5, 5))
plt.plot(np.cumsum(pca.explained_variance_ratio_))
plt.xlabel("Number of Variables")
plt.ylabel("Percentage of Variance Explained")
plt.title("CDF of Variance")
plt.savefig("PCA")
plt.show()


In [None]:
# Perform a PCA with 3 components to visualize the distinction between Fire,
# Water, and Grass encodings. The encodings are standardized first.
scaling = StandardScaler()
scaling.fit(encodingSimple)
encodingSimple = scaling.transform(encodingSimple)
pca = PCA(n_components=3)
pca.fit(encodingSimple)
pcaEncodings = pca.transform(encodingSimple)

# One boolean mask per type, as plain arrays so they index the NumPy
# projection row by row. Each mask name matches the type it selects.
maskW = (pokemonDataFrameSimple["Type 1"] == "water").to_numpy()
maskF = (pokemonDataFrameSimple["Type 1"] == "fire").to_numpy()
maskG = (pokemonDataFrameSimple["Type 1"] == "grass").to_numpy()

# Make a 3D scatter plot
fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(projection='3d')

# The colour must be passed as c=...: the fourth positional parameter of
# Axes3D.scatter is zdir, not the colour.
wScat = ax.scatter(pcaEncodings[maskW][:, 0],
                   pcaEncodings[maskW][:, 1],
                   pcaEncodings[maskW][:, 2],
                   c='b')

fScat = ax.scatter(pcaEncodings[maskF][:, 0],
                   pcaEncodings[maskF][:, 1],
                   pcaEncodings[maskF][:, 2],
                   c='r')

gScat = ax.scatter(pcaEncodings[maskG][:, 0],
                   pcaEncodings[maskG][:, 1],
                   pcaEncodings[maskG][:, 2],
                   c='g')

ax.legend([fScat, wScat, gScat], ['Fire', 'Water', 'Grass'])

# Create a GIF of the scatter plot rotating about the vertical axis
ax.view_init(elev=45, azim=0)


def rotateZ(angle):
    """Animation callback: set the camera for one frame (angle in degrees)."""
    ax.view_init(azim=angle, elev=angle/5)


rot_animation = animation.FuncAnimation(fig, rotateZ, frames=np.arange(0, 270, 2), interval=100)
# NOTE: the 'imagemagick' writer needs ImageMagick installed on the system
rot_animation.save('rotation.gif', dpi=80, writer='imagemagick')

In [None]:
# Point the 3D camera at elevation 10 and azimuth 10 degrees on the scatter axes
ax.view_init(elev=10, azim = 10)