In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from PIL import Image

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
    #for filename in filenames:
        #print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Get dataset

In [None]:
# Load the per-Pokémon table that carries the type labels.
# "Name" is deliberately kept as a regular column (no set_index): the merge
# with the image table further down joins on the shared "Name" column.
pokemon_stats = pd.read_csv('/kaggle/input/pokemon-images-and-types/pokemon.csv')

# Index the image files used for prediction. The part of each file name before
# the first "." is taken as the Pokémon name; the full file name is kept so the
# image can be loaded later.
image_dir = "../input/pokemon-images-and-types/images/images/"
imgs = sorted(os.listdir(image_dir))  # os.listdir order is arbitrary; sort for reproducibility
pokemon_filenames = pd.DataFrame({
    'Name': [x.split(".")[0] for x in imgs],
    'Filepath': imgs,
})

# Preview only; the frame itself keeps its default integer index.
pokemon_filenames.head()

In [None]:
# Preview the first ten rows of the stats table.
pokemon_stats.head(10)

In [None]:
# Integer class id -> primary type name. The id of a type is its position in
# the tuple below, so the order must not change.
TYPES = dict(enumerate((
    "Water",
    "Normal",
    "Grass",
    "Bug",
    "Psychic",
    "Fire",
    "Rock",
    "Electric",
    "Poison",
    "Ground",
    "Fighting",
    "Dark",
    "Dragon",
    "Ghost",
    "Steel",
    "Ice",
    "Fairy",
    "Flying",
)))

In [None]:
# Parallel lists for reverse lookup (name -> id): val_list[i] is the type name
# stored under id key_list[i] in TYPES.
key_list = [type_id for type_id in TYPES]
val_list = [TYPES[type_id] for type_id in key_list]

# Sanity check: print the id registered for "Fire".
position = val_list.index("Fire")
print(key_list[position])

In [None]:
def replace_type(x):
    """Return the id from ``key_list`` that is paired with type name ``x``.

    The lookup goes through ``val_list.index``, so a name that is not in
    ``val_list`` raises ``ValueError``.
    """
    return key_list[val_list.index(x)]

def get_rgb_images(file):
    """Load an image from ``image_dir`` as a float32 RGB array.

    Parameters
    ----------
    file : str
        File name relative to ``image_dir``.

    Returns
    -------
    numpy.ndarray
        ``float32`` array of shape (height, width, 3) with values on the
        0-255 scale. JPEG files are converted to RGB directly; every other
        format is converted to RGBA and flattened onto a white background so
        transparent areas become white instead of black.
    """
    path = os.path.join(image_dir, file)
    # splitext looks at the last extension only and the comparison is
    # case-insensitive, so "a.b.jpg" and "x.JPG" are both treated as JPEG.
    extension = os.path.splitext(file)[1].lower()

    # The context manager releases the file handle once the pixels are read.
    with Image.open(path) as im:
        if extension in (".jpg", ".jpeg"):
            # convert() guarantees three channels even for grayscale/CMYK JPEGs.
            rgb = im.convert("RGB")
        else:
            rgba = im.convert('RGBA')
            rgb = Image.new("RGB", rgba.size, (255, 255, 255))
            # Band 3 of an RGBA image is alpha; use it as the paste mask.
            rgb.paste(rgba, mask=rgba.split()[3])
        return np.asarray(rgb).astype(np.float32)

# Join the type labels with the image paths on their shared "Name" column.
# The outer join plus indicator column ("_merge") records which side each row
# came from.
pokemon_joined = pokemon_stats.merge(pokemon_filenames, how='outer', indicator=True)

# Rows found on only one side carry a NaN Type1 or Filepath, which the
# transforms below cannot handle. Fail early and name the offenders instead of
# surfacing an opaque error from inside apply().
unmatched = pokemon_joined.loc[pokemon_joined["_merge"] != "both", ["Name", "_merge"]]
if not unmatched.empty:
    raise ValueError(
        "Rows without a matching stats entry / image file:\n"
        + unmatched.to_string(index=False)
    )

# Keep only the part of the name before the first "-".
pokemon_joined["Name"] = pokemon_joined["Name"].apply(lambda name: name.split('-')[0])
# Encode the primary type name as its integer id.
pokemon_joined["Type1"] = pokemon_joined["Type1"].apply(replace_type)
# Load every image as a float32 RGB array (one array per row).
pokemon_joined["Image"] = pokemon_joined["Filepath"].apply(get_rgb_images)

# Preview only: the Image column holds full pixel arrays.
pokemon_joined.head()

In [None]:
# Count rows per merge origin ("both", "left_only", "right_only") to verify
# that every stats row was matched with an image file.
pokemon_joined["_merge"].value_counts()

In [None]:
# Number of Pokémon per encoded primary type.
pokemon_joined["Type1"].value_counts()

## Plot data

In [None]:
import random
from matplotlib import pyplot as plt
from matplotlib import image

def get_filepaths(ds, type_, num):
    """Pick up to ``num`` rows of ``ds`` whose ``Type1`` equals ``type_``.

    Parameters
    ----------
    ds : pandas.DataFrame
        Frame with "Type1", "Filepath", "Name" and "Image" columns.
    type_ :
        Value compared against ``ds["Type1"]``; it must use the same encoding
        as that column.
    num : int
        Maximum number of rows to return.

    Returns
    -------
    list of tuple
        ``(filepath, name, image)`` tuples. When more than ``num`` rows match,
        ``num`` of them are drawn with ``random.sample``; otherwise all
        matching rows are returned in frame order.
    """
    # Filter on the Type1 column. Comparing the whole frame with the literal
    # string "Type1" selects every row and ignores ``type_``.
    idxs = list(ds.index[ds["Type1"] == type_])
    if len(idxs) > num:
        idxs = random.sample(idxs, num)
    chosen = ds.loc[idxs]
    return list(zip(chosen['Filepath'], chosen['Name'], chosen['Image']))

In [None]:
def display_type(type_, nb):
    """Show up to ``nb`` sampled Pokémon of primary type ``type_`` in one row.

    ``type_`` is a type name from ``val_list`` (e.g. "Ice"); it is translated
    to its integer id before ``pokemon_joined`` is filtered. Slots for which
    no Pokémon is available are drawn as empty, frameless axes.
    """
    type_ = key_list[val_list.index(type_)]

    same_type = pokemon_joined[pokemon_joined["Type1"] == type_]
    samples = get_filepaths(same_type, type_, nb)

    fig = plt.figure(figsize=(20, 3))
    # Frameless, tick-free axes that only carries the row label.
    backdrop = plt.gca()
    backdrop.set_frame_on(False)
    backdrop.set_xticks([])
    backdrop.set_yticks([])
    backdrop.text(0.04, 0.5, 'ground truth', fontsize=14, transform=fig.transFigure)

    for slot in range(nb):
        ax = fig.add_subplot(1, nb, slot + 1)
        if slot >= len(samples):
            # Nothing left to show: leave an invisible placeholder.
            ax.set_frame_on(False)
            ax.set_xticks([])
            ax.set_yticks([])
            continue

        _, name, pixels = samples[slot]
        ax.imshow(np.asarray(pixels).astype('uint8'))
        ax.set_title(name)
        ax.set_xticks([])
        ax.set_yticks([])

        # Green border around every displayed sprite.
        for spine in ax.spines.values():
            spine.set_edgecolor('green')

    plt.show()

In [None]:
# Example: show up to ten Pokémon whose primary type is Ice.
display_type("Ice", 10)

### A quick analysis of a single image

In [None]:
# Load one sample sprite. matplotlib returns PNG data as floats in [0, 1];
# the code below indexes a fourth (alpha) channel, so an RGBA file is required.
rgbImage = image.imread(image_dir+"braixen.png")

# Split the image into its colour planes and the alpha (opacity) plane.
redChannel = rgbImage[:, :, 0]
greenChannel = rgbImage[:, :, 1]
blueChannel = rgbImage[:, :, 2]
transpChannel = rgbImage[:, :, 3]
# Share of white background showing through each pixel, kept on the same 0-1
# scale as the colour planes. (Scaling it to 0-255 pushed the sums far out of
# range and only looked right because imshow clips float data.)
new_transp = 1.0 - transpChannel

# Zero plane with the sprite's own height/width rather than a fixed 120x120.
allblack = np.zeros_like(redChannel)

r = np.dstack((redChannel, allblack, allblack))
g = np.dstack((allblack, greenChannel, allblack))
b = np.dstack((allblack, allblack, blueChannel))
a = np.dstack((allblack, allblack, allblack, transpChannel))
# Alpha-composite over white: colour * alpha + white * (1 - alpha).
rgb = np.dstack([
    channel * transpChannel + new_transp
    for channel in (redChannel, greenChannel, blueChannel)
])

# Show the original, each isolated channel, and the flattened RGB result.
panels = [
    ('Original', rgbImage),
    ('R', r),
    ('G', g),
    ('B', b),
    ('A', a),
    ('RGB', rgb),
]
fig, axes = plt.subplots(1, len(panels), figsize=(20, 3))
for ax, (title, pixels) in zip(axes, panels):
    ax.imshow(pixels)
    ax.set_title(title)
print(rgbImage.shape)

## Create CNN to predict type

In [None]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models

### Create training data

In [None]:
# Stack the per-row image arrays into one (n_images, height, width, channels)
# batch. np.stack derives the shape from the images themselves and raises if
# they do not all share it (no silent broadcasting into a fixed 120x120x3
# buffer). The images' own float32 dtype is kept instead of widening to
# float64, which halves the memory footprint.
data = np.array(pokemon_joined["Image"])
x_train = np.stack(list(data))

In [None]:
 # Encoded primary type of every row, taken from the same frame (and therefore
 # in the same row order) as the images in x_train.
 # NOTE(review): this cell starts with a stray leading space; IPython strips it,
 # but a plain .py export of the notebook would not parse.
 y_train = pokemon_joined["Type1"].to_numpy(copy=True)

In [None]:
# Shape of the stacked image batch: (n_images, height, width, channels).
x_train.shape

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the images for validation; random_state pins the shuffle.
# The labels are read straight from pokemon_joined rather than from y_train:
# this cell rebinds y_train to the training subset, so feeding y_train back in
# would make a second run of the cell fail on mismatched sample counts.
X_train, X_test, y_train, y_test = train_test_split(
    x_train,
    pokemon_joined["Type1"].to_numpy(),
    test_size=0.2,
    random_state=0,
)
print(len(X_train))
print(len(y_train))

In [None]:
# Shapes of the four split arrays, in the order: train images, test images,
# train labels, test labels.
for split in (X_train, X_test, y_train, y_test):
    print(split.shape)

In [None]:
# Display the training image array as the cell output.
X_train

In [None]:
# Preview the first training images, titled with their type names.
n_preview = 6
fig, axes = plt.subplots(1, n_preview, figsize=(20, 3))
for i, ax in enumerate(axes):
    # The uint8 cast is for display only; it assumes the pixels are still on
    # the 0-255 scale, i.e. this cell runs before the /255 rescaling cell.
    ax.imshow(X_train[i].astype("uint8"))
    ax.set_title(TYPES[y_train[i]])
print(f"First {n_preview} training images")

In [None]:
# Rescale pixel values by 1/255, writing the result back into the same arrays.
# NOTE: not idempotent -- running this cell again divides by 255 a second time.
for pixels in (X_train, X_test):
    np.divide(pixels, 255.0, out=pixels)

In [None]:
# Shape of the training label array.
y_train.shape

### Model architecture

In [None]:
# Small convolutional classifier: three conv layers (max-pooling after the
# first two) feeding a dense head. The last layer has no activation, so it
# emits one raw score (logit) per entry in TYPES.
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(120, 120, 3)),
    layers.MaxPooling2D((2, 2)),
    # Square 3x3 kernel like the other conv layers; the earlier (3, 2) window
    # appears to have been a typo.
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    # One output per type id; derived from TYPES so the two cannot drift apart.
    layers.Dense(len(TYPES)),
])

model.summary()

In [None]:
# from_logits=True tells the loss to expect raw, un-normalised scores; the
# sparse variant takes integer class ids as labels.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])

In [None]:
# Train for 10 epochs; the held-out split is passed as validation data.
history = model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    validation_data=(X_test, y_test),
)

In [None]:
#plt.plot(history.history['accuracy'], label='accuracy')
#plt.plot(history.history['val_accuracy'], label = 'val_accuracy')
#plt.xlabel('Epoch')
#plt.ylabel('Accuracy')
#plt.ylim([0.5, 1])
#plt.legend(loc='lower right')

#test_loss, test_acc = model.evaluate(X_test, y_test, verbose=2)