In [0]:
"""
  README.md

  The following algorithm has been developed using the Pokemon Image Dataset of Kaggle.
  If you want to use it on the same dataset:

  First of all, go to https://www.kaggle.com/vishalsubbiah/pokemon-images-and-types
  Click on Download (2 MB)
  In order to use the following code on this dataset, do the following step of the README.md.
  To do this, go to the file explorator of google colab on your left. 
  Then, click right on the content folder and import the pokemon.csv file and all the images (.png and .jpg)
"""

In [0]:
# Projet
# Download the colormath module in order to use it later.
# Run this code just one time

import os, sys
from google.colab import drive
drive.mount('/content/mnt')
nb_path = '/content/notebooks'
#os.symlink('/content/mnt/My Drive/Colab Notebooks', nb_path)
sys.path.insert(0,nb_path)

!pip install --target=$nb_path colormath

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/mnt
Collecting colormath
  Downloading https://files.pythonhosted.org/packages/ce/cf/70ea34103a76cc6fb1892289bda321cd0cc73b1a5500ee7fe9ef9f64acef/colormath-3.0.0.tar.gz
Collecting numpy
[?25l  Downloading https://files.pythonhosted.org/packages/07/08/a549ba8b061005bb629b76adc000f3caaaf881028b963c2e18f811c6edc1/numpy-1.18.2-cp36-cp36m-manylinux1_x86_64.whl (20.2MB)
[K     |████████████████████████████████| 20.2MB 217kB/s 
[?25hCollecting networkx>=2.0
[?25l  Down

In [0]:
# Projet
# Conversion of the pokemon.csv file into pokemon.json file
# Run this code just one time

import csv
import json
from pandas.io.json import json_normalize

#-------------------------------------------------------------------------------

def ConversionCsvToJson(csvFileName, jsonFileName) :
  """ Convert a csv file into a json file
      Input : csvFileName (str) = Name of the csv file to convert
              jsonFileName (str) = Name of the json file result of the conversion
      Output : none """
  csvFile = open(csvFileName, 'r')
  fieldnames = ("Name", "Type1", "Type2")
  reader = csv.DictReader(csvFile, fieldnames)
  tmp = []
  for row in reader :
    tmp.append(row)
  with open(jsonFileName, 'w') as jsonFile :
    json.dump(tmp[1:], jsonFile)

#-------------------------------------------------------------------------------

ConversionCsvToJson('pokemon.csv', 'pokemon.json')
# Create df: a dataframe which contains all the pokemon information
data = json.load(open('pokemon.json'))
df = json_normalize(data)

In [0]:
# Projet
# Initialisation part of the algorithm
# WARNING: FUNCTION GETCOLORS TAKES TIME
# The execution of this code takes about 1 min 40 sec
# Run this code just one time

#-----------------------------------------------------------

import csv
import json
import pandas as pd
import matplotlib.pyplot as plot
import matplotlib.image as mpimg
import numpy
import glob
import random
import math
from PIL.Image import *
from sklearn.cluster import MiniBatchKMeans
from colormath.color_objects import LabColor, sRGBColor
from colormath.color_conversions import convert_color
from colormath.color_diff import delta_e_cie1976

#----------------------------------------------------------

def GetColors(x):
  """ Get the color RGBA of an image
      Input : x (int) = Number of predominant colors + 1
      Output : clusters.cluster_centers_ (numpy.ndarray) = array of RGBA for each color """

  numarray = numpy.array(imgfile.getdata(), numpy.uint8)

  cluster_count = x

  clusters = MiniBatchKMeans(n_clusters = cluster_count)
  clusters.fit(numarray.reshape(-4, 4))
  
  npbins = numpy.arange(0, cluster_count + 1)
  histogram = numpy.histogram(clusters.labels_, bins=npbins)
  labels = numpy.unique(clusters.labels_)
  clusters.cluster_centers_ = clusters.cluster_centers_[1:]
  return clusters.cluster_centers_, histogram[0][1:]

#-------------------------------------------------------------------

# Create listPkmn: a list which contain all pokemon png and jpg images
# Creat nbPkmn: an int corresponding to the number of pokemon imported = 809

listPkmn1 = glob.glob('*.png')
listPkmn2 = glob.glob('*.jpg')
listPkmn = listPkmn1
for pkmn in listPkmn2 :
  listPkmn.append(pkmn)
nbPkmn = len(listPkmn)


# Create ret: a list containing the color in RGBA of pkmn in listPkmn
# Create listRet: a list containing the ret of each pokemon
#--------------- Add a column named Color to the dataframe named df -----------------#
df['Color1'] = 0
df['Color2'] = 0
df['Color3'] = 0
df['Color1Apparition'] = 0
df['Color2Apparition'] = 0
df['Color3Apparition'] = 0

for pkmn in listPkmn :

  imgfile = open(pkmn)
  ret, apparition = GetColors(4)
  ret = numpy.around(ret, decimals=1)
  ret = ret.tolist()
  apparition = apparition.tolist()
  
  indexPkmn = df.loc[df['Name'] == pkmn[:-4]].index.item()

  for i, color in enumerate(ret) :
    rgb = sRGBColor(int(color[0]), int(color[1]), int(color[2]), is_upscaled=True)
    lab = convert_color(rgb, LabColor)
    df['Color'+str(i+1)][indexPkmn] = lab
    df['Color' + str(i+1) + 'Apparition'][indexPkmn] = apparition[i]
  imgfile.close()

#--------------- Add a column named Size to the dataframe named df ------------------#
df['Size'] = 0

# By size, we mean the size of the pokemon (calculated by the number of coloured pixels)
# rather than the size of the image itself

# img.getpixel extracts 3 parameters for jpg images and 4 parameters for png images
# Background of a png is white (0,0,0)
# Background of a jpg is black (255,255,255)
for pkmn in listPkmn :
  img = open(pkmn)
  cpt = 0
  if(pkmn[-3:] == "png") :
    largeur, hauteur = img.size
    for x in range(largeur) :
      for y in range(hauteur) :
        try :
          r, g, b, a = img.getpixel((x,y))
          if ( (r,g,b) != (0,0,0) ) :
            cpt += 1
        except TypeError :
          res = img.getpixel((x,y))
          if( res != 0) :
            cpt += 1
  if(pkmn[-3:] == "jpg") :
    largeur, hauteur = img.size
    for x in range(largeur) :
      for y in range(hauteur) :
        r,g,b=img.getpixel((x,y))
        if( (r,g,b) != (255,255,255) ) :
          cpt += 1
  indexPkmn = df.loc[df['Name'] == pkmn[:-4]].index.item()
  df['Size'][indexPkmn] += cpt


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https:

In [0]:
# Projet
# Prediction part of the algorithm

#----------------------------------------------------------------------------------------------

def GetDataFromDataframe(name) :
  """ From the name of a pokemon in the dataframe df,
      the function returns his type1, type2 and color
      Input : name (str) = Name of the pkmn
      Output : index (int) = the index of the pkmn
              type1 (str) = the type1 of the pkmn
              type2 (str or NoneType) = the type2 of the pkmn
              color (float) = the color of the pkmn
               """
  index = df.loc[df['Name'] == name].index.item()
  type1 = df.loc[index, 'Type1']
  type2 = df.loc[index, 'Type2']
  size = df.loc[index, 'Size']
  color1 = df.loc[index, 'Color1']
  color2 = df.loc[index, 'Color2']
  color3 = df.loc[index, 'Color3']
  color1Apparition = df.loc[index, 'Color1Apparition']
  color2Apparition = df.loc[index, 'Color2Apparition']
  color3Apparition = df.loc[index, 'Color3Apparition']

  return index, type1, type2, size, color1, color2, color3, color1Apparition, color2Apparition, color3Apparition

#----------------------------------------------------------------------------------------------

# Add a column named PredictRate to the dataframe named df
df['PredictRate'] = 0

# Create listDisplay: a list containing all the displayed pokemon
listDisplay = []

# Create printNb: an integer between 1 and 809 corresponding to the number of pokemon to print
printNb = input("Combien d'image voulez-vous afficher ? [nombre entier entre 1 et 809] ")
printNb = int(printNb)

# Print random pokemon. The number of print corresponding to printNb
# Create indexRdm: a random integer between 0 and nbPkmn=809
for i in range(printNb) :
  indexRdm = random.randint(0, nbPkmn)
  listDisplay.append(listPkmn[indexRdm])

fig=plot.figure(figsize=(10, 10))
columns = 2
rows = math.ceil(printNb/2)
for j in range(1, printNb+1):
  img = mpimg.imread(listDisplay[j-1])
  fig.add_subplot(rows, columns, j)
  plot.imshow(img)
plot.show()

# Create favoriteImg: an int corresponding to the user favorite print pokemon
# Create pkmnPref: a str containing the name of the choosen pokemon
phrase = "Quelle image préférez vous ? [entier entre 1 et " + str(printNb) + "]  "
favoriteImg = int(input(phrase))
pkmnPref = listDisplay[favoriteImg-1]
pkmnPref = pkmnPref[:-4]

# Create indexPkmnPref: an int which is the index of the choosen pkmn in the dataframe df
# Create type1PkmnPref: a str which is the type1 of the choosen pkmn in the dataframe df
# Create type2PkmnPref: a str or NoneType which is the type2 of the choosen pkmn in the dataframe df
# Create colorPkmnPref: a list containing the 3 LabColor of the choosen pkmn in the dataframe df
a, b, c, d, e, f, g, h, i, j = GetDataFromDataframe(pkmnPref)
indexPkmnPref = a
type1PkmnPref = b
type2PkmnPref = c
sizePkmnPref = d
color1PkmnPref = e
color2PkmnPref = f
color3PkmnPref = g
color1ApparitionPkmnPref = h
color2ApparitionPkmnPref = i
color3ApparitionPkmnPref = j

# Fill the PredictRate column of the dataframe df up
#The PredictRate is influenced as much as 
#30% by the color, 30% by the size, 30% by the primary type and 10% by the secondary type
# For each color, PredictRate varies from +0 (compare similar color) to +100 
#(compare black and white)

seuilAcceptanceColor = 10
for pkmn in listPkmn :
  ind, typ1, typ2, size, c1, c2, c3, c1A, c2A, c3A = GetDataFromDataframe(pkmn[:-4])
  delta_e1 = delta_e_cie1976(c1, color1PkmnPref)
  delta_e2 = delta_e_cie1976(c2, color2PkmnPref)
  delta_e3 = delta_e_cie1976(c3, color3PkmnPref)
  if(delta_e1 <= seuilAcceptanceColor) :
    df['PredictRate'][ind] += c1A
  if(delta_e2 <= seuilAcceptanceColor) :
    df['PredictRate'][ind] += c2A
  if(delta_e3 <= seuilAcceptanceColor) :
    df['PredictRate'][ind] += c3A

listIndexPredictType2 = numpy.where(df['Type2'] == type2PkmnPref)[0].tolist()
for val in listIndexPredictType2 :
  df.loc[df.index[val], 'PredictRate'] *= 1.1

seuilAcceptanceSize = 200
for indexPkmn in range(len(listPkmn)) :
  sizePkmn = df.loc[indexPkmn, 'Size']
  if( (sizePkmn <= sizePkmnPref+seuilAcceptanceSize) or (sizePkmn >= sizePkmnPref-seuilAcceptanceSize) ) :
    df['PredictRate'][indexPkmn] *= 1.5

listIndexPredictType1 = numpy.where(df['Type1'] == type1PkmnPref)[0].tolist()
for val in listIndexPredictType1 :
  df.loc[df.index[val], 'PredictRate'] *= 1.3

df = df.sort_values(by = 'PredictRate', ascending=False)
df.reset_index(drop=True, inplace=True)

listDisplay = []
for i in range(3) :
  pkmnName = df.loc[i, 'Name']
  listDisplay.append(pkmnName)
  try :
    img = mpimg.imread(pkmnName+'.png')
  except FileNotFoundError:
    pass
  try :
    img = mpimg.imread(pkmnName+'.jpg')
  except FileNotFoundError:
    pass
  plot.figure()
  plot.imshow(img)