In [419]:
import numpy as np
import pandas as pd
import math
import random

random.seed(47)

In [420]:
# load data
combats = pd.read_csv('./Python/Data/combats.csv')
pokemons = pd.read_csv('./Python/Data/pokemon.csv')
tests = pd.read_csv('./Python/Data/tests.csv')

In [421]:
pokemons.head()

Unnamed: 0,#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,80,82,83,100,100,80,1,False
3,4,Mega Venusaur,Grass,Poison,80,100,123,122,120,80,1,False
4,5,Charmander,Fire,,39,52,43,60,50,65,1,False


In [422]:
print("attack range: %i - %i" % (pokemons.HP.min(), pokemons.HP.max()))
print("attack range: %i - %i" % (pokemons.Attack.min(), pokemons.Attack.max()))
print("attack range: %i - %i" % (pokemons.Defense.min(), pokemons.Defense.max()))

attack range: 1 - 255
attack range: 5 - 190
attack range: 5 - 230


In [423]:
# put range in vars
Hp = [pokemons.HP.min(), pokemons.HP.max()]
Atk = [pokemons.Attack.min(), pokemons.Attack.max()]
Def = [pokemons.Defense.min(), pokemons.Defense.max()]

In [424]:
pokemons['Type 1'].unique()

array(['Grass', 'Fire', 'Water', 'Bug', 'Normal', 'Poison', 'Electric',
       'Ground', 'Fairy', 'Fighting', 'Psychic', 'Rock', 'Ghost', 'Ice',
       'Dragon', 'Dark', 'Steel', 'Flying'], dtype=object)

In [425]:
pokemons['Type 2'].unique().size

19

In [426]:
testdata = pokemons.loc[1:10,['HP','Attack','Defense','Name']]

In [427]:
testdata.values

array([[60, 62, 63, 'Ivysaur'],
       [80, 82, 83, 'Venusaur'],
       [80, 100, 123, 'Mega Venusaur'],
       [39, 52, 43, 'Charmander'],
       [58, 64, 58, 'Charmeleon'],
       [78, 84, 78, 'Charizard'],
       [78, 130, 111, 'Mega Charizard X'],
       [78, 104, 78, 'Mega Charizard Y'],
       [44, 48, 65, 'Squirtle'],
       [59, 63, 80, 'Wartortle']], dtype=object)

### Steps to take
1) Set amount of K clusters  
2) Assign each data point to closest centroid  
3) Recompute the cluster centroids(mean of all data points in cluster)  
4) back to step 2 and repeat

In [522]:
def KMeans(points, k = 3, max_iter = 10):
    # generate k random centroids
    #centroids = centroid_gen(k)
    centroids = {0: [113, 162, 213], 1: [190,  100,  80], 2 : [190,  80,  80]}
    centroids = dict(centroids)

    print(centroids)
    cluster = {}
    
    iteration = 0
    while iteration < max_iter:
        print('\nIteration: ' + str(iteration))
        for i in range(0,len(points)):
            distances = calc_dist(centroids, points[i])
            min_dist_key = min(distances, key=distances.get)
            cluster[i] = {'centroid': min_dist_key, 'distance': distances[min_dist_key] }
            print('Cluster: %s , distance: %s' % (cluster[i]['centroid'], cluster[i]['distance']))
        iteration += 1
        
        # Find new centroid mean
        new_centroids = {}
        for i in range(k):
            print('Cluster is:' + str(i))
            C = np.array([])
            for j in range(len(points)):
                if i == cluster[j]['centroid']:
                    np.append(C, points[j][0:2], axis=0)
                    #C.r_[j,points[j][0:2]]
                    print(points[j])
            print(C)
            print('------------------- \n')
    return cluster

In [429]:
# generate k random centroids
def centroid_gen(k):
    centroids = {}
    for c in range(0,k):
        rand_hp = random.randint(Hp[0],Hp[1])
        rand_atk = random.randint(Atk[0],Atk[1])
        rand_def = random.randint(Def[0],Def[1])
        centroids[c] = np.array([rand_hp, rand_atk, rand_def])
    return centroids

In [430]:
# calculate Euclidean distance
def calc_dist(cen, p):
    distances = {}
    for k in cen:
        Sum = (cen[k][0]-p[0])**2 + (cen[k][1]-p[1])**2 + (cen[k][2]-p[2])**2
        distance = math.sqrt(Sum)
        distances[k] = distance
    return distances

In [523]:
mest = KMeans(testdata.values ,3 , 3)

{0: [113, 162, 213], 1: [190, 100, 80], 2: [190, 80, 80]}

Iteration: 0
Cluster: 2 , distance: 132.33669181296622
Cluster: 2 , distance: 110.05907504608605
Cluster: 0 , distance: 114.16216536138407
Cluster: 2 , distance: 157.96835126062436
Cluster: 2 , distance: 134.77388471065157
Cluster: 2 , distance: 112.08925015361643
Cluster: 0 , distance: 112.48555462813881
Cluster: 1 , distance: 112.08925015361643
Cluster: 2 , distance: 150.2165104108067
Cluster: 2 , distance: 132.09844813622905
Cluster is:0
[]
[]
[80 100 123 'Mega Venusaur']
[]
[]
[]
[]
[78 130 111 'Mega Charizard X']
[]
[]
[]
[]
------------------- 

Cluster is:1
[]
[]
[]
[]
[]
[]
[]
[78 104 78 'Mega Charizard Y']
[]
[]
[]
------------------- 

Cluster is:2
[60 62 63 'Ivysaur']
[]
[80 82 83 'Venusaur']
[]
[]
[39 52 43 'Charmander']
[]
[58 64 58 'Charmeleon']
[]
[78 84 78 'Charizard']
[]
[]
[]
[44 48 65 'Squirtle']
[]
[59 63 80 'Wartortle']
[]
------------------- 


Iteration: 1
Cluster: 2 , distance: 132.33669181296622
Cluster

In [432]:
mest

{0: {'centroid': 1, 'distance': 162.09256614663116},
 1: {'centroid': 1, 'distance': 141.54151334502538},
 2: {'centroid': 0, 'distance': 114.16216536138407},
 3: {'centroid': 1, 'distance': 186.59314028120113},
 4: {'centroid': 1, 'distance': 164.58736281987143},
 5: {'centroid': 1, 'distance': 143.6975991448709},
 6: {'centroid': 0, 'distance': 112.48555462813881},
 7: {'centroid': 1, 'distance': 147.0},
 8: {'centroid': 1, 'distance': 178.99161991557034},
 9: {'centroid': 1, 'distance': 162.15116404145854}}

In [433]:
#for i in range(0,1):
points = [testdata.values[j] for j in range(len(testdata))]
print(points)

[array([60, 62, 63, 'Ivysaur'], dtype=object), array([80, 82, 83, 'Venusaur'], dtype=object), array([80, 100, 123, 'Mega Venusaur'], dtype=object), array([39, 52, 43, 'Charmander'], dtype=object), array([58, 64, 58, 'Charmeleon'], dtype=object), array([78, 84, 78, 'Charizard'], dtype=object), array([78, 130, 111, 'Mega Charizard X'], dtype=object), array([78, 104, 78, 'Mega Charizard Y'], dtype=object), array([44, 48, 65, 'Squirtle'], dtype=object), array([59, 63, 80, 'Wartortle'], dtype=object)]


In [259]:
print(math.sqrt(7147))

84.53993139339539


In [137]:
testar = np.array([11, 112, 173])

In [179]:
dista = calc_dist(ceno,testar)
np.min(dista)

218.55891654197043

In [147]:
(ceno[0][0] - testar[0])**2 + (ceno[0][0] - testar[0])**2

96800

In [150]:
ceno[0]

array([231,  85, 150])

In [152]:
testar[0]

11