# Pokemon Analysis - Some Questions and Answers

## This notebook answers some theoretical and emperical questions about pokemon typings

In [1]:
import pandas as pd
import numpy as np
from functools import reduce


In [2]:
# These questions can be answered with two datasets:
pokemon = pd.read_csv('Pokemon.csv')
types = pd.read_csv('Types.csv')

In [3]:
# The pokemon dataframe contains all pokemons (first 7 generations) and their stats
pokemon.head()

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False


In [4]:
# The types dataframe contains type effectiveness information
types

Unnamed: 0,Attacking,Normal,Fire,Water,Electric,Grass,Ice,Fighting,Poison,Ground,Flying,Psychic,Bug,Rock,Ghost,Dragon,Dark,Steel,Fairy
0,Normal,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0.5,0.0,1.0,1.0,0.5,1.0
1,Fire,1,0.5,0.5,1.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,2.0,0.5,1.0,0.5,1.0,2.0,1.0
2,Water,1,2.0,0.5,1.0,0.5,1.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,1.0,0.5,1.0,1.0,1.0
3,Electric,1,1.0,2.0,0.5,0.5,1.0,1.0,1.0,0.0,2.0,1.0,1.0,1.0,1.0,0.5,1.0,1.0,1.0
4,Grass,1,0.5,2.0,1.0,0.5,1.0,1.0,0.5,2.0,0.5,1.0,0.5,2.0,1.0,0.5,1.0,0.5,1.0
5,Ice,1,0.5,0.5,1.0,2.0,0.5,1.0,1.0,2.0,2.0,1.0,1.0,1.0,1.0,2.0,1.0,0.5,1.0
6,Fighting,2,1.0,1.0,1.0,1.0,2.0,1.0,0.5,1.0,0.5,0.5,0.5,2.0,0.0,1.0,2.0,2.0,0.5
7,Poison,1,1.0,1.0,1.0,2.0,1.0,1.0,0.5,0.5,1.0,1.0,1.0,0.5,0.5,1.0,1.0,0.0,2.0
8,Ground,1,2.0,1.0,2.0,0.5,1.0,1.0,2.0,1.0,0.0,1.0,0.5,2.0,1.0,1.0,1.0,2.0,1.0
9,Flying,1,1.0,1.0,0.5,2.0,1.0,2.0,1.0,1.0,1.0,1.0,2.0,0.5,1.0,1.0,1.0,0.5,1.0


# Question 1: How many unique type combinations are there?


Let $V$ be a one-dimensional vector containing all pokemon types. Then, all type combinations are uniquely defined by the upper (or lower) triangle of the outer product of this vector. 

$\text{all_combos} = V.V^T$

Note1: the diagonal of this matrix is all monotypes. there are 18 
<br>
Note2: the upper and lower triangle (discluding the diagonal) are symmetric with respect to combinations, but non-unique with respect to permutations. The permutations can represent primary and secondary typings of dual type pokemons. For example, all_combos[0,1] = NormalFire and all_combos[1,0] = FireNormal. Since I am not aware of primary/secondary typings having much impact, let us only care about unique combinations.


In [5]:
types_vector = np.asarray(types['Attacking'])
all_permutations = types_vector[:,None]+types_vector # outer product
all_combos = set(np.triu(all_permutations).flatten())
all_combos.remove(0) # some extra fluff from np.triu

In [6]:
print("Answer: There are exactly %d type combinations in Pokemon" % (len(all_combos)))

Answer: There are exactly 171 type combinations in Pokemon


Obviously, it was not necessary to compute all unique combinations explicitly, since the answer is a triangular number: 
<br>
$N * (N+1) / 2 = 18 * 19 / 2 = 171$

However, storing these combinations for later will help us answer some questiosn :)


# Question 2: Are all combinations available?

In [7]:
# this helper function will help us solve some problems
def typeCombinationSplitter(combo: str):
    """
        Args:
            combo (str) : A pokemon typing string from what is produced by 
            all_combos above. E.g. 'GroundWater', 'WaterWater'
        
        Returns:
            2-tuple : lists both 
    
    """
    # splits the combination typing into component types
    t = set([])
    for pokemon_type in types_vector:
        if pokemon_type in combo:
            t.add(pokemon_type)
    # note: we lexographically sort here, to aid in 
    # checking for uniqueness later on.
    return tuple(sorted(list(t)))

In [8]:
# We need to re-orient all_combos so that type1, type2 are lexographically ordered. 
# This could have also been achieved by sorting the types in the original types data table.
all_sorted_combos = set([])
for combo in all_combos:
    this_combo = reduce(lambda x,y : x+y, typeCombinationSplitter(combo))
    all_sorted_combos.add(this_combo)

In [9]:
# Now, we can gather the emperical combos for comparisons
all_emperical_combos = set([])
pokemon
for ind, row in pokemon.iterrows():
    type1, type2 = row["Type 1"], row["Type 2"]
    nonzero_types = [type1]
    if type2 != None and type(type2) == str: # otherwise it is NaN - monotype
        nonzero_types.append(type2)

    # sorting to avoid non-unique permuatations. 
    this_combo = reduce(lambda x,y : x+y, sorted(nonzero_types))
    all_emperical_combos.add(this_combo)

print("There are {} of {} theoretical typings".format(len(all_emperical_combos), len(all_sorted_combos)))
print("The following typings are currently unavailable")
for combo in all_sorted_combos - all_emperical_combos:
    print(combo)

# sanity check, we should have no emperical combos outside of the theoretical ones
assert (len(all_emperical_combos - all_sorted_combos) == 0)   
    

There are 133 of 171 theoretical typings
The following typings are currently unavailable
FirePoison
DarkFairy
ElectricPoison
FairyFighting
FightingGhost
BugIce
FireIce
DarkNormal
IceNormal
DragonNormal
NormalSteel
FairyGround
NormalPoison
FightingIce
PoisonRock
ElectricPsychic
ElectricRock
FightingGround
BugFairy
FairyFire
FireGrass
BugDragon
FairyPoison
FairyGhost
BugNormal
NormalRock
DarkElectric
GhostNormal
BugPsychic
IcePoison
FairyIce
GhostRock
PoisonSteel
ElectricFighting
BugDark
DragonFighting
PoisonPsychic
IceSteel


Interesting. There are still 38 dual types that do not yet exist :). 

# Question 3: Which types are most and least prevelant?

In [17]:
# To answer this question, we can repurpose the above code to use a counter:
all_emperical_combos = {}
pokemon
for ind, row in pokemon.iterrows():
    type1, type2 = row["Type 1"], row["Type 2"]
    nonzero_types = [type1]
    if type2 != None and type(type2) == str: # otherwise it is NaN - monotype
        nonzero_types.append(type2)

    # sorting to avoid non-unique permuatations. 
    this_combo = reduce(lambda x,y : x+y, sorted(nonzero_types))
    if this_combo in all_emperical_combos:
        all_emperical_combos[this_combo] += 1
    else:
        all_emperical_combos[this_combo] = 1

ranked_by_prevalance = [(combo, count) for combo, count in all_emperical_combos.items()]
ranked_by_prevalance.sort(key=lambda x : x[1], reverse=True)

print("\n")
print("The top 10 most prevalant pokemon types are:")
for combo, count in ranked_by_prevelance[:10]:
    print(combo, count)

print("\n")
print("The top 10 least prevalant pokemon types are:")
for combo, count in ranked_by_prevelance[-10:]:
    print(combo, count)
    



The top 10 most prevalant pokemon types are:
Normal 61
Water 59
Psychic 38
Grass 33
Fire 28
Electric 27
FlyingNormal 24
Fighting 20
Bug 17
GrassPoison 15


The top 10 least prevalant pokemon types are:
DragonSteel 1
FireSteel 1
ElectricGround 1
FightingRock 1
GroundNormal 1
DragonPoison 1
FightingFlying 1
ElectricFairy 1
GhostPsychic 1
FireWater 1


Cool! Some people would definitely had guessed that Water and Normal would be most prevalent. However I was surprised to see some dual types in the top 10.. Although it must be true that "FlyingNormal" blankets all regions xD. 

## Question 3 Bonus: How many pokemon are the *only* one of their type combo, and what are they?

This question is begging to be answered by the previous result. How many pokemon are lonely (only ones)?

In [35]:
# Gathering them all by name, it's easiest just to repurpose the above code (third time)
all_emperical_combos = {}
pokemon
for ind, row in pokemon.iterrows():
    type1, type2 = row["Type 1"], row["Type 2"]
    nonzero_types = [type1]
    if type2 != None and type(type2) == str: # otherwise it is NaN - monotype
        nonzero_types.append(type2)

    # sorting to avoid non-unique permuatations. 
    this_combo = reduce(lambda x,y : x+y, sorted(nonzero_types))
    if this_combo in all_emperical_combos:
        all_emperical_combos[this_combo].append(row["Name"])
    else:
        all_emperical_combos[this_combo] = [row['Name']]


ranked_by_prevalance = [(combo, len(val), val) for combo, val in all_emperical_combos.items()]
ranked_by_prevalance.sort(key=lambda x : x[1], reverse=True)
pb = 1
while(ranked_by_prevalance[-(pb+1)][1] == 1):
    pb += 1
    
print("\n")
print("There are {} pokemon who are the only ones of their type combo!. Here they Are:".format(pb))
for combo, count, these_pokemon in ranked_by_prevalance[-pb:]:
    print("{} - {}".format(combo, these_pokemon[0]))




There are 24 pokemon who are the only ones of their type combo!. Here they Are:
FireRock - Magcargo
FlyingSteel - Skarmory
DragonGrass - SceptileMega Sceptile
BugWater - Surskit
BugGhost - Shedinja
DragonFairy - AltariaMega Altaria
GrassGround - Torterra
SteelWater - Empoleon
NormalWater - Bibarel
GhostIce - Froslass
ElectricGhost - Rotom
ElectricFire - RotomHeat Rotom
ElectricIce - RotomFrost Rotom
ElectricGrass - RotomMow Rotom
DragonSteel - Dialga
FireSteel - Heatran
ElectricGround - Stunfisk
FightingRock - Terrakion
GroundNormal - Diggersby
DragonPoison - Dragalge
FightingFlying - Hawlucha
ElectricFairy - Dedenne
GhostPsychic - HoopaHoopa Confined
FireWater - Volcanion


# Question 4: What is the minimum number of unique typed moves to be super effective against all theoretical pokemon typings?

This question is hugely interesting, since it could lead to a moveset that has 100% super effective coverage

In [55]:
# To confirm our helper function is working appropriately
for combo in all_combos:
    print(combo, typeCombinationSplitter(combo))

TypeError: argument of type 'int' is not iterable

In [10]:
def typeComboVulnerabilities(combo):
    # computes the vector of type vulnerabilities for this 
    # type combinations
    # TODO: remove copy.. it messed up things otherwise..
    
    if(len(combo) == 1):
        vals = types[combo[0]].copy()
    
    else:
        # it's just broadcasted product
        vals = types[combo[0]] * types[combo[1]]
    vals.index = types['Attacking']
    return vals

In [11]:
# for example
type1 = typeCombinationSplitter("BugFire")
type2 = typeCombinationSplitter("BugBug")

print(typeComboVulnerabilities(type1))
#print(typeComboVulnerabilities(type2))

Attacking
Normal      1.00
Fire        1.00
Water       2.00
Electric    1.00
Grass       0.25
Ice         0.50
Fighting    0.50
Poison      1.00
Ground      1.00
Flying      2.00
Psychic     1.00
Bug         0.50
Rock        4.00
Ghost       1.00
Dragon      1.00
Dark        1.00
Steel       0.50
Fairy       0.50
dtype: float64


In [12]:
def typePoolEffectiveness(type_pool: list):
    # returns how many pokemons this type pool 
    # would be super effective against
    count = 0
    for pokemon_typing in all_combos:
        combo = typeCombinationSplitter(pokemon_typing)
        vuln = typeComboVulnerabilities(combo)
        for movetype in type_pool:
            if vuln[movetype] >= 2:
                count += 1
                break
    return count

In [13]:
# typePoolEffectiveness will give us the number of theoretical types that 
# a given movepool will at least have one super effective move against.

# Here is a test of one of the well-known high coverage movepoools for
# Electivire: 
count = typePoolEffectiveness(['Electric', 'Ice', 'Fighting', 'Ground'])
print("Effective against %s out of 171 theoretical typings" % (count))

Effective against 135 out of 171 theoretical typings


In [16]:
from itertools import combinations
def getBestMovePoolsUsingNMoves(N):

    ans = None
    best = 0
    for type_pool_combo in combinations(types['Attacking'], N):
        val = typePoolEffectiveness(type_pool_combo)
        if val == best:
            ans.append(type_pool_combo)
        elif(val > best):
            best = val
            ans = [type_pool_combo]
    
    return best, ans
        

In [20]:
count, ans = getBestMovePoolsUsingNMoves(3)
print(count, ans)

134 [('Ice', 'Ground', 'Rock'), ('Ground', 'Rock', 'Fairy')]


In [21]:
count, ans = getBestMovePoolsUsingNMoves(4)
print(count, ans)

150 [('Ice', 'Ground', 'Rock', 'Fairy')]


In [29]:
count, ans = getBestMovePoolsUsingNMoves(2)
print(count, ans)

109 [('Ice', 'Ground')]


In [26]:
# based on what is super effective against it
best  = [18, None]
worst = [0, None]
for pokemon_typing in all_combos:
    combo = typeCombinationSplitter(pokemon_typing)
    vuln = typeComboVulnerabilities(combo)
    supp = [val for val in vuln if val >= 2]
    n = len(supp)
    if(n < best[0]):
        best = [n, [combo]]
    elif(n == best[0]):
        best = [n, best[1] + [combo]]
    if(n > worst[0]):
        worst = [n, [combo]]
    elif(n == worst[0]):
        worst = [n, worst[1] + [combo]]
print("best: ", best)
print("worst: ", worst)

best:  [1, [('Bug', 'Steel'), ('Ghost', 'Normal'), ('Dark', 'Ghost'), ('Electric',), ('Normal',), ('Dark', 'Poison'), ('Ground', 'Water')]]
worst:  [7, [('Dark', 'Grass'), ('Psychic', 'Rock'), ('Grass', 'Ice'), ('Dark', 'Rock'), ('Grass', 'Psychic'), ('Fighting', 'Rock')]]


In [28]:
# Based on invulnerabilities
best  = [18, None]
worst = [0, None]
for pokemon_typing in all_combos:
    combo = typeCombinationSplitter(pokemon_typing)
    vuln = typeComboVulnerabilities(combo)
    supp = [val for val in vuln if val == 0.]
    n = len(supp)
    if(n < best[0]):
        best = [n, [combo]]
    elif(n == best[0]):
        best = [n, best[1] + [combo]]
    if(n > worst[0]):
        worst = [n, [combo]]
    elif(n == worst[0]):
        worst = [n, worst[1] + [combo]]
print("best: ", worst)

best:  [3, [('Fairy', 'Ghost'), ('Ghost', 'Normal'), ('Dark', 'Ghost'), ('Flying', 'Ghost'), ('Ghost', 'Steel'), ('Ghost', 'Ground')]]


In [18]:
typePoolEffectiveness(["Electric", "Ground", "Ice", "Fire"])

143