In [226]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [227]:
import pandas as pd
import numpy as np

from collections import Counter
from itertools import combinations

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
import warnings
warnings.filterwarnings("ignore")

In [228]:
import sys
sys.path.append("../") 

from utils.info import (names, primary_types, secondary_types, generations, pokemon, ash_pokedex, misty_pokedex, brock_pokedex, poke_names, 
                        poke_gens, stats, pokemon_types, hps)

stats = np.array(stats)
hps = np.array(hps)


# 13.3.0 Gaining efficiencies

## 13.3.2 Combining Pokémon names and types

In [229]:
# Pair each Pokémon name with its primary type
names_type1 = list(zip(names, primary_types))

# Show the first five pairs, one per line
print(*names_type1[:5], sep='\n')

('Abomasnow', 'Grass')
('Abra', 'Psychic')
('Absol', 'Dark')
('Accelgor', 'Bug')
('Aerodactyl', 'Rock')


In [230]:
# Build (name, primary type, secondary type) triples from the three lists
names_types = list(zip(names, primary_types, secondary_types))

# Show the first five triples, one per line
print(*names_types[:5], sep='\n')

('Abomasnow', 'Grass', 'Ice')
('Abra', 'Psychic', nan)
('Absol', 'Dark', nan)
('Accelgor', 'Bug', nan)
('Aerodactyl', 'Rock', 'Flying')


In [231]:
# zip() stops at the shorter input: five names against three types gives three pairs
differing_lengths = list(zip(names[:5], primary_types[:3]))

print(*differing_lengths, sep='\n')

('Abomasnow', 'Grass')
('Abra', 'Psychic')
('Absol', 'Dark')


## 13.3.3 Counting Pokémon from a sample

In [232]:
# Tally the Pokémon by primary type and by generation
type_count = Counter(primary_types)
gen_count = Counter(generations)

# name[:1] is the first character of each Pokémon name
starting_letters = [name[:1] for name in names]

# Tally the Pokémon by starting letter
starting_letters_count = Counter(starting_letters)

# Report the three tallies; the first two are followed by an extra blank line
print(type_count, '\n')
print(gen_count, '\n')
print(starting_letters_count)

Counter({'Water': 105, 'Normal': 92, 'Bug': 65, 'Grass': 64, 'Fire': 48, 'Psychic': 46, 'Rock': 41, 'Electric': 40, 'Ground': 30, 'Dark': 28, 'Poison': 28, 'Dragon': 25, 'Fighting': 25, 'Ice': 23, 'Steel': 21, 'Ghost': 20, 'Fairy': 17, 'Flying': 2}) 

Counter({5: 122, 3: 103, 1: 99, 4: 78, 2: 51, 6: 47}) 

Counter({'S': 102, 'M': 58, 'C': 55, 'P': 47, 'G': 46, 'D': 41, 'B': 39, 'T': 35, 'L': 33, 'A': 32, 'R': 30, 'H': 27, 'F': 26, 'K': 25, 'W': 23, 'V': 22, 'E': 21, 'N': 16, 'Z': 9, 'J': 7, 'O': 6, 'I': 5, 'U': 5, 'Q': 4, 'Y': 4, 'X': 2})


## 13.3.4 Combinations of Pokémon

In [233]:
# combinations() comes from the itertools import in the notebook's import cell

# Iterator over every unordered pair of Pokémon
combos_obj = combinations(pokemon, 2)
print(type(combos_obj), '\n')

# Materialize the iterator into a list of pairs
combos_2 = list(combos_obj)
print(combos_2, '\n')

# Build every combination of 4 Pokémon straight into a list
combos_4 = list(combinations(pokemon, 4))
print(combos_4)

<class 'itertools.combinations'> 

[('Geodude', 'Cubone'), ('Geodude', 'Lickitung'), ('Geodude', 'Persian'), ('Geodude', 'Diglett'), ('Cubone', 'Lickitung'), ('Cubone', 'Persian'), ('Cubone', 'Diglett'), ('Lickitung', 'Persian'), ('Lickitung', 'Diglett'), ('Persian', 'Diglett')] 

[('Geodude', 'Cubone', 'Lickitung', 'Persian'), ('Geodude', 'Cubone', 'Lickitung', 'Diglett'), ('Geodude', 'Cubone', 'Persian', 'Diglett'), ('Geodude', 'Lickitung', 'Persian', 'Diglett'), ('Cubone', 'Lickitung', 'Persian', 'Diglett')]


## 13.3.5 Comparing Pokédexes

In [234]:
# Build a set from each trainer's Pokédex
ash_set, misty_set = set(ash_pokedex), set(misty_pokedex)

# Pokémon present in both Pokédexes (set intersection)
both = ash_set & misty_set
print(both)

# Pokémon Ash has that Misty does not (set difference)
ash_only = ash_set - misty_set
print(ash_only)

# Pokémon that appear in exactly one of the two Pokédexes (symmetric difference)
unique_to_set = ash_set ^ misty_set
print(unique_to_set)

{'Squirtle', 'Psyduck'}
{'Koffing', 'Zubat', 'Pikachu', 'Rattata', 'Vulpix', 'Wigglytuff', 'Bulbasaur', 'Spearow'}
{'Koffing', 'Zubat', 'Pikachu', 'Rattata', 'Magikarp', 'Tentacool', 'Horsea', 'Vulpix', 'Slowbro', 'Poliwag', 'Wigglytuff', 'Vaporeon', 'Krabby', 'Bulbasaur', 'Starmie', 'Spearow'}


## 13.3.6 Searching for Pokémon

In [235]:
# Build a set holding the entries of Brock's Pokédex
brock_pokedex_set = {*brock_pokedex}
print(brock_pokedex_set)

{'Onix', 'Zubat', 'Geodude', 'Omastar', 'Tauros', 'Vulpix', 'Golem', 'Dugtrio', 'Machop', 'Kabutops'}


In [236]:
# Check if Psyduck is in Ash's list and Brock's set
# (the two `in` expressions, without print(), are what the %timeit question below compares)
print('Psyduck' in ash_pokedex)
print('Psyduck' in brock_pokedex_set)

True
False


In [237]:
# Check if Machop is in Ash's list and Brock's set
# (the two `in` expressions, without print(), are what the %timeit question below compares)
print('Machop' in ash_pokedex)
print('Machop' in brock_pokedex_set)

False
True


Question
Within your IPython console, use %timeit to compare membership testing for 'Psyduck' in ash_pokedex, 'Psyduck' in brock_pokedex_set, 'Machop' in ash_pokedex, and 'Machop' in brock_pokedex_set (a total of four different timings).

Don't include the print() function. Only time the commands that you wrote inside the print() function in the previous steps.

Which membership testing was faster?

R:/ Membership testing using a set is faster than using a list in all four cases.

## 13.3.7 Gathering unique Pokémon

In [238]:
# NOTE(review): this rebinds the notebook-level `names` (imported earlier from
# utils.info) to `names2`. Cells run after this point see the new value until
# `names` is re-imported in section 13.3.10, so re-running earlier cells
# out of order will use different data.
from utils.info import names2 as names

In [239]:
def find_unique_items(data):
    """Return the distinct items of ``data`` in first-seen order.

    Membership is checked against a plain list, so every lookup scans the
    items collected so far. This is the list-based approach that the
    notebook compares against ``set()``.

    Args:
        data: Any iterable of items that support ``==`` comparison.

    Returns:
        A new list containing each distinct item once.
    """
    collected = []
    for candidate in data:
        if candidate in collected:
            continue
        collected.append(candidate)
    return collected

In [240]:
# Use the provided function to collect unique Pokémon names
# (`names` here is the `names2` data imported at the start of this section)
uniq_names_func = find_unique_items(names)
print(len(uniq_names_func))

368


In [241]:
# Convert the names list to a set to collect unique Pokémon names
uniq_names_set = set(names)
print(len(uniq_names_set))

# Check that both unique collections are equivalent
# (a list and a set never compare equal directly, so both are sorted into lists first)
print(sorted(uniq_names_func) == sorted(uniq_names_set))

368
True


Question
Within your IPython console, use %timeit to compare the find_unique_items() function with using a set data type to collect unique Pokémon character names in names.

Which approach was faster?

R:/ Using a set to collect unique values is faster.

In [242]:
# This cell repeats the list-based and set-based approaches side by side,
# then applies the set-based one to primary types and generations.

# Use find_unique_items() to collect unique Pokémon names
uniq_names_func = find_unique_items(names)
print(len(uniq_names_func))

# Convert the names list to a set to collect unique Pokémon names
uniq_names_set = set(names)
print(len(uniq_names_set))

# Check that both unique collections are equivalent
# (a list and a set never compare equal directly, so both are sorted into lists first)
print(sorted(uniq_names_func) == sorted(uniq_names_set))

# Use the best approach to collect unique primary types and generations
uniq_types = set(primary_types)
uniq_gens = set(generations)
print(uniq_types, uniq_gens, sep='\n') 

368
368
True
{'Rock', 'Electric', 'Psychic', 'Ice', 'Fairy', 'Ground', 'Ghost', 'Flying', 'Poison', 'Dark', 'Normal', 'Bug', 'Grass', 'Water', 'Dragon', 'Steel', 'Fighting', 'Fire'}
{1, 2, 3, 4, 5, 6}


## 13.3.9 Gathering Pokémon without a loop

In [243]:
# Loop-based baseline: collect (name, name length) tuples for every Pokémon
# whose generation is below 3.
gen1_gen2_name_lengths_loop = []

for name,gen in zip(poke_names, poke_gens):
    if gen < 3:
        name_length = len(name)
        poke_tuple = (name, name_length)
        gen1_gen2_name_lengths_loop.append(poke_tuple)

In [244]:
# Names of the Pokémon that belong to generation 1 or generation 2
gen1_gen2_pokemon = [
    poke_name
    for poke_name, poke_gen in zip(poke_names, poke_gens)
    if poke_gen < 3
]

# Lazy iterator of name lengths; the zip() below consumes it, so it is empty afterwards
name_lengths_map = map(len, gen1_gen2_pokemon)

# Pair every name with its length
gen1_gen2_name_lengths = list(zip(gen1_gen2_pokemon, name_lengths_map))

# Show the first entries of the loop-based and the loop-free result
print(gen1_gen2_name_lengths_loop[:5])
print(gen1_gen2_name_lengths[:5])

[('Abra', 4), ('Aerodactyl', 10), ('Aipom', 5), ('Alakazam', 8), ('Ampharos', 8)]
[('Abra', 4), ('Aerodactyl', 10), ('Aipom', 5), ('Alakazam', 8), ('Ampharos', 8)]


## 13.3.10 Pokémon totals and averages without a loop

In [245]:
# Restore `names` to the original `names` from utils.info
# (it was rebound to `names2` in section 13.3.7).
from utils.info import names as names

In [246]:
# Loop-based baseline: one (name, total stats, average stats) tuple per Pokémon.
poke_list = []

# The loop variable is deliberately NOT called `pokemon`: that name is the
# list imported from utils.info and used by the combinations cell, and a loop
# variable of the same name would overwrite it with a single name string.
for poke_name, row in zip(names, stats):
    total_stats = np.sum(row)
    avg_stats = np.mean(row)
    poke_list.append((poke_name, total_stats, avg_stats))

In [247]:
# Row-wise totals: one value per Pokémon (axis=1 collapses each row of stats)
total_stats_np = np.sum(stats, axis=1)

# Row-wise averages: one value per Pokémon
avg_stats_np = np.mean(stats, axis=1)

# Zip the names with their totals and averages into a list of tuples
poke_list_np = list(zip(names, total_stats_np, avg_stats_np))

# Confirm the vectorized result matches the loop-based poke_list
print(poke_list_np == poke_list, '\n')
print(poke_list_np[:3])
print(poke_list[:3], '\n')

# Rank by total stats (tuple position 1), highest first, and keep the top three
top_3 = sorted(poke_list_np, key=lambda entry: entry[1], reverse=True)[:3]
print('3 strongest Pokémon:\n{}'.format(top_3))

True 

[('Abomasnow', 494, 82.33333333333333), ('Abra', 310, 51.666666666666664), ('Absol', 465, 77.5)]
[('Abomasnow', 494, 82.33333333333333), ('Abra', 310, 51.666666666666664), ('Absol', 465, 77.5)] 

3 strongest Pokémon:
[('GroudonPrimal Groudon', 770, 128.33333333333334), ('KyogrePrimal Kyogre', 770, 128.33333333333334), ('Arceus', 720, 120.0)]


## 13.3.12 One-time calculation loop

In [248]:
# Original loop, kept commented out for reference: total_count is
# recalculated on every iteration even though it never changes.
# for gen,count in gen_counts.items():
#     total_count = len(generations)
#     gen_percent = round(count / total_count * 100, 2)
#     print(
#       'generation {}: count = {:3} percentage = {}'
#       .format(gen, count, gen_percent)
#     )

In [249]:
# Counter comes from the collections import in the notebook's import cell

# Tally the Pokémon in each generation
gen_counts = Counter(generations)

# Loop-invariant value: computed once here instead of on every iteration
total_count = len(generations)

for gen, count in gen_counts.items():
    gen_percent = round(count / total_count * 100, 2)
    print('generation {}: count = {:3} percentage = {}'.format(gen, count, gen_percent))

generation 1: count =  99 percentage = 19.8
generation 5: count = 122 percentage = 24.4
generation 3: count = 103 percentage = 20.6
generation 6: count =  47 percentage = 9.4
generation 4: count =  78 percentage = 15.6
generation 2: count =  51 percentage = 10.2


## 13.3.13 Holistic conversion loop

In [250]:
# Original loop, kept commented out for reference: each tuple is converted
# to a list one at a time inside the loop.
# enumerated_pairs = []

# for i,pair in enumerate(possible_pairs, 1):
#     enumerated_pair_tuple = (i,) + pair
#     enumerated_pair_list = list(enumerated_pair_tuple)
#     enumerated_pairs.append(enumerated_pair_list)

In [251]:
# Collect all possible pairs using combinations()
possible_pairs = [*combinations(pokemon_types, 2)]

# Create an empty list called enumerated_tuples
enumerated_tuples = []

# Prefix each pair with its 1-based index and store the resulting TUPLE.
# No per-item list() call happens inside the loop; the conversion is done
# once for the whole collection below.
for i, pair in enumerate(possible_pairs, 1):
    enumerated_pair_tuple = (i,) + pair
    enumerated_tuples.append(enumerated_pair_tuple)

# Convert all tuples in enumerated_tuples to lists in a single pass
enumerated_pairs = [*map(list, enumerated_tuples)]
print(enumerated_pairs)

[[1, 'Bug', 'Dark'], [2, 'Bug', 'Dragon'], [3, 'Bug', 'Electric'], [4, 'Bug', 'Fairy'], [5, 'Bug', 'Fighting'], [6, 'Bug', 'Fire'], [7, 'Bug', 'Flying'], [8, 'Bug', 'Ghost'], [9, 'Bug', 'Grass'], [10, 'Bug', 'Ground'], [11, 'Bug', 'Ice'], [12, 'Bug', 'Normal'], [13, 'Bug', 'Poison'], [14, 'Bug', 'Psychic'], [15, 'Bug', 'Rock'], [16, 'Bug', 'Steel'], [17, 'Bug', 'Water'], [18, 'Dark', 'Dragon'], [19, 'Dark', 'Electric'], [20, 'Dark', 'Fairy'], [21, 'Dark', 'Fighting'], [22, 'Dark', 'Fire'], [23, 'Dark', 'Flying'], [24, 'Dark', 'Ghost'], [25, 'Dark', 'Grass'], [26, 'Dark', 'Ground'], [27, 'Dark', 'Ice'], [28, 'Dark', 'Normal'], [29, 'Dark', 'Poison'], [30, 'Dark', 'Psychic'], [31, 'Dark', 'Rock'], [32, 'Dark', 'Steel'], [33, 'Dark', 'Water'], [34, 'Dragon', 'Electric'], [35, 'Dragon', 'Fairy'], [36, 'Dragon', 'Fighting'], [37, 'Dragon', 'Fire'], [38, 'Dragon', 'Flying'], [39, 'Dragon', 'Ghost'], [40, 'Dragon', 'Grass'], [41, 'Dragon', 'Ground'], [42, 'Dragon', 'Ice'], [43, 'Dragon', 'Nor

## 13.3.14 Bringing it all together: Pokémon z-scores

In [252]:
# Loop-based baseline for the z-score exercise.
poke_zscores = []

for name,hp in zip(names, hps):
    # hp_avg and hp_std do not depend on the loop variables,
    # yet they are recomputed on every iteration here
    hp_avg = hps.mean()
    hp_std = hps.std()
    z_score = (hp - hp_avg)/hp_std
    poke_zscores.append((name, hp, z_score))
# Second pass: keep only the entries whose z-score is greater than 2
highest_hp_pokemon = []

for name,hp,zscore in poke_zscores:
    if zscore > 2:
        highest_hp_pokemon.append((name, hp, zscore))

In [253]:
# Mean and standard deviation of all HP values, each computed exactly once
hp_avg = np.mean(hps)
hp_std = np.std(hps)

# Broadcasting applies the z-score formula to every HP value without a loop
z_scores = (hps - hp_avg) / hp_std

# Build (name, hp, z-score) tuples and show the first three
poke_zscores2 = list(zip(names, hps, z_scores))
print(*poke_zscores2[:3], sep='\n')

('Abomasnow', 80.0, 0.46797638117739043)
('Abra', 60.0, -0.3271693284337512)
('Absol', 131.0, 2.4955979406858013)


In [254]:
# Keep the (name, hp, z-score) tuples whose z-score is greater than 2.
# The comprehension variables are singular so they do not shadow the
# notebook-level `names`, `hps` and `z_scores` collections.
highest_hp_pokemon2 = [(name, hp, z_score) for name, hp, z_score in poke_zscores2 if z_score > 2]
print(*highest_hp_pokemon2, sep='\n')

('Absol', 131.0, 2.4955979406858013)
('Bonsly', 127.0, 2.3365687987635733)
('Caterpie', 122.0, 2.137782371360788)
('Cofagrigus', 133.0, 2.575112511646916)
('Cresselia', 126.0, 2.296811513283016)
('Dewgong', 122.0, 2.137782371360788)
('Druddigon', 126.0, 2.296811513283016)
('Froakie', 123.0, 2.1775396568413448)
('Kadabra', 135.0, 2.65462708260803)
('Klang', 123.0, 2.1775396568413448)
('Kricketune', 122.0, 2.137782371360788)
('Lumineon', 129.0, 2.4160833697246873)
('Magnemite', 137.0, 2.734141653569144)
('Nidorina', 119.0, 2.0185105149191167)
('Onix', 126.0, 2.296811513283016)
('Prinplup', 124.0, 2.217296942321902)
('Skuntank', 128.0, 2.3763260842441305)
('Swellow', 125.0, 2.2570542278024592)


Question
Use %%timeit (cell magic mode) within your IPython console to compare the runtimes between the original code blocks and the new code you developed using NumPy and list comprehension.

Don't include the print() statements when timing. You should include ten lines of code when timing the original code blocks and five lines of code when timing the new code you developed. You may need to press SHIFT+ENTER after entering %%timeit to get to a new line within your IPython console.

Which approach was faster?

R:/ The updated solution using NumPy and a list comprehension was faster overall. It eliminated two loops by using NumPy broadcasting and a list comprehension.

In [255]:
print('Ok_')

Ok_
