# Writing Efficient Python Code

In [3]:
import this

## Built-in Functions

### 1. Enumerate

In [2]:
names = ['Jerry', 'Kramer', 'Elaine', 'George', 'Newman']

# Approach 1: explicit for loop driven by enumerate
indexed_names = []
for position, person in enumerate(names):
    indexed_names.append((position, person))
print(indexed_names)

# Approach 2: the same pairs built with a list comprehension
indexed_names_comp = [(position, person) for position, person in enumerate(names)]
print(indexed_names_comp)

# Approach 3: unpack the enumerate object directly, counting from one
indexed_names_unpack = [*enumerate(names, 1)]
print(indexed_names_unpack)

[(0, 'Jerry'), (1, 'Kramer'), (2, 'Elaine'), (3, 'George'), (4, 'Newman')]
[(0, 'Jerry'), (1, 'Kramer'), (2, 'Elaine'), (3, 'George'), (4, 'Newman')]
[(1, 'Jerry'), (2, 'Kramer'), (3, 'Elaine'), (4, 'George'), (5, 'Newman')]


### 2. Map

In [4]:
# map applies str.upper to each element of names and returns a map object
names_map = map(str.upper, names)

# Show the type of the object returned by map
print(type(names_map))

# Unpacking the map object collects the upper-cased names into a list
names_uppercase = [*names_map]
print(names_uppercase)

<class 'map'>
['JERRY', 'KRAMER', 'ELAINE', 'GEORGE', 'NEWMAN']


### 3. Numpy Array

In [7]:
import numpy as np
# Arrival times 10, 20, ..., 50 unpacked from a range object
arrival_times = [*range(10, 60, 10)]

# As a NumPy array, subtracting 3 applies to every element at once
arrival_times_np = np.array(arrival_times)
new_times = arrival_times_np - 3

# Pair each guest with the adjusted time at the same position
guest_arrivals = [
    (names[idx], arrival)
    for idx, arrival in enumerate(new_times)
]

print(guest_arrivals)

[('Jerry', 7), ('Kramer', 17), ('Elaine', 27), ('George', 37), ('Newman', 47)]


## Examining runtime

**ns**: nanosecond, $10^{-9}$ <br/>
**${\mu}$s**: microsecond, $10^{-6}$ <br/>
**ms**: millisecond, $10^{-3}$ <br/>
**s**: second, $10^0$

In [22]:
# -r2: repeat the measurement 2 times (runs)
# -n10: execute the statement 10 times per run (loops)

%timeit -r2 -n10 rand_nums = np.random.rand(1000) # 2 runs x 10 loops = 20 executions in total

11.5 µs ± 1.71 µs per loop (mean ± std. dev. of 2 runs, 10 loops each)


Using Python's literal syntax to define a data structure can speed up your runtime. <br/>
For example, use `[]` instead of `list()`.

### 1. Code Profiling

In [44]:
# Sample inputs for convert_units: three parallel sequences, one entry per hero
heroes = ['Batman', 'Superman', 'Wonder Woman']
hts = np.array([188.0, 191.0, 183.0])  # heights; presumably centimetres (TODO confirm)
wts = np.array([ 95.0, 101.0,  74.0])  # weights; presumably kilograms (TODO confirm)

In [45]:
def convert_units(heros, heights, weights):
    """Scale each hero's height and weight and collect them per hero.

    Heights are multiplied by 0.39370 and weights by 2.20462 (the
    centimetre-to-inch and kilogram-to-pound factors; the inputs are
    presumably given in cm and kg).

    Args:
        heros: iterable of hero names, used as dictionary keys.
        heights: iterable of numeric heights, aligned with ``heros``.
        weights: iterable of numeric weights, aligned with ``heros``.

    Returns:
        dict mapping each hero name to a ``(height, weight)`` tuple of
        the converted values.

    Raises:
        IndexError: if ``heights`` or ``weights`` has fewer entries than
            ``heros``.
    """
    # Element-by-element conversion is kept on purpose as a line-profiling
    # target; with NumPy array inputs it could be written `heights * 0.39370`
    # and `weights * 2.20462`.
    new_hts = [ht * 0.39370 for ht in heights]
    new_wts = [wt * 2.20462 for wt in weights]

    hero_data = {}

    for i, hero in enumerate(heros):
        # A tuple keeps the (height, weight) order and both values even when
        # they are equal; a set literal here would lose both properties.
        hero_data[hero] = (new_hts[i], new_wts[i])

    return hero_data

In [29]:
%%capture
pip install line_profiler

In [30]:
# Load the line_profiler IPython extension so the %lprun magic is available
%load_ext line_profiler

In [39]:
# -f names the function to profile line by line; the call after it is the statement that gets run
%lprun -f convert_units convert_units(heroes, hts, wts)

### 2. Memory usage

Built-in way to see each object's memory usage

In [40]:
import sys

# Unpack the integers 0..999 from a range object into a list
nums_list = [*range(1000)]
# Size of the list object in bytes; as the cell's last expression it is displayed as the output
sys.getsizeof(nums_list)

8056

* Detailed stats on memory consumption <br/>
* Line-by-line analyses <br/>
* Package used: `memory_profiler`

In [51]:
%%capture
pip install memory_profiler

In [50]:
#%load_ext memory_profiler

The memory_profiler extension is already loaded. To reload it, use:
  %reload_ext memory_profiler


In [52]:
#%mprun -f convert_units convert_units(heroes, hts, wts) # %mprun can only profile functions defined in a file on disk, not in a notebook cell

ERROR: Could not find file /var/folders/xp/mlshqg5536l5rb9cjy0mt5vm0000gn/T/ipykernel_16011/1795792953.py



## Counting and Writing better loops

### 1. combinations from itertools

In [55]:
# Import combinations from itertools
from itertools import combinations

pokemon = ['a', '1', 'c', '2', 'e']

# combinations returns a combinations object over all pairs, not a list
combos_obj = combinations(pokemon, 2)
print(type(combos_obj), '\n')

# Unpacking the combinations object collects the pairs into a list
combos_2 = [*combos_obj]
print(combos_2, '\n')

# Groups of four can be collected in one step, without the intermediate name
combos_4 = [*combinations(pokemon, 4)]
print(combos_4)

<class 'itertools.combinations'> 

[('a', '1'), ('a', 'c'), ('a', '2'), ('a', 'e'), ('1', 'c'), ('1', '2'), ('1', 'e'), ('c', '2'), ('c', 'e'), ('2', 'e')] 

[('a', '1', 'c', '2'), ('a', '1', 'c', 'e'), ('a', '1', '2', 'e'), ('a', 'c', '2', 'e'), ('1', 'c', '2', 'e')]


### 2. Set theory

In [56]:
ash_pokedex = ['Pikachu', 'Bulbasaur', 'Koffing', 'Spearow', 'Vulpix', 'Wigglytuff', 'Zubat', 'Rattata', 'Psyduck', 'Squirtle']
misty_pokedex = ['Krabby', 'Horsea', 'Slowbro', 'Tentacool', 'Vaporeon', 'Magikarp', 'Poliwag', 'Starmie', 'Psyduck', 'Squirtle']

# Build a set from each list so set operations can be applied
ash_set = set(ash_pokedex)
misty_set = set(misty_pokedex)

# Intersection (same as ash_set.intersection(misty_set)): present in both
both = ash_set & misty_set
print(both)

# Difference (same as ash_set.difference(misty_set)): Ash has it, Misty does not
ash_only = ash_set - misty_set
print(ash_only)

# Symmetric difference (same as .symmetric_difference): in exactly one of the sets
unique_to_set = ash_set ^ misty_set
print(unique_to_set)


{'Psyduck', 'Squirtle'}
{'Koffing', 'Vulpix', 'Wigglytuff', 'Rattata', 'Spearow', 'Zubat', 'Bulbasaur', 'Pikachu'}
{'Koffing', 'Krabby', 'Vulpix', 'Wigglytuff', 'Tentacool', 'Poliwag', 'Rattata', 'Magikarp', 'Horsea', 'Starmie', 'Spearow', 'Zubat', 'Pikachu', 'Vaporeon', 'Bulbasaur', 'Slowbro'}


### 3. Writing better loops

* Understand what is being done with each loop iteration
* Move one-time calculations outside (above) the loop
* Use holistic conversions outside (below) the loop
* Anything that is done **once** should be outside the loop


## Pandas

### 1. Iterating with .iterrows()

In [63]:
import pandas as pd

In [65]:
# Column data for five seasons of one team; key order fixes the column order
pit_df_dict = {
    'Team': ['PIT'] * 5,
    'League': ['NL'] * 5,
    'Year': [2012, 2011, 2010, 2009, 2008],
    'RS': [651, 610, 587, 636, 735],
    'RA': [674, 712, 866, 768, 884],
    'W': [79, 72, 57, 62, 67],
    'G': [162, 162, 162, 161, 162],
    'Playoffs': [0] * 5,
}

pit_df = pd.DataFrame(pit_df_dict)

In [70]:
%%capture
for i,row in pit_df.iterrows():
    print(row)

### 2. Iterating with .itertuples()

In [85]:
# itertuples yields one named tuple per row, holding the index and every column value
for season in pit_df.itertuples():
    print(season)

Pandas(Index=0, Team='PIT', League='NL', Year=2012, RS=651, RA=674, W=79, G=162, Playoffs=0)
Pandas(Index=1, Team='PIT', League='NL', Year=2011, RS=610, RA=712, W=72, G=162, Playoffs=0)
Pandas(Index=2, Team='PIT', League='NL', Year=2010, RS=587, RA=866, W=57, G=162, Playoffs=0)
Pandas(Index=3, Team='PIT', League='NL', Year=2009, RS=636, RA=768, W=62, G=161, Playoffs=0)
Pandas(Index=4, Team='PIT', League='NL', Year=2008, RS=735, RA=884, W=67, G=162, Playoffs=0)


In [72]:
# Print index, year and wins for every season with more than 70 wins
for season in pit_df.itertuples():
    if season.W > 70:
        print(season.Index, season.Year, season.W)

0 2012 79
1 2011 72


### 3. Pandas alternative to looping

In [75]:
# Gather sum of columns
stat = pit_df[['RS','RA']].apply(sum, axis=0)
print(stat)

RS    3219
RA    3904
dtype: int64


In [76]:
# Gather sum of rows
stat = pit_df[['RS','RA']].apply(sum, axis=1)
print(stat)

0    1325
1    1322
2    1453
3    1404
4    1619
dtype: int64


In [81]:
def _win_ratio(season):
    """Return wins divided by games played for a single row."""
    return season['W'] / season['G']


# axis=1 calls the helper once per row
stat = pit_df.apply(_win_ratio, axis=1)
print(stat)

0    0.487654
1    0.444444
2    0.351852
3    0.385093
4    0.413580
dtype: float64


### Optimal pandas iterating

In [83]:
# Take the underlying arrays of both columns and divide them element-wise in one step
w_arr = pit_df['W'].values
g_arr = pit_df['G'].values
stat = w_arr / g_arr # very fast approach
print(stat)

[0.48765432 0.44444444 0.35185185 0.38509317 0.41358025]


In [84]:
# Time the array division; no -r/-n flags, so %timeit picks the run and loop counts itself
%timeit pit_df['W'].values / pit_df['G'].values

6.32 µs ± 252 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)
