# Loops in Python
As shown in `basics.ipynb` we can speed up the execution time of a loop by using list comprehension or generator expression. Here we will look at a few other ways to speed up the execution time of a loop or even eliminate the need for a loop entirely.

- Counter
- itertools

## Counting occurrences in loop vs collections.Counter

In [1]:
from itertools import combinations
from collections import Counter
import numpy as np
np.random.seed(1)

Prepare some example data:

In [2]:
zodiacs = ('Aries', 'Taurus', 'Gemini', 'Cancer', 'Leo', 'Virgo', 'Libra', 'Scorpio', 'Sagittarius', 'Capricorn', 'Aquarius', 'Pisces')
zodiac_weights = np.random.random(12)
zodiac_probabilities = zodiac_weights / zodiac_weights.sum()

data_big = np.random.choice(zodiacs, p=zodiac_probabilities, size=1000).tolist()

data_big

['Taurus',
 'Pisces',
 'Aries',
 'Capricorn',
 'Libra',
 'Sagittarius',
 'Taurus',
 'Taurus',
 'Aquarius',
 'Pisces',
 'Cancer',
 'Capricorn',
 'Pisces',
 'Pisces',
 'Aries',
 'Aries',
 'Taurus',
 'Pisces',
 'Taurus',
 'Libra',
 'Pisces',
 'Sagittarius',
 'Capricorn',
 'Cancer',
 'Capricorn',
 'Aquarius',
 'Aries',
 'Aquarius',
 'Pisces',
 'Aquarius',
 'Cancer',
 'Aquarius',
 'Taurus',
 'Scorpio',
 'Pisces',
 'Cancer',
 'Cancer',
 'Taurus',
 'Aries',
 'Capricorn',
 'Taurus',
 'Taurus',
 'Scorpio',
 'Aries',
 'Sagittarius',
 'Taurus',
 'Sagittarius',
 'Capricorn',
 'Taurus',
 'Libra',
 'Capricorn',
 'Libra',
 'Aries',
 'Sagittarius',
 'Capricorn',
 'Scorpio',
 'Pisces',
 'Sagittarius',
 'Pisces',
 'Taurus',
 'Taurus',
 'Aquarius',
 'Libra',
 'Taurus',
 'Pisces',
 'Leo',
 'Aquarius',
 'Capricorn',
 'Pisces',
 'Capricorn',
 'Aquarius',
 'Leo',
 'Cancer',
 'Pisces',
 'Libra',
 'Pisces',
 'Capricorn',
 'Capricorn',
 'Taurus',
 'Pisces',
 'Scorpio',
 'Sagittarius',
 'Libra',
 'Taurus',
 'Pis

Now we count all occurrences of each zodiac sign in the data:

In [3]:
# Counting with for loop
def count_dict(data):
    counts = {}
    for item in data:
        if item in counts:
            counts[item] += 1
        else:
            counts[item] = 1
    return counts
zodiac_counts = count_dict(data_big)
print(zodiac_counts)

# Counting with Counter
zodiac_counts = Counter(data_big)
print(zodiac_counts)

{'Taurus': 173, 'Pisces': 161, 'Aries': 95, 'Capricorn': 123, 'Libra': 50, 'Sagittarius': 95, 'Aquarius': 107, 'Cancer': 62, 'Scorpio': 82, 'Leo': 34, 'Virgo': 18}
Counter({'Taurus': 173, 'Pisces': 161, 'Capricorn': 123, 'Aquarius': 107, 'Aries': 95, 'Sagittarius': 95, 'Scorpio': 82, 'Cancer': 62, 'Libra': 50, 'Leo': 34, 'Virgo': 18})


Both codes give the same result. However the code that uses Counter is clean, simple, easy to understand and the result is even sorted. How about execution time?

In [4]:
%timeit -r10 -n10000 count_dict(data_big)

%timeit -r10 -n10000 Counter(data_big)

73.6 μs ± 590 ns per loop (mean ± std. dev. of 10 runs, 10,000 loops each)
31.9 μs ± 471 ns per loop (mean ± std. dev. of 10 runs, 10,000 loops each)


## Generating all possible combinations in loop vs itertools.combinations

In [5]:
help(combinations)

Help on class combinations in module itertools:

class combinations(builtins.object)
 |  combinations(iterable, r)
 |  
 |  Return successive r-length combinations of elements in the iterable.
 |  
 |  combinations(range(4), 3) --> (0,1,2), (0,1,3), (0,2,3), (1,2,3)
 |  
 |  Methods defined here:
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __iter__(self, /)
 |      Implement iter(self).
 |  
 |  __next__(self, /)
 |      Implement next(self).
 |  
 |  __reduce__(...)
 |      Return state information for pickling.
 |  
 |  __setstate__(...)
 |      Set state information for unpickling.
 |  
 |  __sizeof__(...)
 |      Returns size in memory, in bytes.
 |  
 |  ----------------------------------------------------------------------
 |  Static methods defined here:
 |  
 |  __new__(*args, **kwargs) from builtins.type
 |      Create and return a new object.  See help(type) for accurate signature.



In [6]:
# Combinations with for loops
def combinations_loop(data):
    combinations = []
    for i in data:
        for j in data:
            if i == j:
                continue
            if ((i, j) not in combinations) and ((j, i) not in combinations):
                combinations.append((i, j))
    return combinations
combos_loop = combinations_loop(zodiacs)
print(combos_loop)
print(len(combos_loop))

# Combinations with combinations
combos_itertools = [*combinations(zodiacs, 2)]
print(combos_itertools)
print(len(combos_itertools))

print(combos_loop == combos_itertools)

[('Aries', 'Taurus'), ('Aries', 'Gemini'), ('Aries', 'Cancer'), ('Aries', 'Leo'), ('Aries', 'Virgo'), ('Aries', 'Libra'), ('Aries', 'Scorpio'), ('Aries', 'Sagittarius'), ('Aries', 'Capricorn'), ('Aries', 'Aquarius'), ('Aries', 'Pisces'), ('Taurus', 'Gemini'), ('Taurus', 'Cancer'), ('Taurus', 'Leo'), ('Taurus', 'Virgo'), ('Taurus', 'Libra'), ('Taurus', 'Scorpio'), ('Taurus', 'Sagittarius'), ('Taurus', 'Capricorn'), ('Taurus', 'Aquarius'), ('Taurus', 'Pisces'), ('Gemini', 'Cancer'), ('Gemini', 'Leo'), ('Gemini', 'Virgo'), ('Gemini', 'Libra'), ('Gemini', 'Scorpio'), ('Gemini', 'Sagittarius'), ('Gemini', 'Capricorn'), ('Gemini', 'Aquarius'), ('Gemini', 'Pisces'), ('Cancer', 'Leo'), ('Cancer', 'Virgo'), ('Cancer', 'Libra'), ('Cancer', 'Scorpio'), ('Cancer', 'Sagittarius'), ('Cancer', 'Capricorn'), ('Cancer', 'Aquarius'), ('Cancer', 'Pisces'), ('Leo', 'Virgo'), ('Leo', 'Libra'), ('Leo', 'Scorpio'), ('Leo', 'Sagittarius'), ('Leo', 'Capricorn'), ('Leo', 'Aquarius'), ('Leo', 'Pisces'), ('Virgo'

In [7]:
%timeit -r10 -n10000 combinations_loop(zodiacs)

%timeit -r10 -n10000 [*combinations(zodiacs, 2)]

149 μs ± 2.29 μs per loop (mean ± std. dev. of 10 runs, 10,000 loops each)
1.36 μs ± 30.6 ns per loop (mean ± std. dev. of 10 runs, 10,000 loops each)


Difference in execution time is outstanding.

## Comparing two list of objects using set theory
Examples of set theory operations

Prepare some example data:

In [8]:
data1 = np.random.choice(zodiacs, replace=False, size=8).tolist()
data2 = np.random.choice(zodiacs, replace=False, size=8).tolist()

print(data1)
print(data2)

['Cancer', 'Capricorn', 'Virgo', 'Scorpio', 'Pisces', 'Libra', 'Sagittarius', 'Gemini']
['Scorpio', 'Gemini', 'Leo', 'Cancer', 'Taurus', 'Virgo', 'Capricorn', 'Pisces']


### Find common elements
Find intersection of two sets - elements that are present in both sets

In [9]:
# For loop solution
def find_common_elements(data1, data2):
    common = []
    for i in data1:
        for j in data2:
            if i == j:
                common.append(i)
    return common
commons_loop = find_common_elements(data1, data2)
print(commons_loop)

# Set solution with & operator
commons_operator = set(data1) & set(data2)
print(commons_operator)

# Set solution with intersection
commons_method = set(data1).intersection(data2)
print(commons_method)

# Check if they have the same elements
print(set(commons_loop) == commons_operator == commons_method)

['Cancer', 'Capricorn', 'Virgo', 'Scorpio', 'Pisces', 'Gemini']
{'Virgo', 'Pisces', 'Scorpio', 'Cancer', 'Gemini', 'Capricorn'}
{'Pisces', 'Cancer', 'Scorpio', 'Virgo', 'Gemini', 'Capricorn'}
True


In [11]:
%timeit -r10 -n10000 find_common_elements(data1, data2)

%timeit -r10 -n10000 set(data1) & set(data2)

%timeit -r10 -n10000 set(data1).intersection(data2)

1.67 μs ± 53.1 ns per loop (mean ± std. dev. of 10 runs, 10,000 loops each)
539 ns ± 17.2 ns per loop (mean ± std. dev. of 10 runs, 10,000 loops each)
435 ns ± 5.57 ns per loop (mean ± std. dev. of 10 runs, 10,000 loops each)


### Find differences
Find difference of two sets - elements that are present in first set but not in the second

In [12]:
# For loop
def find_difference(data1, data2):
    difference = []
    for i in data1:
        if i not in data2:
            difference.append(i)
    return difference
difference_loop = find_difference(data1, data2)
print(difference_loop)

# Set solution with - operator
difference_operator = set(data1) - set(data2)
print(difference_operator)

# Set solution with difference
difference_method = set(data1).difference(data2)
print(difference_method)

# Check if they have the same elements
print(set(difference_loop) == difference_operator == difference_method)

['Libra', 'Sagittarius']
{'Sagittarius', 'Libra'}
{'Sagittarius', 'Libra'}
True


In [13]:
%timeit -r10 -n10000 find_difference(data1, data2)

%timeit -r10 -n10000 set(data1) - set(data2)

%timeit -r10 -n10000 set(data1).difference(data2)

644 ns ± 23.7 ns per loop (mean ± std. dev. of 10 runs, 10,000 loops each)
451 ns ± 14 ns per loop (mean ± std. dev. of 10 runs, 10,000 loops each)
424 ns ± 8.73 ns per loop (mean ± std. dev. of 10 runs, 10,000 loops each)


### Find symmetric difference
Find symmetric difference of two sets - elements that are present only in one of the sets and not in the other

In [14]:
# For loop
def find_symmetric_difference(data1, data2):
    symmetric_difference = []
    for i in data1:
        if i not in data2:
            symmetric_difference.append(i)
    for i in data2:
        if i not in data1:
            symmetric_difference.append(i)
    return symmetric_difference
symmetric_difference_loop = find_symmetric_difference(data1, data2)
print(symmetric_difference_loop)

# Set solution with ^ operator
symmetric_difference_operator = set(data1) ^ set(data2)
print(symmetric_difference_operator)

# Set solution with symmetric_difference
symmetric_difference_method = set(data1).symmetric_difference(data2)
print(symmetric_difference_method)

# Check if they have the same elements
print(set(symmetric_difference_loop) == symmetric_difference_operator == symmetric_difference_method)

['Libra', 'Sagittarius', 'Leo', 'Taurus']
{'Sagittarius', 'Leo', 'Libra', 'Taurus'}
{'Sagittarius', 'Leo', 'Libra', 'Taurus'}
True


In [15]:
%timeit -r10 -n10000 find_symmetric_difference(data1, data2)

%timeit -r10 -n10000 set(data1) ^ set(data2)

%timeit -r10 -n10000 set(data1).symmetric_difference(data2)

1.22 μs ± 66.7 ns per loop (mean ± std. dev. of 10 runs, 10,000 loops each)
570 ns ± 14.1 ns per loop (mean ± std. dev. of 10 runs, 10,000 loops each)
441 ns ± 5.37 ns per loop (mean ± std. dev. of 10 runs, 10,000 loops each)


### Find union
Find union of two sets - elements that are present in both sets

In [16]:
# For loop
def find_union(data1, data2):
    union = []
    for i in data1:
        if i not in union:
            union.append(i)
    for i in data2:
        if i not in union:
            union.append(i)
    return union
union_loop = find_union(data1, data2)
print(union_loop)

# Set solution with | operator
union_operator = set(data1) | set(data2)
print(union_operator)

# Set solution with union
union_method = set(data1).union(data2)
print(union_method)

# Check if they have the same elements
print(set(union_loop) == union_operator == union_method)

['Cancer', 'Capricorn', 'Virgo', 'Scorpio', 'Pisces', 'Libra', 'Sagittarius', 'Gemini', 'Leo', 'Taurus']
{'Sagittarius', 'Pisces', 'Cancer', 'Gemini', 'Leo', 'Scorpio', 'Libra', 'Taurus', 'Virgo', 'Capricorn'}
{'Sagittarius', 'Pisces', 'Cancer', 'Gemini', 'Leo', 'Scorpio', 'Libra', 'Taurus', 'Virgo', 'Capricorn'}
True


In [17]:
%timeit -r10 -n10000 find_union(data1, data2)

%timeit -r10 -n10000 set(data1) | set(data2)

%timeit -r10 -n10000 set(data1).union(data2)

1.14 μs ± 33.9 ns per loop (mean ± std. dev. of 10 runs, 10,000 loops each)
520 ns ± 17.6 ns per loop (mean ± std. dev. of 10 runs, 10,000 loops each)
387 ns ± 6.83 ns per loop (mean ± std. dev. of 10 runs, 10,000 loops each)


### Find uniques
Find unique elements

In [21]:
# For loop
def find_uniques(data):
    uniques = []
    for i in data:
        if i not in uniques:
            uniques.append(i)
    return uniques
uniques_loop = find_uniques(data_big)
print(uniques_loop)

# Set solution
uniques_operator = set(data_big)
print(uniques_operator)

# Check if they have the same elements
print(set(uniques_loop) == uniques_operator)

['Taurus', 'Pisces', 'Aries', 'Capricorn', 'Libra', 'Sagittarius', 'Aquarius', 'Cancer', 'Scorpio', 'Leo', 'Virgo']
{'Virgo', 'Sagittarius', 'Pisces', 'Aries', 'Aquarius', 'Leo', 'Libra', 'Taurus', 'Scorpio', 'Cancer', 'Capricorn'}
True


In [22]:
%timeit -r10 -n10000 find_uniques(data_big)

%timeit -r10 -n10000 set(data_big)

62 μs ± 636 ns per loop (mean ± std. dev. of 10 runs, 10,000 loops each)
11.5 μs ± 551 ns per loop (mean ± std. dev. of 10 runs, 10,000 loops each)


### Membership testing in list, tuple and set - speed comparison

In [18]:
data_list = list(zodiacs)
data_tuple = tuple(zodiacs)
data_set = set(zodiacs)

In [19]:
%timeit -r10 -n10000 "Leo" in data_list

%timeit -r10 -n10000 "Leo" in data_tuple

%timeit -r10 -n10000 "Leo" in data_set

46.5 ns ± 1.3 ns per loop (mean ± std. dev. of 10 runs, 10,000 loops each)
44.5 ns ± 0.911 ns per loop (mean ± std. dev. of 10 runs, 10,000 loops each)
19.1 ns ± 0.385 ns per loop (mean ± std. dev. of 10 runs, 10,000 loops each)


## Eliminating loops
Replace loops with list comprehensions, maps and numpy

### Sum elements in rows

In [67]:
big_array_2d = [[i for i in range(500)] for j in range(500)]

# For loop solution
def row_totals(numbers_2d):
    totals = []
    for row in numbers_2d:
        totals.append(sum(row))
    return totals
row_totals_loop = row_totals(big_array_2d)
print(row_totals_loop)

# List comprehension solution
row_totals_list_comprehension = [sum(row) for row in big_array_2d]
print(row_totals_list_comprehension)

# Map solution
row_totals_map = [*map(sum, big_array_2d)]
print(row_totals_map)

# Numpy solution
np_numbers_2d = np.array(big_array_2d)
row_totals_numpy = np_numbers_2d.sum(axis=1).tolist()
print(row_totals_numpy)

# Check if they have the same elements
print(row_totals_loop == row_totals_list_comprehension == row_totals_map == row_totals_numpy)

[124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750, 124750,

In [66]:
%timeit -r10 -n1000 row_totals(big_array_2d)

%timeit -r10 -n1000 [sum(row) for row in big_array_2d]

%timeit -r10 -n1000 [*map(sum, big_array_2d)]

%timeit -r10 -n1000 np_numbers_2d.sum(axis=1)

1.33 ms ± 22.2 μs per loop (mean ± std. dev. of 10 runs, 1,000 loops each)
1.33 ms ± 18.1 μs per loop (mean ± std. dev. of 10 runs, 1,000 loops each)
1.32 ms ± 29.7 μs per loop (mean ± std. dev. of 10 runs, 1,000 loops each)
67.8 μs ± 3.33 μs per loop (mean ± std. dev. of 10 runs, 1,000 loops each)


### Sum elements in columns

In [72]:
# For loop solution
def column_totals(numbers_2d):
    totals = []
    for i in range(len(numbers_2d[0])):
        total = 0
        for row in numbers_2d:
            total += row[i]
        totals.append(total)
    return totals
column_totals_loop = column_totals(big_array_2d)
print(column_totals_loop)

# List comprehension solution
column_totals_list_comprehension = [sum(row[i] for row in big_array_2d) for i in range(len(big_array_2d[0]))]
print(column_totals_list_comprehension)

# Map solution
column_totals_map = [*map(sum, zip(*big_array_2d))]
print(column_totals_map)

# Numpy solution
np_numbers_2d = np.array(big_array_2d)
column_totals_numpy = np_numbers_2d.sum(axis=0).tolist()
print(column_totals_numpy)

# Check if they have the same elements
print(column_totals_loop == column_totals_list_comprehension == column_totals_map == column_totals_numpy)

[0, 500, 1000, 1500, 2000, 2500, 3000, 3500, 4000, 4500, 5000, 5500, 6000, 6500, 7000, 7500, 8000, 8500, 9000, 9500, 10000, 10500, 11000, 11500, 12000, 12500, 13000, 13500, 14000, 14500, 15000, 15500, 16000, 16500, 17000, 17500, 18000, 18500, 19000, 19500, 20000, 20500, 21000, 21500, 22000, 22500, 23000, 23500, 24000, 24500, 25000, 25500, 26000, 26500, 27000, 27500, 28000, 28500, 29000, 29500, 30000, 30500, 31000, 31500, 32000, 32500, 33000, 33500, 34000, 34500, 35000, 35500, 36000, 36500, 37000, 37500, 38000, 38500, 39000, 39500, 40000, 40500, 41000, 41500, 42000, 42500, 43000, 43500, 44000, 44500, 45000, 45500, 46000, 46500, 47000, 47500, 48000, 48500, 49000, 49500, 50000, 50500, 51000, 51500, 52000, 52500, 53000, 53500, 54000, 54500, 55000, 55500, 56000, 56500, 57000, 57500, 58000, 58500, 59000, 59500, 60000, 60500, 61000, 61500, 62000, 62500, 63000, 63500, 64000, 64500, 65000, 65500, 66000, 66500, 67000, 67500, 68000, 68500, 69000, 69500, 70000, 70500, 71000, 71500, 72000, 72500, 7

In [73]:
%timeit -r10 -n100 column_totals(big_array_2d)

%timeit -r10 -n100 [sum(row[i] for row in big_array_2d) for i in range(len(big_array_2d[0]))]

%timeit -r10 -n100 [*map(sum, zip(*big_array_2d))]

%timeit -r10 -n100 np_numbers_2d.sum(axis=0)

12.1 ms ± 169 μs per loop (mean ± std. dev. of 10 runs, 100 loops each)
14.9 ms ± 207 μs per loop (mean ± std. dev. of 10 runs, 100 loops each)
3.21 ms ± 454 μs per loop (mean ± std. dev. of 10 runs, 100 loops each)
122 μs ± 6.27 μs per loop (mean ± std. dev. of 10 runs, 100 loops each)
