# Lists and Other Containers
Python's builtins include helpful container types, all of which are iterable. Lists and tuples are sequences (accessible by integer index); dicts are mappings (accessible by key).

- list
- tuple (like a list, but immutable)
- dict (mapping of keys to values)

Python also includes sets, which can be iterated over, but cannot be referenced by index. Iterating over a set gives the set values in no particular order.

## Use sets to build a list of unique items (when the items are hashable)

In [5]:
# sample text containing repeated characters
a = "The quick brown fox jumps over the lazy dog."
print(f"{a=!r}")

# set(a) keeps one copy of each distinct character ('T' and 't' are different
# characters); list() converts the set back to a list.
# The order of the result is arbitrary - it is not the order found in a.
uniq_chars = list(set(a))
print(f"{uniq_chars=}")

a='The quick brown fox jumps over the lazy dog.'
uniq_chars=['u', 't', 'z', 'T', 'w', 'o', 'e', 'j', 'i', 's', 'y', 'm', 'l', 'g', 'v', 'q', 'r', 'h', 'p', 'k', 'b', 'd', 'x', 'a', 'c', ' ', 'n', '.', 'f']


In [6]:
# or if you must preserve order (each character is kept where it first appears)
# seen remembers which characters have already been added to uniq_chars
seen = set()
uniq_chars = []
for c in a:
    # testing for set membership is O(1) on average
    if c not in seen:
        seen.add(c)
        uniq_chars.append(c)
print(f"{uniq_chars=}")

uniq_chars=['T', 'h', 'e', ' ', 'q', 'u', 'i', 'c', 'k', 'b', 'r', 'o', 'w', 'n', 'f', 'x', 'j', 'm', 'p', 's', 'v', 't', 'l', 'a', 'z', 'y', 'd', 'g', '.']


In [7]:
# new since Python 3.7 - dicts preserve insertion order
# dict.fromkeys(a) makes one key per distinct character, in order of first
# appearance; list() of a dict gives its keys
uniq_chars = list(dict.fromkeys(a))
print(f"{uniq_chars=}")

uniq_chars=['T', 'h', 'e', ' ', 'q', 'u', 'i', 'c', 'k', 'b', 'r', 'o', 'w', 'n', 'f', 'x', 'j', 'm', 'p', 's', 'v', 't', 'l', 'a', 'z', 'y', 'd', 'g', '.']


## Unpacking values from a list into separate variables <br>(_sequence unpacking_)

In [22]:
# the long way - pull out each element by its index, one statement per variable
lst = [1, 2, 3]
a = lst[0]
b = lst[1]
c = lst[2]
print(lst, "->", a, b, c)

[1, 2, 3] -> 1 2 3


In [16]:
# use sequence unpacking - one name on the left for each item in lst,
# assigned in order
a, b, c = lst
print(lst, "->", a, b, c)

[1, 2, 3] -> 1 2 3


In [23]:
# can be fragile - the number of names must match the number of items exactly
lst = [1, 2]
# raises ValueError: three names on the left, but only two values in lst
a, b, c = lst

ValueError: not enough values to unpack (expected 3, got 2)

In [24]:
# if lst might be longer
lst = [1, 2, 3, 4, 5, 6]
# the starred name collects all remaining items into a list;
# naming it _ signals that those extra items are not going to be used
a, b, c, *_ = lst
print(lst, "->", a, b, c)

[1, 2, 3, 4, 5, 6] -> 1 2 3


In [25]:
# if lst might be shorter (or longer)
lst = [1, 2]
# pad the values with three default 0's so there are always at least 3 items
# to unpack; *_ then soaks up whatever is left over
a, b, c, *_ = (*lst, 0, 0, 0)
print(lst, "->", a, b, c)

[1, 2] -> 1 2 0


### Use unpacking to swap two values

In [46]:
# Non-Pythonic - swap by way of a temporary variable
a, b = 1, 2
print(a, b)

tmp = a
a = b
b = tmp
print(a, b)

# Pythonic (uses tuple unpacking) - put them back the way they were
# the right-hand side (b, a) is evaluated first, then unpacked into a and b,
# so no temporary variable is needed
a, b = b, a
print(a, b)

1 2
2 1
1 2


## Iterating over the items in a list

In [62]:
# split() with no arguments breaks the string into a list of words
seq = 'a list of words varying in length'.split()

# explicit indexing - common when using Python after developing in C
# (i is only ever used to look up seq[i])
for i in range(len(seq)):
    # do something with seq[i]
    # end=" " keeps all the words on one output line
    print(seq[i].title(), end=" ")
print()

# better - just iterate over the sequence
for item in seq:
    # do something with item
    print(item.title(), end=" ")

A List Of Words Varying In Length 
A List Of Words Varying In Length 

In [63]:
# if you absolutely need the index, use enumerate()
# each pass through the loop unpacks one (index, item) pair
for i, item in enumerate(seq):
    # i is the index, item is seq[i]
    # i goes from 0 to len(seq)-1
    print(i, item)

0 a
1 list
2 of
3 words
4 varying
5 in
6 length


In [64]:
# enumerate takes an optional 'start' argument
# (only the counter changes - item is still every element of seq, in order)
for i, item in enumerate(seq, start=1):
    # i now goes from 1 to len(seq), so i is no longer a valid index for item
    print(i, item)

1 a
2 list
3 of
4 words
5 varying
6 in
7 length


## Never update a list while iterating over it

In [43]:
seq = 'a list of the words varying in its length'.split()
print(seq)

# remove 3-letter words or shorter
# (deliberately buggy: items are deleted from seq while enumerate() is still
# walking over it)
for i, item in enumerate(seq):
    print(i, item, end=" ")
    if len(item) <= 3:
        print("X")
        # <-- bug! after the delete, every later item shifts down one position,
        # but the loop moves on to index i+1, so the item that slid into
        # position i is never examined. Here "list", "the" and "its" get
        # skipped, which leaves the short words "the" and "its" in the list.
        del seq[i]
    else:
        print()
print(seq)
print(f"All words are longer than 3 characters: {all(len(wd) > 3 for wd in seq)}")

# use asserts in test code, but not production code
# (this one fails, because "the" and "its" were never removed)
assert all(len(wd) > 3 for wd in seq), "Found at least one word <= 3 characters"

['a', 'list', 'of', 'the', 'words', 'varying', 'in', 'its', 'length']
0 a X
1 of X
2 words 
3 varying 
4 in X
5 length 
['list', 'the', 'words', 'varying', 'its', 'length']
All words are longer than 3 characters: False


AssertionError: Found at least one word <= 3 characters

In [42]:
# create a new list containing just what you want
seq = 'a list of the words varying in its length'.split()

# list comprehension is the perfect tool
# the new list is built completely from the old one, and only then is the
# name seq rebound to it - nothing is modified while it is being iterated
seq = [word for word in seq if len(word) > 3]
print(seq)
print(f"All words are longer than 3 characters: {all(len(wd) > 3 for wd in seq)}")
# this time the assert passes: every short word was filtered out
assert all(len(wd) > 3 for wd in seq), "Found at least one word <= 3 characters"

['list', 'words', 'varying', 'length']
All words are longer than 3 characters: True


## Updating lists passed to functions

In [31]:
# assigning to the argument inside the function doesn't change the value in the caller
def remove_short_words(seq_arg: list[str], min_length: int = 4) -> None:
    """Try (and fail) to remove words shorter than min_length from seq_arg.

    This version is a deliberate demonstration of what does NOT work: the
    assignment below only rebinds the local name 'seq_arg' to a new list, so
    the list object owned by the caller is left unchanged.

    Args:
        seq_arg: list of words supplied by the caller.
        min_length: minimum number of characters for a word to be kept.

    Returns:
        None.
    """
    # filter the argument itself, not the global 'seq', so the function does
    # not depend on a module-level variable of that name existing
    seq_arg = [word for word in seq_arg if len(word) >= min_length]
    # unfortunately, this statement just rebinds the local name 'seq_arg'
    # to a brand-new list, so the list in the caller is unchanged

seq = 'a list of the words varying in length'.split()
print(seq)
remove_short_words(seq)
# prints the same words as before the call - the caller's list was not changed
print(seq)

['a', 'list', 'of', 'the', 'words', 'varying', 'in', 'length']
['a', 'list', 'of', 'the', 'words', 'varying', 'in', 'length']


In [32]:
# use slice assignment inside function to update the given list
def remove_short_words(seq_arg: list[str], min_length: int = 4) -> None:
    """Update seq_arg in place, removing words shorter than min_length.

    Args:
        seq_arg: list of words to filter; its contents are replaced.
        min_length: minimum number of characters for a word to be kept.

    Returns:
        None. The result is visible to the caller through the list object
        that was passed in.
    """
    # build the filtered list from the argument itself (not the global 'seq'),
    # so the function works for any list that is passed in
    seq_arg[:] = [word for word in seq_arg if len(word) >= min_length]
    # slice assignment replaces the contents of the existing list object, so
    # this statement modifies the contents of the supplied list argument

seq = 'a list of the words varying in length'.split()
print(seq)
remove_short_words(seq)
# this time the short words are gone - the function changed the caller's list
print(seq)

['a', 'list', 'of', 'the', 'words', 'varying', 'in', 'length']
['list', 'words', 'varying', 'length']


## Iterating over dicts

In [69]:
foghorn_leghorn = "Now who's responsible I say who's responsible for this unwarranted attack upon my person?"
# map each word to its position in the sentence; when a word is repeated, the
# key keeps its original place in the dict, but its value is overwritten with
# the later position
dd = {wd: position for position, wd in enumerate(foghorn_leghorn.split())}

# iterate over dict gives the dict's keys, in the order they were first inserted
for item in dd:
    print(item)

Now
who's
responsible
I
say
for
this
unwarranted
attack
upon
my
person?


In [68]:
# use dict.items() to iterate over keys and values
# each pass unpacks one (key, value) pair; the value is the position of the
# word's last occurrence, since later duplicates overwrote earlier ones
for wd, last_position in dd.items():
    print(f"{wd=!r} {last_position=}")

wd='Now' last_position=0
wd="who's" last_position=5
wd='responsible' last_position=6
wd='I' last_position=3
wd='say' last_position=4
wd='for' last_position=7
wd='this' last_position=8
wd='unwarranted' last_position=9
wd='attack' last_position=10
wd='upon' last_position=11
wd='my' last_position=12
wd='person?' last_position=13


## Iterating over two sequences

In [71]:
# two parallel lists: aisles[i] is the aisle where items[i] is found
items = "eggs milk bananas bread".split()
aisles = "dairy dairy produce bakery".split()

# using indexing
# (the index from enumerate() is needed only to look up the matching aisle)
for i, item in enumerate(items):
    aisle = aisles[i]
    print(f"{aisle} - {item}")

dairy - eggs
dairy - milk
produce - bananas
bakery - bread


In [72]:
# iterate over both lists together using zip() - no index needed
# each pass yields one (item, aisle) pair; zip() stops as soon as the shorter
# of its inputs runs out
for item, aisle in zip(items, aisles):
    print(f"{aisle} - {item}")

dairy - eggs
dairy - milk
produce - bananas
bakery - bread


### Extra credit - use `collections.defaultdict` to group items by aisle

In [79]:
import collections

# using defaultdict, a missing key is initialized by calling the given factory function
# in this case, a new aisle entry will get initialized with an empty list
aisle_items = collections.defaultdict(list)

# no "is this aisle already a key?" check is needed before appending
for item, aisle in zip(items, aisles):
    aisle_items[aisle].append(item)

# a defaultdict is still a dict, so items() gives (aisle, list of items) pairs
for aisle, item_list in aisle_items.items():
    print(f"{aisle} - {item_list}")

dairy - ['eggs', 'milk']
produce - ['bananas']
bakery - ['bread']
