# python `set`

## Experiment

In [None]:
import random

# 15,000 random integers drawn from 0..2999 - the data that will be scanned
BIG_LIST = [random.randrange(3000) for _ in range(15_000)]
# 1,000 random integers from the same range (duplicates are possible) - the values to look for
BAD_NUMBERS = [random.randrange(3000) for _ in range(1_000)]

In [None]:
def num_of_bad_numbers_v1():
    """Count the elements of BIG_LIST that also occur in BAD_NUMBERS.

    The membership test deliberately runs against the plain list, so this
    version serves as the slow baseline of the experiment.
    """
    return sum(1 for number in BIG_LIST if number in BAD_NUMBERS)

In [None]:
def num_of_bad_numbers_v2():
    """Count the elements of BIG_LIST that also occur in BAD_NUMBERS.

    BAD_NUMBERS is converted to a set once, before the scan, and every
    membership test is then made against that set.
    """
    bad_lookup = set(BAD_NUMBERS)
    return sum(1 for number in BIG_LIST if number in bad_lookup)

In [None]:
%%timeit
num_of_bad_numbers_v1()

In [None]:
%%timeit
num_of_bad_numbers_v2()

Conclusion: `obj in set` is much more efficient than `obj in list`.

## Motivation

We want a fast `in` operation (a.k.a. "contains").

In [None]:
def contains_v1(lst, obj) -> bool:
    """Report whether `obj` occurs in `lst` by scanning it element by element.

    This mirrors what `obj in list` does under the hood: an element matches
    when it is the very same object as `obj` or compares equal to it.

    Args:
        lst: iterable to search (a list in the examples).
        obj: value to look for.

    Returns:
        True as soon as a matching element is found, False if the scan
        reaches the end without a match.
    """
    for el in lst:
        # Identity is checked before equality, exactly like the built-in `in`;
        # this matters for objects that are not equal to themselves (e.g. NaN).
        if el is obj or el == obj:
            return True
    return False

def contains_v2(lst, obj) -> bool:
    """Report whether `obj` occurs in `lst` using the built-in `in` operator."""
    is_member = obj in lst
    return is_member

# sample data: 4 is present in the list, 2 is not
lst1 = [1, 5, 4, 7, 8]
obj1, obj2 = 4, 2
# one result per line: first both answers of contains_v1, then both of contains_v2
print(contains_v1(lst1, obj1), contains_v1(lst1, obj2), sep='\n')
print(contains_v2(lst1, obj1), contains_v2(lst1, obj2), sep='\n')

# `obj in set` operation uses hashing;
# it does not go through the whole data structure on every call

## `set` type

**Definition**: `set` is a mutable unordered collection of unique immutable elements.

- _mutable_ - we can change them (e.g. adding/removing elements);
- _unordered_ - elements do not have indices;
- _unique elements_ - speaks for itself;
- _immutable elements_ - elements must be hashable, so objects of mutable (changeable) types (`list`s, `dict`s, other `set`s) cannot be added.


Let's look at those properties using some examples.

In [None]:
# a set literal: comma-separated elements inside curly braces
st1 = {3, 5, 2, 6} # this is a set
print(st1, type(st1)) # show the elements together with the type of the object

In [None]:
# mutable: the same set object is changed in place
st1.add(7) # put a new element into the existing set
print(st1)

st1.remove(5) # take an element out of the same set
print(st1)

In [None]:
# unordered:
print({1, 2, 3, 4} == {3, 2, 4, 1}) # True: the order does not matter

print([1, 2, 3, 4] == [3, 2, 4, 1]) # False: this does not hold for lists, where the order matters

In [None]:
# elements: unique
st2 = {3, 6}
print(st2)
st2.add(3) # 3 is already in the set, so nothing changes
st2.add(6) # same for 6
print(st2) # no difference

st3 = {3, 3, 3, 3, 6, 6, 6} # removes duplicates on initialization
print(st3)
print(st2 == st3) # True: both sets consist of exactly 3 and 6

In [None]:
# elements: immutable
'''
immutable: int, str, tuple, ...
mutable: list, set, dict, ...
'''
st4 = {'a', 'c', 'e'} # this is ok
print(st4)
st5 = {(3, 4), (1, 4), (3, 5)} # this is also ok
print(st5)

st6 = {[3], [4, 1], [5, 6, 2]} # ERROR: lists are mutable, and mutable built-in objects are not hashable
# try `hash([1, 2, 3])` to see the same TypeError

## `set` operations

In [52]:
# two sample sets that share the elements 3 and 4
set1 = {1, 4, 6, 3, 7}
set2 = {2, 3, 4, 9, 12}
print(set1, set2)

{1, 3, 4, 6, 7} {2, 3, 4, 9, 12}


Common python operations

**can** do:

In [53]:
# compare equality:
print(set1 == set2) # False: the two sets do not hold the same elements

False


In [54]:
# find length: the number of elements in the set
print(len(set1))

5


In [55]:
# iterate over the elements one by one (a set has no indices, so do not rely on a particular order)
for el in set1:
    print(el, end=' ') # keep everything on one line, separated by spaces

1 3 4 6 7 

In [50]:
# find union: a new set with every element that is in set1, in set2, or in both
set1_or_set2 = set1 | set2 # or `set1.union(set2)`
print(set1_or_set2)

{1, 2, 3, 4, 6, 7, 9, 12}


In [51]:
# find intersection: a new set with only the elements that are present in both sets
set1_and_set2 = set1 & set2 # or `set1.intersection(set2)`
print(set1_and_set2)

{3, 4}


**cannot** do:

In [None]:
# subscript (index): elements of a set have no indices, so this raises a TypeError
set1[1]

In [56]:
# add with `+`: not defined for sets (use `set1 | set2` to get the union instead)
set1 + set2

TypeError: unsupported operand type(s) for +: 'set' and 'set'

In [58]:
# divide and multiply: neither `*` nor `/` is defined for two sets
set1 * set2 # raises TypeError, so execution of the cell stops here
# set1 / set2 # run this line instead of the one above to see the same error for `/`

TypeError: unsupported operand type(s) for *: 'set' and 'set'