In [None]:
# Literal syntax of the four basic built-in containers.
# The variables are deliberately not named list/tuple/set/dict: rebinding those
# names would shadow the built-in types and break later calls such as
# set("abca"), list(d) or defaultdict(list).
example_list  = [1,2,3]
example_tuple = (1,2,3)
example_set   = {1,2,3}
example_dict  = {"first":1, "second":2, "third":3}

# Sets and Dictionaries

Searching data in a list is inefficient, so here we introduce some other data structures that are more efficient for searching and organizing data.

## Set

- a set is unordered: Python may store and print the elements in a different order than they were written
- duplicates are removed automatically

In [10]:
# "c" is listed twice on purpose; the set keeps only one copy of it
s = {"a", "c", "b",  "c"}
print(s)

{'a', 'c', 'b'}


In [11]:
# membership test with the `in` operator
"a" in s

True

In [13]:
# set() accepts any iterable: a string contributes its characters, a list its items
print(set("abca"))
print(set(["a", "c", "b"]))


{'a', 'c', 'b'}
{'a', 'c', 'b'}


In [14]:
set() # an empty set must be written as set(); {} creates an empty dictionary (see below)

set()

In [15]:
# sorted() returns a list with the elements of the set in ascending order
sorted(s)

['a', 'b', 'c']

In [56]:
# two overlapping sets: "c" and "d" are members of both
s1 = {"a", "b", "c", "d"}
s2 = {"c", "d", "r"}

In [57]:
print(s1 | s2)  # s1.union(s2)
print(s1 & s2)  # s1.intersection(s2)
print(s1 - s2)  # s1.difference(s2)
print(s1 ^ s2)  # s1.symmetric_difference(s2)

print(s1 <= s2) # s1.issubset(s2)
print(s1 >= {"a"}) # s1.issuperset({"a"})
print(s1=={"c","d","a","b"}) # equality does not depend on the order of elements

{'c', 'b', 'd', 'a', 'r'}
{'d', 'c'}
{'a', 'b'}
{'b', 'r', 'a'}
False
True
True


In [18]:
print(s1)
s1.add("e")     # insert a single element
s1.remove("a")  # delete an element; raises KeyError when the element is missing
print(s1)

{'d', 'a', 'c', 'b'}
{'e', 'c', 'b', 'd'}


Now there is no "a" in the set, so we get `KeyError`:

In [19]:
# "a" is no longer in s1, so remove() raises KeyError. The error is caught and
# displayed so that "Restart & Run All" can continue past this cell.
# (s1.discard("a") would silently do nothing instead of raising.)
try:
    s1.remove("a")
except KeyError as err:
    print("KeyError:", err)

KeyError: 'a'

## Dictionary
- data in the form `key: value`
    - keys are unique and can be of any immutable (hashable) type
    - values can be of any type


- operations on a single element run in constant time (the computer knows where to find the element)
- operations on the whole dictionary run in linear time (the computer still needs to iterate over all elements)

In [25]:
# Population of selected countries in millions (key = country, value = population).
countries = {"Czechia": 11, "Italy": 59, "Turkey": 85, "Poland": 38}
countries["Czechia"]  # value stored under the key "Czechia"

11

In [26]:
# for a dictionary, `in` tests whether the key is present
"Uzbekistan" in countries

False

Now when we try to obtain the value of a key that does not exist, we get a KeyError:

In [27]:
# Indexing with a key that is not present raises KeyError. The error is caught
# and displayed so that "Restart & Run All" can continue past this cell.
try:
    countries["Uzbekistan"]
except KeyError as err:
    print("KeyError:", err)

KeyError: 'Uzbekistan'

Or we can ask nicely with `get` and obtain `None` (or a default value of our choice) instead of an error:

In [28]:
a = countries.get("Uzbekistan")                  # missing key -> None
b = countries.get("Uzbekistan", "did not find")  # missing key -> the supplied default
print(a)
print(b)

None
did not find


In [29]:
countries["Uzbekistan"] = 35  # assigning to a key that is not present inserts it
countries["Uzbekistan"]

35

In [35]:
# items() gives the (key, value) pairs of the dictionary
countries.items()

dict_items([('Czechia', 11), ('Italy', 59), ('Turkey', 85), ('Poland', 38), ('Uzbekistan', 35)])

#### Iterating over dictionaries

In [32]:
# iterate over the keys (list(countries) builds the same list)
[k for k in countries.keys()]

['Czechia', 'Italy', 'Turkey', 'Poland', 'Uzbekistan']

In [33]:
# iterate over the values (list(countries.values()) builds the same list)
[v for v in countries.values()]

[11, 59, 85, 38, 35]

In [36]:
# items() yields (key, value) pairs, unpacked here into k and v.
# print() is called for its side effect and itself returns None, so the
# resulting list holds only None values; a plain for-loop is the usual choice
# when the list is not needed.
a = [print(k,"has about", v, "million people") for k,v in countries.items()]
print(a)

Czechia has about 11 million people
Italy has about 59 million people
Turkey has about 85 million people
Poland has about 38 million people
Uzbekistan has about 35 million people
[None, None, None, None, None]


#### Creating sets and dictionaries using comprehensions

In [37]:
# set comprehension: curly braces with a single expression (no `key: value`)
{k for k in range(5)}

{0, 1, 2, 3, 4}

In [41]:
# dict comprehension: maps every x in 0..4 to its cube
powers = {x: x**3 for x in range(5)}
powers[3]

27

In [None]:
# look up the value stored under the key 3
powers[3]

#### Initializing dictionaries
For computing the frequency of words in a text, we can use a dictionary to store the words and their counts `{word: count}`.
- If the word is not in the dictionary, we add it with a count of 1. If it is already in the dictionary, we increment its count.
- this can be achieved using a `defaultdict` from the `collections` module.

In [45]:
from collections import defaultdict
d = defaultdict(int) # the argument is a factory that is called without arguments to create the default value: int() == 0
print(d)
d["something"] # returns 0 because int() == 0; the lookup also stores the key with that value

defaultdict(<class 'int'>, {})


0

If we choose a different type (another typical choice is `list`), we get an empty list as the default value:

In [43]:
# with list as the factory, a missing key yields an empty list
l = defaultdict(list)
l["a"]

[]

In [44]:
d["a"] += 1  # a missing key starts at the default 0 and is then incremented
d["d"] += 2
print(d)
print(list(d))          # iterating over a dictionary yields its keys
print(list(d.items()))  # (key, value) pairs

defaultdict(<class 'int'>, {'something': 0, 'a': 1, 'd': 2})
['something', 'a', 'd']
[('something', 0), ('a', 1), ('d', 2)]


In [46]:
# Count how many times each word occurs in the sentence.
sentence = "hello hello world worldy world"
word_occurencies = defaultdict(int)
for token in sentence.split():
    # A word seen for the first time starts at int() == 0. With a plain dict
    # the same step would be written as d[token] = d.get(token, 0) + 1.
    word_occurencies[token] = word_occurencies[token] + 1
word_occurencies.items()

dict_items([('hello', 2), ('world', 2), ('worldy', 1)])

In [48]:
# Group the words of the sentence by their length: {length: [words]}.
word_lengths = defaultdict(list)
for word in "hello hello my something world".split():
    # a length seen for the first time starts with an empty list,
    # so append() works without checking whether the key exists
    word_lengths[len(word)].append(word)

# Only the last expression of a cell is displayed: show the 5-letter words.
word_lengths[5]

['hello', 'hello', 'world']

#### Complicated example of a dictionary

In [51]:
# A list of contact records. Each record is a dict whose values are a string
# ("name"), a list of strings ("email") and a nested dict ("adress").
# NOTE: the key is spelled "adress" in every record and in the lookup below.
contacts = [
    {
        "name": "John",
        "email": ["john123@seznam.cz", "john666@de.com"],
        "adress": {
            "street": "Karlovo namesti",
            "number": 1
        }
    },
    {
        "name": "Dohn",
        "email": ["do@h.n"],
        "adress": {
            "street": "Somewhere",
            "number": 11
        }
    }
]
print(contacts[0]["email"][1])          # second e-mail of the first contact
print(contacts[1]["adress"]["street"])  # street of the second contact

john666@de.com
Somewhere


In [52]:
# find Dohn in contacts and print the stored e-mails, without knowing the index of Dohn
for contact in contacts:
    if contact["name"] == "Dohn":
        print(contact["email"])
        break  # stop at the first match
    
# the same search as a list comprehension; it collects the e-mail list of
# every matching contact, so the result is a list of lists
[contact["email"] for contact in contacts if contact["name"] == "Dohn"]

['do@h.n']


[['do@h.n']]

### Choose function based on a string

In [53]:
def f2():
    """Print the square of 5."""
    print(5 * 5)
def f3():
    """Print the cube of 5."""
    print(5 * 5 * 5)

# Dispatch table: maps an order string to the function that handles it.
# The functions are stored uncalled (no parentheses).
function_choice = dict(s=f2, t=f3)

def execute(order: str):
    """Run the function registered for `order` in `function_choice`.

    An order without a registered function only prints a notice.
    """
    if order not in function_choice:
        print("I do not know this order!")
        return
    handler = function_choice[order]
    handler()


# Interactive loop: keeps reading orders from the keyboard and executing them
# until the user types "end". The cell waits for input, so it does not finish
# on its own during "Run All".
while True:
    order = input("order: ")
    if order == "end":
        break
    execute(order)

25
125
125
25
