The objective of this lesson is to review Python's main native data structures. In the previous class we reviewed lists. These are extremely flexible and useful; however, once we try to scale to more complex data structures, we need other tools. Today we will look at:

* Dictionaries
* Tuples
* Sets

In [None]:
# A dictionary can be pictured in two (equivalent) ways:
#   1. a "list" whose positions are labelled by arbitrary values instead of integers
#   2. a lookup table

# Reminder from last class: the list of months
months = [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
]

# A list answers positional questions such as "month number 4" or "the last month"
print(months[3], months[-1], sep='\n')

Apr
Dec


In [None]:
# Now picture a small company where a monthly task (say, inventory) rotates among the employees.
# Here the index of the month is irrelevant: what matters is which month corresponds to "Jack" or "Stan".

# employee name -> month assigned to that employee
my_dict = {
    'Jack': 'Jan',
    'Stan': 'Apr',
    'Roy': 'Jul',
    'Gus': 'Oct',
    'Lee': 'Dec',
}

print(my_dict)

{'Jack': 'Jan', 'Stan': 'Apr', 'Roy': 'Jul', 'Gus': 'Oct', 'Lee': 'Dec'}


In [None]:
# look up the value stored under the key 'Roy'
my_dict['Roy']

'Jul'

In [None]:
#my_dict.keys()
#my_dict.values()

In [None]:
# A dictionary literal is written as {key: value, ...}
# Notice the order of the printed output: it follows the order in which the keys were added

my_dict = {
    'David': "Data Education Team",
    'Jose': "Data Education Team & Data Student",
    'Euclides': "Data Student",
    'Miguel': "WebDev Education Team",
    'Munique': "Data Student & Administration Team",
}

print(my_dict)

{'David': 'Data Education Team', 'Jose': 'Data Education Team & Data Student', 'Euclides': 'Data Student', 'Miguel': 'WebDev Education Team', 'Munique': 'Data Student & Administration Team'}


In [None]:
# look up the value stored under the key 'David'
my_dict['David']

'Data Education Team'

In [None]:
#keys, items, values

In [None]:
# a view of all the keys currently in the dictionary
my_dict.keys()

dict_keys(['David', 'Jose', 'Euclides', 'Miguel', 'Munique'])

In [None]:
# Just like with 'range' before, the keys view of a dictionary can be "cast" to a list

my_list = list(my_dict.keys())
my_list

['Jose', 'Euclides', 'Miguel', 'Munique']

In [None]:
# let's see some properties of the keys of a dictionary -> type? iterable? indexable?

# the keys view has its own type (dict_keys)
type(my_dict.keys())

# iterable -> check (uncomment to try)
#for key in my_dict.keys():
#  print(key)

# but each individual key retains its own type (uncomment to try)
#for key in my_dict.keys():
#  print(type(key))

# indexable? Not so much (uncomment to try)
# my_dict.keys()[2]

dict_keys

In [None]:
# we can do the same things with the values of the dictionary
my_dict.values()

dict_values(['Data Education Team', 'Data Education Team & Data Student', 'Data Student', 'WebDev Education Team', 'Data Student & Administration Team'])

In [None]:
# and with the items, i.e. the (key, value) pairs
my_dict.items()

dict_items([('David', 'Data Education Team'), ('Jose', 'Data Education Team & Data Student'), ('Euclides', 'Data Student'), ('Miguel', 'WebDev Education Team'), ('Munique', 'Data Student & Administration Team')])

In [None]:
# a new key does not have to match the type of the existing keys (but it should):
# here an int key is added to a dictionary whose other keys are strings
my_dict[127330414] = "Webdev Student"
my_dict

{127330414: 'Webdev Student',
 'David': 'Data Education Team',
 'Euclides': 'Data Student',
 'Jose': 'Data Education Team & Data Student',
 'Miguel': 'WebDev Education Team',
 'Munique': 'Data Student & Administration Team'}

In [None]:
# pop still works on indexes (but now the indexes are the keys):
# it removes the entry for the given key and returns the value that was stored under it
a = my_dict.pop("David")
print(a)
print(my_dict)

Data Education Team
{'Jose': 'Data Education Team & Data Student', 'Euclides': 'Data Student', 'Miguel': 'WebDev Education Team', 'Munique': 'Data Student & Administration Team', 127330414: 'Webdev Student'}


In [None]:
# remove does not work on dictionaries: a dict has no remove() method,
# so the call below raises AttributeError and the print is never reached
my_dict.remove("Data Student")
print(my_dict)

AttributeError: ignored

In [None]:
# Goal: build a dictionary that counts the number of occurrences of each letter in a string
# (i.e. you give it a letter and it tells you how often that letter appears)
sentence = (
    "one ring to rule them all "
    "one ring to find them "
    "one ring to bring them all "
    "and in the darkness bind them"
)

In [None]:
# Count how many times each character of `sentence` appears (spaces included).
# .get(letter, 0) returns the running count, or 0 the first time a character is seen,
# so no separate membership test against the keys is needed.
count_letters = {}
for letter in sentence:
  count_letters[letter] = count_letters.get(letter, 0) + 1

print(count_letters)

{'o': 6, 'n': 12, 'e': 10, ' ': 22, 'r': 6, 'i': 7, 'g': 4, 't': 8, 'u': 1, 'l': 5, 'h': 5, 'm': 4, 'a': 4, 'f': 1, 'd': 4, 'b': 2, 'k': 1, 's': 2}


In [None]:
# how many times does the letter 'a' appear?
count_letters['a']

4

In [None]:
# do it yourself: create a dictionary that counts the number of occurrences of each word in a string
# first step: split the sentence on single spaces to obtain the list of words
word_list = sentence.split(" ")
print(word_list)

['one', 'ring', 'to', 'rule', 'them', 'all', 'one', 'ring', 'to', 'find', 'them', 'one', 'ring', 'to', 'bring', 'them', 'all', 'and', 'in', 'the', 'darkness', 'bind', 'them']


In [None]:
# solution: walk through the words and keep a running count per word
# .get(word, 0) yields the count so far, or 0 for a word that has not been seen yet
count_words = {}
for word in word_list:
  count_words[word] = count_words.get(word, 0) + 1

In [None]:
# how many times does the word 'ring' appear?
count_words['ring']

3

In [None]:
# other solution using the list count() method --> counts the number of times a word appears in the list
# (a repeated word is simply counted again and its entry overwritten with the same value)

count_words = {}

for word in word_list:
  count_words[word] = word_list.count(word)


In [None]:
# advanced: using the list count() method and a dictionary comprehension

{word : word_list.count(word) for word in word_list}

{'all': 2,
 'and': 1,
 'bind': 1,
 'bring': 1,
 'darkness': 1,
 'find': 1,
 'in': 1,
 'one': 3,
 'ring': 3,
 'rule': 1,
 'the': 1,
 'them': 4,
 'to': 3}

In [None]:
# Building a dictionary from two parallel lists
keys = ['dog', 'cat', 'ant', 'parrot']
values = ['mammal', 'mammal', 'insect', 'bird']

# zip and dictionary comprehensions:
# zip pairs each element of the first list with the element at the same position in the
# second list; the comprehension turns every (animal, kind) pair into a key: value entry
dict_animals = {animal: kind for animal, kind in zip(keys, values)}
dict_animals

{'ant': 'insect', 'cat': 'mammal', 'dog': 'mammal', 'parrot': 'bird'}

In [None]:
# deep dive into zip

list(zip(keys,values)) # zip puts the two lists together and creates a zip object where each element of the first list is associated with the respective element of the second list

[('dog', 'mammal'), ('cat', 'mammal'), ('ant', 'insect'), ('parrot', 'bird')]

In [None]:
# The earlier team dictionary was a bit silly: it is difficult to relate "Data Education Team & Data Student"
# to "Data Education Team" because they are different strings.
categories = ["Data Student", "WebDev Student", "WebDev Education Team", "Data Education Team", "Administration Team"]

# Welcome to tuples! Each person now maps to a tuple holding one role per element.
# A one-element tuple needs the trailing comma, e.g. ("Data Student",)
my_improved_dict = {}
my_improved_dict['David'] = ("Data Education Team",)
my_improved_dict['Jose'] = ("Data Education Team", "Data Student")
my_improved_dict['Euclides'] = ("Data Student",)
my_improved_dict['Miguel'] = ("WebDev Education Team",)
my_improved_dict['Munique'] = ("Data Student", "Administration Team")
my_improved_dict

{'David': ('Data Education Team',),
 'Euclides': ('Data Student',),
 'Jose': ('Data Education Team', 'Data Student'),
 'Miguel': ('WebDev Education Team',),
 'Munique': ('Data Student', 'Administration Team')}

In [None]:
# now how can I access the elements inside a tuple? By position, with an index
my_improved_dict["David"][0]

# don't try to modify tuples: you can't do much with them once they have been created
# my_improved_dict["David"].append("WebDev Student")

# why use them, then? They are very efficient. The less functionality something has, the more efficient it is at doing that limited set of things;
# conversely, the more flexibility something has, the more we pay in performance for that flexibility

('Data Education Team',)

In [None]:
# this is still not ideal: to check whether José and Munique share a role with tuples,
# every element of one tuple has to be compared by hand against every element of the other
# NOTE(review): a single result is recorded for this cell, presumably that of the last comparison - confirm
my_improved_dict["Munique"][0]==my_improved_dict["Jose"][0]
my_improved_dict["Munique"][1]==my_improved_dict["Jose"][0]
my_improved_dict["Munique"][0]==my_improved_dict["Jose"][1]
my_improved_dict["Munique"][1]==my_improved_dict["Jose"][1]

False

In [None]:
# Sets are good for set operations (not surprisingly),
# and they ignore repeated elements.

G7 = {
    "United States", "United Kingdom", "Japan", "France",
    "Germany", "Canada", "Italy",
}
NA = {
    "Antigua and Barbuda", "Bahamas", "Barbados", "Belize",
    "Canada", "Costa Rica", "Cuba", "Mexico",
    "Nicaragua", "Panama", "Trinidad and Tobago", "United States",
}

# countries that belong to both sets (& is the operator form of .intersection)
G7 & NA
# other operations to try:
#G7 | NA    # union
#NA - G7    # difference
#G7 - NA

{'Canada', 'United States'}

In [None]:
# membership test: is 'Canada' an element of NA?
'Canada' in NA

True

In [None]:
# membership test: is 'Barbados' an element of G7?
'Barbados' in G7

False

In [None]:
# negated membership test: is 'Barbados' absent from G7?
'Barbados' not in G7

True

In [None]:
# sets are iterable, but not indexed
#for country in G7:
#  print(country)

#G7[2]

In [None]:
# Same people as before, but each person's roles are now stored as a set
my_improved_dict = {
    'David': {"Data Education Team"},
    'Jose': {"Data Education Team", "Data Student"},
    'Euclides': {"Data Student"},
    'Miguel': {"WebDev Education Team"},
    'Munique': {"Data Student", "Administration Team"},
}
my_improved_dict

{'David': {'Data Education Team'},
 'Euclides': {'Data Student'},
 'Jose': {'Data Education Team', 'Data Student'},
 'Miguel': {'WebDev Education Team'},
 'Munique': {'Administration Team', 'Data Student'}}

In [None]:
# do it yourself: find whether Jose and Munique have roles in common
my_improved_dict['Jose'] & my_improved_dict['Munique']
# and find all the roles that Munique has and Jose does not
my_improved_dict['Munique'] - my_improved_dict['Jose']

{'Administration Team'}

In [None]:
# How about a set comprehension?
# Collect the distinct characters of 'abracadabra', leaving out 'a'

new_set = {char for char in 'abracadabra' if char != 'a'}
print(new_set)

# <= is the operator form of issubset: is every element of new_set in the right-hand set?
new_set <= {'a', 'b', 'r', 'c', 'd', 3, 'l'}

{'b', 'c', 'd', 'r'}


True