### This is the Notebook for Lecture 10

In this lecture, we will learn about the dictionary data structure.

### Using the dict data structure to improve access!

In [1]:
# First approach to defining a dictionary, using dict()
english_to_french = dict()
type(english_to_french)

dict

In [4]:
english_to_french = dict(
    one = 'un',
    two = 'deux',
    three = 'trois',
    four = 'quatre',
    five = 'cinq'
)

In [5]:
english_to_french['four']

'quatre'

In [6]:
# Deliberate KeyError
english_to_french['seven']

KeyError: 'seven'

In [7]:
# Second approach to defining a dictionary using {}
english_to_spanish = {}
type(english_to_spanish)

dict

In [12]:
# Fill the dictionary using the {} literal syntax: 'key': value pairs.
# NOTE(review): the standard Spanish spelling is 'cuatro'; 'quatro' is kept
# here because later cells and their recorded outputs refer to it.
english_to_spanish = {
    'one': 'uno',
    'two': 'dos',
    'three': 'tres',
    'four': 'quatro',
    'five': 'cinco',
}

In [13]:
english_to_spanish['three']

'tres'

In [15]:
# Adding Key/Value pairs to a dictionary
english_to_spanish['six'] = 'seis'
english_to_spanish['dog'] = 'perro'

print(english_to_spanish)

{'one': 'uno', 'two': 'dos', 'three': 'tres', 'four': 'quatro', 'five': 'cinco', 'six': 'seis', 'dog': 'perro'}


### Dictionary Exception Examples

In [16]:
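# One approach: use the in operator, which returns True or False, to check for a KEY
# (in only looks at keys, so testing for the value 'perro' gives False)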
'perro' in english_to_spanish

False

In [17]:
'dog' in english_to_spanish

True

In [18]:
'one' in english_to_spanish

True

In [19]:
# Using try/except to print a dictionary
def print_dict( dict_print, key ):
    """Print the value stored under key, or a message if the key is absent.

    dict_print -- the dictionary to look the key up in
    key        -- the key to look up; it does not have to be a string

    Nothing is returned; the result is printed.
    """

    # In-Class Code
    try:
        print( dict_print[key] )

    except KeyError:
        # str(key) keeps the message working for non-string keys such as 100;
        # key + '...' would raise a TypeError inside this handler instead.
        print( str(key) + ' is not in the dictionary.')

In [20]:
print_dict( english_to_spanish, 'one' )

uno


In [21]:
print_dict( english_to_spanish, '100' )

100 is not in the dictionary.


Additionally, we can use the get method to specify a default value if the key is not present.

In [33]:
english_to_spanish.get('skdslklk', None)

In [23]:
english_to_spanish.get('four', None)

'quatro'

In [24]:
english_to_spanish.get('quatro', None)

In [25]:
english_to_spanish['quatro']

KeyError: 'quatro'

### Printing all the keys and values

In [26]:
# Prints all the keys
english_to_spanish.keys()

dict_keys(['one', 'two', 'three', 'four', 'five', 'six', 'dog'])

In [27]:
# Prints all the values
english_to_spanish.values()

dict_values(['uno', 'dos', 'tres', 'quatro', 'cinco', 'seis', 'perro'])

### Ordering 

In [28]:
family_name = {}

In [30]:
family_name['Matthew'] = 40
family_name['Margot'] = 35
family_name['James'] = 37
family_name['Alfred'] = 73
family_name['Amy'] = 37
family_name['Aidan'] = 7
family_name['Kathy'] = 70
family_name['Evie'] = 3
family_name['Teddy'] = 1

In [31]:
# Note the ordering when I print the keys
family_name.keys()

dict_keys(['Matthew', 'Margot', 'James', 'Alfred', 'Amy', 'Aidan', 'Kathy', 'Evie', 'Teddy'])

In [32]:
# Note the ordering when I print the values
family_name.values()

dict_values([40, 35, 37, 73, 37, 7, 70, 3, 1])

### Compare access time for list and dict
<p></p>
Finding an element in a dict is O(1) on average, while finding an element in a list is O(n) on average.

In [34]:
# Run Time Examples for dicts vs lists
import random

def pop_list( num_vals ):
    """Collect distinct random integers in a list using a hand-written search.

    Makes num_vals draws with random.randint(0, num_vals) and appends a drawn
    value only when the search below does not find it in the list. The list is
    local to the function and nothing is returned; the function is meant to be
    timed.
    """

    # The list starts out already holding 0
    the_list = [0]

    # One random draw per pass
    for _ in range( num_vals ):

        # Candidate value for this pass
        insert_num = random.randint(0, num_vals)

        # Walk the list from the front, stopping on a match or past the last element
        position = 0
        while position < len(the_list) and the_list[position] != insert_num:
            position += 1

        # Question: Why is this commented code bad?
        # Answer: (Put your answer in here for class notes)
        # while the_list[position] != insert_num and position < len(the_list):
            # position += 1

        # Reaching the end means no match was found, so store the new value
        if position == len(the_list):
            the_list.append(insert_num)


def pop_dict( num_vals ):
    """Collect distinct random integers as the keys of a dictionary.

    Makes num_vals draws with random.randint(0, num_vals) and stores each drawn
    value as a key (with the value 1) unless it is already a key. The dictionary
    is local to the function and nothing is returned; the function is meant to
    be timed.
    """

    # Start from an empty dictionary
    the_dict = {}

    # One random draw per pass
    for _ in range( num_vals ):

        # Generate the random number
        insert_num = random.randint(0, num_vals)

        # Use not in to check whether the number is already a key
        if insert_num not in the_dict:
            the_dict[insert_num] = 1

In [35]:
# -n 100 runs the statement 100 times per timing run (it sets the loop count; it is not a limit)
%timeit -n 100 pop_list(1000)

24.3 ms ± 92.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [36]:
%timeit -n 100 pop_dict(1000)

549 µs ± 6.21 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


### Combining Concepts
<p></p>
Now let's use <b>not in</b> with a list and check the times. Why do you think the run time changes compared with pop_list?

In your notes, describe the **difference in time** between pop_list, pop_dict, and pop_list_mod. Why are the run times different, and what is the difference between **program run time** and **program time complexity**?
<p></p>
    <font color="red">Response Here</font>

In [37]:
def pop_list_mod( num_vals ):
    """Collect distinct random integers in a list, using not in for the search.

    Makes num_vals draws with random.randint(0, num_vals) and appends each drawn
    value unless the list already contains it. The list is local to the function
    and nothing is returned; the function is meant to be timed.
    """

    # The list starts out already holding 0
    the_list = [0]

    # One random draw per pass
    for _ in range( num_vals ):

        # Candidate value for this pass
        insert_num = random.randint(0, num_vals)

        # Use not in to check whether the list already holds the value
        if insert_num not in the_list:
            the_list.append(insert_num)

In [38]:
%timeit -n 100 pop_list_mod(1000)

2.4 ms ± 29.1 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


### In-Class Coding Opportunity

For every word in a file, count how many times it occurs. Use try/except where appropriate.

In [42]:
def word_count(file_name):
    """Print every distinct word in a file followed by how often it occurs.

    Words are whatever str.split() yields for each line, so punctuation and
    capitalisation are kept ('wood,' and 'wood' count as different words).
    Words are printed in the order they are first seen.

    file_name -- path of the text file to read

    Nothing is returned. If the file cannot be opened or read, a message is
    printed instead of the counts.
    """

    # In-Class Code Starts here
    word_dict = {}

    try:
        # with closes the file for us, even if reading fails part-way through
        with open(file_name) as read_file:

            for line in read_file:

                for word in line.split():

                    # get() supplies 0 the first time a word is seen
                    word_dict[word] = word_dict.get(word, 0) + 1

    except IOError:
        print('Unable to open file: ' + file_name)
        return

    # Printing happens outside the try so that only file errors are
    # reported as 'Unable to open file'
    for key in word_dict:
        print( key, word_dict[key] )

In [43]:
word_count('frost.txt')

Two 2
roads 2
diverged 2
in 3
a 3
yellow 1
wood, 2
And 6
sorry 1
I 8
could 2
not 1
travel 1
both 2
be 2
one 3
traveler, 1
long 1
stood 1
looked 1
down 1
as 5
far 1
To 1
where 1
it 2
bent 1
the 8
undergrowth; 1
Then 1
took 2
other, 1
just 1
fair, 1
having 1
perhaps 1
better 1
claim, 1
Because 1
was 1
grassy 1
and 3
wanted 1
wear; 1
Though 1
for 2
that 3
passing 1
there 1
Had 1
worn 1
them 1
really 1
about 1
same, 1
morning 1
equally 1
lay 1
In 1
leaves 1
no 1
step 1
had 1
trodden 1
black. 1
Oh, 1
kept 1
first 1
another 1
day! 1
Yet 1
knowing 1
how 1
way 1
leads 1
on 1
to 1
way, 1
doubted 1
if 1
should 1
ever 1
come 1
back. 1
shall 1
telling 1
this 1
with 1
sigh 1
Somewhere 1
ages 2
hence: 1
I- 1
less 1
traveled 1
by, 1
has 1
made 1
all 1
difference. 1


### Exercise: emoji translator

Write a function, emoji_translate, that replaces words with emoji icons.

In [46]:
# Lookup table: the text token on the left is replaced by the emoji on the right
EMOJIS = {
    ':)' : '😀',
    '<3' : '💙',
    'snek': '🐍',
    'pupper': '🐕'
}

def emoji_translate(text):
    """Return text with every word that is a key of EMOJIS replaced by its emoji.

    The text is split on whitespace, so runs of spaces collapse to one, and
    every word in the result (including the last) is followed by a single
    space. Empty text gives an empty string.
    """
    # In-Class Code
    # get(token, token) falls back to the word itself when it has no emoji
    translated = [ EMOJIS.get(token, token) for token in text.split() ]

    # Put one space after every piece and glue the pieces together
    return ''.join( piece + ' ' for piece in translated )

In [47]:
emoji_translate('I <3 Notre Dame')

'I 💙 Notre Dame '

In [48]:
emoji_translate('Harry Potter speaks snek')

'Harry Potter speaks 🐍 '

In [49]:
emoji_translate('Eirinn the Pupfessor is a good pupper')

'Eirinn the Pupfessor is a good 🐕 '

In [None]:
from ipywidgets import interact

In [None]:
interact(emoji_translate, text='')

### Spell Checking Setup

In [None]:
# Use import requests to obtain public online files
import requests

# Import string to get the string library
import string

# wget is a Linux command used to download an online file.
# The cells below call wget(url, path), so we define a Python function
# that plays the same role using requests.
def wget(url, path):
    """Download the resource at url and save it to the local file path.

    url  -- address of the online file
    path -- name of the local file to write (overwritten if it exists)

    Raises a requests exception if the download fails or the server answers
    with an HTTP error status.
    """
    response = requests.get(url, timeout=30)

    # Fail loudly on an HTTP error rather than saving an error page
    response.raise_for_status()

    # Write the raw bytes so the saved file matches what the server sent
    with open(path, 'wb') as out_file:
        out_file.write(response.content)

In [None]:
wget('http://google.com', 'google.txt')

In [None]:
import os

In [None]:
# Review: Check the size of the file we just downloaded
os.path.getsize('google.txt')

In [None]:
# We will now download a publicly available dictionary
wget('https://github.com/dwyl/english-words/raw/master/words.txt', 'words.txt')

In [None]:
os.path.getsize('words.txt')

In [None]:
# To review, we will print the first 100 words in the English dictionary
# Remember, a with statement automatically closes the file when its block ends

In [1]:
# Now we will load the dictionary into a list
def load_words_list(path):
    """Read a text file into a list with one lower-cased, stripped entry per line.

    path -- path of the word file to read

    Returns the list of entries in file order.
    """

    # Open the file using open(path) as the word_file
    with open(path) as word_file:

        # Initialize a list
        words = []

        # Iterate through the word file
        for word in word_file:

            # Append the word into the list
            # Change the word to lower case, and then strip
            words.append( word.lower().strip() )

    # Return the list representing the dictionary
    return words

In [None]:
english_words = load_words_list('words.txt')

In [None]:
len(english_words)