## Dictionaries
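* Dictionaries are data structures that map keys to values. Since Python 3.7 they preserve insertion order, but items are looked up by key rather than by position. The value can be anything (lists, functions, strings, anything). The key has to be hashable, which in practice means immutable: for example, numbers, strings or tuples.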



In [1]:
# Build a small word -> definition lookup table
webstersDict = dict(
    person='a human being, whether an adult or child',
    marathon='a running race that is about 26 miles',
    resist=' to remain strong against the force or effect of (something)',
    run='to move with haste; act quickly',
)

In [2]:
# Display the dictionary that was just defined
webstersDict


{'person': 'a human being, whether an adult or child',
 'marathon': 'a running race that is about 26 miles',
 'resist': ' to remain strong against the force or effect of (something)',
 'run': 'to move with haste; act quickly'}

### 1. Accessing values in a dictionary


In [3]:
# Look up the meaning of the word 'marathon'
# Syntax: dictionary[key] returns the value stored under that key
webstersDict['marathon']

'a running race that is about 26 miles'

### 2. Updating a dictionary


In [4]:
# Add one new key-value pair by assigning to a key that is not yet present
webstersDict['shoe'] = 'an external covering for the human foot'

In [5]:
# Return the value stored under the 'shoe' key
webstersDict['shoe']

'an external covering for the human foot'

In [6]:
# update() adds or overwrites more than one key-value pair at a time:
# 'shirt' is a new key, while the existing 'shoe' entry gets a new value
webstersDict.update({'shirt': 'a long- or short-sleeved garment for the upper part of the body',
                     'shoe': 'an external covering for the human foot, usually of leather and consisting of a more or less stiff or heavy sole and a lighter upper part ending a short distance above, at, or below the ankle.'})

In [7]:
# Display the dictionary after update(): 'shoe' is replaced, 'shirt' is appended
webstersDict

{'person': 'a human being, whether an adult or child',
 'marathon': 'a running race that is about 26 miles',
 'resist': ' to remain strong against the force or effect of (something)',
 'run': 'to move with haste; act quickly',
 'shoe': 'an external covering for the human foot, usually of leather and consisting of a more or less stiff or heavy sole and a lighter upper part ending a short distance above, at, or below the ankle.',
 'shirt': 'a long- or short-sleeved garment for the upper part of the body'}

In [8]:
# Remove a key (together with its value) from the dictionary using del
del webstersDict['resist']

In [9]:
# Display the dictionary; the 'resist' entry is gone
webstersDict

{'person': 'a human being, whether an adult or child',
 'marathon': 'a running race that is about 26 miles',
 'run': 'to move with haste; act quickly',
 'shoe': 'an external covering for the human foot, usually of leather and consisting of a more or less stiff or heavy sole and a lighter upper part ending a short distance above, at, or below the ankle.',
 'shirt': 'a long- or short-sleeved garment for the upper part of the body'}

* __NOTE: Not Everything can Be Used as a Key__

Only hashable (in practice, immutable) objects can be used as keys. Lists cannot be used as keys, but numbers, strings, and tuples of hashable items can.



In [10]:
# A list is mutable and therefore unhashable, so it cannot be a dictionary key.
# The TypeError is the point of this cell; it is caught and printed so that
# "Restart & Run All" can continue past the demonstration.
try:
    webstersDict[['sock']] = 'a short stocking usually reaching to the calf or just above the ankle.'
except TypeError as err:
    print('TypeError:', err)


TypeError: unhashable type: 'list'

### 3. Returning values for a given key using the get() method

* This is especially valuable in word-count tasks



In [11]:
# Word counts used by the get() and pop() examples
storyCount = dict([('is', 100), ('the', 90), ('Michael', 12), ('runs', 5)])

In [12]:
# Display the word-count dictionary
storyCount

{'is': 100, 'the': 90, 'Michael': 12, 'runs': 5}

In [13]:
# Indexing with a key that does not exist raises KeyError.
# The error is the point of this cell; it is caught and printed so that
# "Restart & Run All" can continue past the demonstration.
try:
    storyCount['run']
except KeyError as err:
    print('KeyError:', err)

KeyError: 'run'

In [15]:
# get(key, default) returns the value stored under key if it is in the dictionary;
# otherwise it returns the supplied default instead of raising KeyError.
# 'Michael' is present, so its stored count is returned and the default 0 is ignored.
storyCount.get('Michael', 0)

12

In [16]:
# When no default is given and the key does not exist,
# get() returns None
print(storyCount.get('run'))

None


In [17]:
# Use 0 as the default value for a key that does not exist
print(storyCount.get('run', 0))

0


### 4. Remove key, return the Value


In [18]:
# pop(key) removes the key from the dictionary and returns its value.
# Note: the key is gone afterwards, so re-running this cell raises KeyError.
count = storyCount.pop('the')
print(count)

90


In [19]:
# Display the dictionary; the 'the' entry has been removed by pop()
storyCount

{'is': 100, 'Michael': 12, 'runs': 5}

### 5. Iterating through dictionaries


In [20]:
# keys() returns a view of the keys in the dictionary
print(storyCount.keys())

dict_keys(['is', 'Michael', 'runs'])


In [21]:
# values() returns a view of the values in the dictionary
print(storyCount.values())

dict_values([100, 12, 5])


In [22]:
# Iterating over a dictionary directly yields its keys
for key in storyCount: 
    print(key)

is
Michael
runs


In [23]:
# items() yields (key, value) pairs, unpacked here into two loop variables
for key, value in webstersDict.items():
    print(key, value)

person a human being, whether an adult or child
marathon a running race that is about 26 miles
run to move with haste; act quickly
shoe an external covering for the human foot, usually of leather and consisting of a more or less stiff or heavy sole and a lighter upper part ending a short distance above, at, or below the ankle.
shirt a long- or short-sleeved garment for the upper part of the body


### 6. Count word frequency in text
* using __collections.defaultdict()__

In [24]:
import collections

In [25]:
# Counter dictionary: a word that has not been seen yet starts at int() == 0
d = collections.defaultdict(int)
# Sample text, assembled from adjacent string literals
s = (
    'tv future in the hands of viewers with home theatre systems plasma high-definition tvs '
    'and digital video recorders moving into the living room  the way people watch tv will be '
    'radically different in five years  time '
)

In [26]:
# Count occurrences: split() breaks the text on whitespace, and each word's
# counter is incremented (defaultdict(int) supplies 0 for a word's first occurrence)
for word in s.split():
    d[word] += 1

In [27]:
# Show every (word, count) pair
d.items()

dict_items([('tv', 2), ('future', 1), ('in', 2), ('the', 3), ('hands', 1), ('of', 1), ('viewers', 1), ('with', 1), ('home', 1), ('theatre', 1), ('systems', 1), ('plasma', 1), ('high-definition', 1), ('tvs', 1), ('and', 1), ('digital', 1), ('video', 1), ('recorders', 1), ('moving', 1), ('into', 1), ('living', 1), ('room', 1), ('way', 1), ('people', 1), ('watch', 1), ('will', 1), ('be', 1), ('radically', 1), ('different', 1), ('five', 1), ('years', 1), ('time', 1)])

In [28]:
# Look up the count of a single word
d.get('tv')

2

### 7. Calculating with Dictionaries
* Perform various calculations (e.g. min, max, sorting) on dictionary data

In [29]:
# Ticker symbol -> share price, used by the calculation examples
prices = dict(
    MSFT=133.39,
    AAPL=202.64,
    SBUX=94.70,
    NFLX=291.44,
    FB=177.75,
)

In [30]:
# zip(values, keys) yields (price, name) tuples; tuples compare on their first
# element first, so min() picks the lowest price and keeps its name alongside
min_price = min(zip(prices.values(), prices.keys()))
min_price

(94.7, 'SBUX')

In [31]:
# Same (price, name) inversion with max() to get the highest price and its name
max_price = max(zip(prices.values(), prices.keys()))
max_price

(291.44, 'NFLX')

In [32]:
# Sorting the (price, name) tuples ranks the stocks from lowest to highest price
prices_sorted =  sorted(zip(prices.values(), prices.keys()))
prices_sorted

[(94.7, 'SBUX'),
 (133.39, 'MSFT'),
 (177.75, 'FB'),
 (202.64, 'AAPL'),
 (291.44, 'NFLX')]

In [33]:
# Common data reductions applied directly to a dictionary only process the keys,
# not the values, so these print the smallest and largest keys in string order
print(min(prices))
print(max(prices))

AAPL
SBUX


In [34]:
# To fix this, reduce over the values() of the dictionary instead
print(min(prices.values()))
print(max(prices.values()))

94.7
291.44


In [35]:
# Reduce over the keys, but rank each key by its price:
# prices.get maps a key to its value and serves as the key function
print(min(prices, key=prices.get))
print(max(prices, key=prices.get))

SBUX
NFLX


### 8. Finding commonalities in two dictionaries

In [36]:
# Two dictionaries that share some keys ('x', 'y') and one identical pair ('y', 2)
a = dict(x=1, y=2, z=3)

b = dict(x=11, y=2, w=30)



In [37]:
# Find keys in common (key views support set operations such as &)
a.keys() & b.keys()

{'x', 'y'}

In [38]:
# Find keys in a that are not in b
a.keys() - b.keys()

{'z'}

In [39]:
# Find (key, value) pairs in common: both the key and the value must match
a.items() & b.items()

{('y', 2)}

### 9. Extracting a subset of a dictionary

In [40]:
# Make a dictionary of all prices over 200 using a dict comprehension
p1 = {key:value for key, value in prices.items() if value>200}
p1

{'AAPL': 202.64, 'NFLX': 291.44}

In [41]:
# Make a dictionary of tech stocks: keep only the entries whose key is in tech_names
# (names in tech_names that are missing from prices are simply never matched)
tech_names = {'AAPL', 'IBM', 'HPQ', 'MSFT', 'FB', 'NFLX'}
p2 = {key:value for key, value in prices.items() if key in tech_names}
p2

{'MSFT': 133.39, 'AAPL': 202.64, 'NFLX': 291.44, 'FB': 177.75}

### 10. Sorting a list of dictionaries by a common key
* __itemgetter__ function from the operator module

In [42]:
from operator import itemgetter

In [43]:
# Sample records that all share the keys 'fname', 'lname' and 'uid'
rows = [
    dict(fname='Gordon', lname='Simpson', uid=1003),
    dict(fname='Stephanie', lname='Rampling', uid=1002),
    dict(fname='Warren', lname='Kelly', uid=1001),
    dict(fname='Lily', lname='Welch', uid=1004),
]

In [44]:
# Sort the rows by first name; itemgetter('fname') builds the key function
rows_by_fname = sorted(rows, key=itemgetter('fname'))
rows_by_fname

[{'fname': 'Gordon', 'lname': 'Simpson', 'uid': 1003},
 {'fname': 'Lily', 'lname': 'Welch', 'uid': 1004},
 {'fname': 'Stephanie', 'lname': 'Rampling', 'uid': 1002},
 {'fname': 'Warren', 'lname': 'Kelly', 'uid': 1001}]

In [45]:
# Sort the rows by user id
rows_by_uid = sorted(rows, key=itemgetter('uid'))
rows_by_uid

[{'fname': 'Warren', 'lname': 'Kelly', 'uid': 1001},
 {'fname': 'Stephanie', 'lname': 'Rampling', 'uid': 1002},
 {'fname': 'Gordon', 'lname': 'Simpson', 'uid': 1003},
 {'fname': 'Lily', 'lname': 'Welch', 'uid': 1004}]

In [46]:
# itemgetter can accept multiple keys: sort by last name, then by first name
# (the key order must match the lname-then-fname order the variable name promises)
rows_by_lfnames = sorted(rows, key=itemgetter('lname','fname'))
rows_by_lfnames

[{'fname': 'Warren', 'lname': 'Kelly', 'uid': 1001},
 {'fname': 'Stephanie', 'lname': 'Rampling', 'uid': 1002},
 {'fname': 'Gordon', 'lname': 'Simpson', 'uid': 1003},
 {'fname': 'Lily', 'lname': 'Welch', 'uid': 1004}]

In [47]:
# The same sort by uid written with a lambda expression (itemgetter() runs a bit faster)
sorted(rows, key=lambda r: r['uid'])

[{'fname': 'Warren', 'lname': 'Kelly', 'uid': 1001},
 {'fname': 'Stephanie', 'lname': 'Rampling', 'uid': 1002},
 {'fname': 'Gordon', 'lname': 'Simpson', 'uid': 1003},
 {'fname': 'Lily', 'lname': 'Welch', 'uid': 1004}]

In [48]:
# Lambda key returning a tuple: sort by last name, then by first name
sorted(rows, key=lambda r: (r['lname'], r['fname']))

[{'fname': 'Warren', 'lname': 'Kelly', 'uid': 1001},
 {'fname': 'Stephanie', 'lname': 'Rampling', 'uid': 1002},
 {'fname': 'Gordon', 'lname': 'Simpson', 'uid': 1003},
 {'fname': 'Lily', 'lname': 'Welch', 'uid': 1004}]

In [49]:
# min() accepts the same kind of key function: the row with the smallest uid
min(rows, key=itemgetter('uid'))

{'fname': 'Warren', 'lname': 'Kelly', 'uid': 1001}

In [50]:
# max() with a key function: the row with the largest uid
max(rows, key=itemgetter('uid'))

{'fname': 'Lily', 'lname': 'Welch', 'uid': 1004}

### 11. Grouping records together based on field
* __itertools.groupby()__ function

In [51]:
import itertools

In [52]:
# Sample records, each with an 'address' and a 'date' field, listed in unsorted date order
records = [
    {'address': address, 'date': date}
    for address, date in [
        ('2185 Southside Lane', '07/01/2017'),
        ('1661 Hillhaven Drive', '07/03/2017'),
        ('8262 Pin Oak Drive', '07/02/2017'),
        ('3776 Euclid Avenue', '07/04/2017'),
        ('2336 Bel Meadow Drive', '07/01/2017'),
        ('2773 Gateway Avenue', '07/03/2017'),
        ('3413 Park Drive', '07/04/2017'),
        ('2121 Rardin Drive', '07/02/2017'),
        ('774 Aviation Wayt', '07/01/2017'),
    ]
]

In [53]:
# Sort by the desired field first, as groupby() only examines consecutive items.
# Note: sort() reorders `records` in place, and the 'date' values are compared
# as strings, not as calendar dates.
records.sort(key=itemgetter('date'))

In [54]:
# Iterate in groups: groupby() yields one (key, group-iterator) pair for each
# run of consecutive records that share the same 'date'
for date, items in itertools.groupby(records, key=itemgetter('date')):
    print(date)
    for i in items:
        print('  ', i)

07/01/2017
   {'address': '2185 Southside Lane', 'date': '07/01/2017'}
   {'address': '2336 Bel Meadow Drive', 'date': '07/01/2017'}
   {'address': '774 Aviation Wayt', 'date': '07/01/2017'}
07/02/2017
   {'address': '8262 Pin Oak Drive', 'date': '07/02/2017'}
   {'address': '2121 Rardin Drive', 'date': '07/02/2017'}
07/03/2017
   {'address': '1661 Hillhaven Drive', 'date': '07/03/2017'}
   {'address': '2773 Gateway Avenue', 'date': '07/03/2017'}
07/04/2017
   {'address': '3776 Euclid Avenue', 'date': '07/04/2017'}
   {'address': '3413 Park Drive', 'date': '07/04/2017'}


In [55]:
# or if the goal is simply group the data together by date into a large data structure that
# allows random access, use defaultdict() to build a multidict
import collections

In [56]:
# defaultdict(list) creates an empty list the first time a date is seen,
# so every record can simply be appended under its date
records_by_date = collections.defaultdict(list)
for record in records:
    records_by_date[record['date']].append(record)

In [57]:
# Display the multidict: each date maps to the list of its records
records_by_date

defaultdict(list,
            {'07/01/2017': [{'address': '2185 Southside Lane',
               'date': '07/01/2017'},
              {'address': '2336 Bel Meadow Drive', 'date': '07/01/2017'},
              {'address': '774 Aviation Wayt', 'date': '07/01/2017'}],
             '07/02/2017': [{'address': '8262 Pin Oak Drive',
               'date': '07/02/2017'},
              {'address': '2121 Rardin Drive', 'date': '07/02/2017'}],
             '07/03/2017': [{'address': '1661 Hillhaven Drive',
               'date': '07/03/2017'},
              {'address': '2773 Gateway Avenue', 'date': '07/03/2017'}],
             '07/04/2017': [{'address': '3776 Euclid Avenue',
               'date': '07/04/2017'},
              {'address': '3413 Park Drive', 'date': '07/04/2017'}]})

### 12. Combining multiple mappings into a single mapping
* collections.ChainMap
* update()

In [58]:
# Two mappings with one overlapping key ('z') to combine
a = dict(x=1, z=3)
b = dict(y=2, z=4)

In [59]:
# ChainMap presents several mappings as one logical mapping;
# lookups search the underlying mappings in the order given
c = collections.ChainMap(a,b)
print(c['x'])
print(c['y'])
print(c['z']) # if there are duplicate keys, the values from the first mapping get used

1
2
3


In [60]:
# Number of distinct keys across all chained mappings ('z' is counted once)
len(c)

3

In [61]:
# Keys of the combined mapping; the duplicate key 'z' is listed once
list(c.keys())

['y', 'z', 'x']

In [62]:
# Values seen through the chain; 'z' resolves to the value from the first mapping
list(c.values())

[2, 3, 1]

In [63]:
# Operations that mutate the mapping always affect the first mapping listed
c['z'] = 10
c['w'] = 40
c

ChainMap({'x': 1, 'z': 10, 'w': 40}, {'y': 2, 'z': 4})

In [64]:
# Deleting also acts on the first mapping: 'x' is removed from it
del c['x']
c

ChainMap({'z': 10, 'w': 40}, {'y': 2, 'z': 4})

In [65]:
# The original dictionary `a` reflects the changes made through the ChainMap
a

{'z': 10, 'w': 40}

In [66]:
# ChainMap is particularly useful when working with scoped values:
# each new_child() puts a new, empty mapping in front of the existing ones,
# so the assignment that follows is stored in that new mapping
values = collections.ChainMap()
values['x'] = 1
values = values.new_child()  # add new mapping
values['x'] = 2
values = values.new_child()  # add new mapping
values['x'] = 3
values

ChainMap({'x': 3}, {'x': 2}, {'x': 1})

In [67]:
# Lookup returns the value from the front-most mapping that has the key
values['x']

3

In [68]:
values = values.parents    # discard the first (most recently added) mapping
values

ChainMap({'x': 2}, {'x': 1})

In [69]:
values = values.parents    # discard the first (most recently added) mapping again
values

ChainMap({'x': 1})

In [70]:
# Two dictionaries with an overlapping key ('z') to merge using the update() method
a = dict(x=1, z=3)
b = dict(y=2, z=4)

In [71]:
# update() requires either building a completely separate dictionary object
# or destructively altering one of the existing dictionaries.
# Here b is copied first, then a's entries overwrite duplicates, so a's value
# for 'z' wins.
merged = dict(b)
merged.update(a)
merged

{'y': 2, 'z': 3, 'x': 1}