1.Data Structure

In [1]:
#1.1 Unpacking a sequence into seperate variables
data = [ 'ACME', 50, 91.1, (2012, 12, 21) ]
_,share,price,_=data

In [3]:
print(share)
print(price)

50
91.1


In [4]:
#1.2 Unpacking Elements from Iterables of Arbitrary Length
record = ('Dave', 'dave@example.com', '773-555-1212', '847-555-1212')
name,email,*phone_numbers=record

In [5]:
phone_numbers

['773-555-1212', '847-555-1212']

In [6]:
line = 'nobody:*:-2:-2:Unprivileged User:/var/empty:/usr/bin/false'
uname,*fields,homedir,sh=line.split(':')

In [7]:
fields

['*', '-2', '-2', 'Unprivileged User']

In [8]:
#1.3 Keeping the Last N Items
from collections import deque

In [10]:
q=deque(maxlen=3)
q.append(1)
q.append(2)
q.append(3)
q

deque([1, 2, 3])

In [12]:
q.append(4)
q

deque([3, 4, 4])

In [14]:
q.appendleft(4)
q

deque([4, 4, 3])

In [15]:
q.pop()
q

deque([4, 4])

In [16]:
q.popleft()
q

deque([4])

In [17]:
#1.4 Finding the largest or smallest N items
import heapq

In [18]:
nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]
print(heapq.nlargest(3, nums))
print(heapq.nsmallest(3, nums))

[42, 37, 23]
[-4, 1, 2]


In [19]:
heap=list(nums)
heapq.heapify(heap)
heapq.heappop(heap)


-4

In [20]:
heap

[1, 2, 2, 23, 7, 8, 18, 23, 42, 37]

In [21]:
#1.5 Implementing a priority queue
class PriorityQueue:
    def __init__(self):
        self._queue = []
        self._index = 0
    def push(self, item, priority):
        heapq.heappush(self._queue, (-priority, self._index, item))
        self._index += 1
    def pop(self):
        return heapq.heappop(self._queue)[-1]

In [22]:
class Item:
    def __init__(self, name):
         self.name = name
    def __repr__(self):
        return 'Item({!r})'.format(self.name)

In [23]:
q=PriorityQueue()
q.push(Item('foo'),1)
q.push(Item('bar'), 5)
q.push(Item('spam'), 4)
q.push(Item('grok'), 1)

In [25]:
a = (1, Item('foo'))
b = (5, Item('bar'))
a<b

True

In [28]:
#1.6 Mapping Keys to Multiple Values in a Dictionary
d = {
'a' : [1, 2, 3],
'b' : [4, 5]
} 
e = {
'a' : {1, 2, 3},
'b' : {4, 5}
}

In [29]:
from collections import defaultdict
d = defaultdict(list)
d['a'].append(1)
d['a'].append(2)
d['b'].append(4)
d = defaultdict(set)

In [30]:
d['a'].add(1)
d['a'].add(2)
d['b'].add(4)

In [31]:
d = {} # A regular dictionary
d.setdefault('a', []).append(1)
d.setdefault('a', []).append(2)
d.setdefault('b', []).append(4)

In [37]:
# 1.7 Keeping Dictionaries in Order
from collections import OrderedDict #OrderedDict is more than twice as large as normal dictionary.
d = OrderedDict()
d['foo'] = 1
d['bar'] = 2
d['spam'] = 3
d['grok'] = 4
d

OrderedDict([('foo', 1), ('bar', 2), ('spam', 3), ('grok', 4)])

In [41]:
#1.8 Calculating with dictinaries
prices = {
'ACME': 45.23,
'AAPL': 612.78,
'IBM': 205.55,
'HPQ': 37.20,
'FB': 10.75
}
min_price= min(zip(prices.values(),prices.keys()))  
#zip() creates an iterator that can only be consumed once.It has to work with another function
prices_sorted = sorted(zip(prices.values(), prices.keys()))

In [42]:
min(prices, key=lambda k: prices[k]) 

'FB'

In [43]:
prices[min(prices, key=lambda k: prices[k])]

10.75

In [44]:
# If the value are the same, key will determine the calculations.
prices = { 'AAA' : 45.23, 'ZZZ': 45.23 }
print(min(zip(prices.values(), prices.keys())))
print(max(zip(prices.values(), prices.keys())))

(45.23, 'AAA')
(45.23, 'ZZZ')


In [46]:
#1.9 Finding Commonalities in two dictionaries
a = {
'x' : 1,
'y' : 2,
'z' : 3
}
b = {
'w' : 10,
'x' : 11,
'y' : 2
}

In [47]:
a.keys() & b.keys()

{'x', 'y'}

In [49]:
a.items() & b.items()

{('y', 2)}

In [50]:
#1.10. Removing Duplicates from a Sequence while Maintaining Order
def dedupe(items):
    seen = set()
    for item in items:
        if item not in seen:
            yield item
            seen.add(item)

In [52]:
a = [1, 5, 2, 1, 9, 1, 5, 10]
list(dedupe(a))

[1, 5, 2, 9, 10]

In [53]:
#1.11 Naming a slice
items = [0, 1, 2, 3, 4, 5, 6]
a = slice(2, 4)
items[a]

[2, 3]

In [54]:
#1.12. Determining the Most Frequently Occurring Items in a Sequence
words = [
'look', 'into', 'my', 'eyes', 'look', 'into', 'my', 'eyes',
'the', 'eyes', 'the', 'eyes', 'the', 'eyes', 'not', 'around', 'the',
'eyes', "don't", 'look', 'around', 'the', 'eyes', 'look', 'into',
'my', 'eyes', "you're", 'under'
]
from collections import Counter
word_counts=Counter(words)
top_three = word_counts.most_common(3)
print(top_three)

[('eyes', 8), ('the', 5), ('look', 4)]


In [58]:
morewords = ['why','are','you','not','looking','in','my','eyes']
for word in morewords:
    word_counts[word] +=1
word_counts['eyes']

11

In [61]:
word_counts.update(morewords)
word_counts

Counter({'are': 6,
         'around': 2,
         "don't": 1,
         'eyes': 14,
         'in': 6,
         'into': 3,
         'look': 4,
         'looking': 6,
         'my': 9,
         'not': 7,
         'the': 5,
         'under': 1,
         'why': 6,
         'you': 6,
         "you're": 1})

In [63]:
c=Counter(words)+Counter(words)
c

Counter({'around': 4,
         "don't": 2,
         'eyes': 16,
         'into': 6,
         'look': 8,
         'my': 6,
         'not': 2,
         'the': 10,
         'under': 2,
         "you're": 2})

In [64]:
# 1.13 Sorting a List of Dictionaries by a common key
rows = [
{'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
{'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
{'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}
]

In [71]:
from operator import itemgetter
rows_by_fname = sorted(rows, key=itemgetter('fname'))
rows_by_fname


[{'fname': 'Big', 'lname': 'Jones', 'uid': 1004},
 {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
 {'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
 {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]

In [74]:
rows_by_lfname = sorted(rows, key=lambda r: (r['lname'],r['fname']))
rows_by_lfname

[{'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
 {'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
 {'fname': 'Big', 'lname': 'Jones', 'uid': 1004},
 {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}]

In [73]:
min(rows, key=itemgetter('uid'))

{'fname': 'John', 'lname': 'Cleese', 'uid': 1001}

In [75]:
#1.14 Sorting Objects without Native Comparison Support
class User:
    def __init__(self, user_id):
        self.user_id = user_id
    def __repr__(self):
        return 'User({})'.format(self.user_id)
users = [User(23), User(3), User(99)]
sorted(users, key=lambda u: u.user_id)

[User(3), User(23), User(99)]

In [76]:
from operator import attrgetter
sorted(users, key=attrgetter('user_id'))

[User(3), User(23), User(99)]

In [79]:
by_name = sorted(users, key=attrgetter('user_id'))
by_name

[User(3), User(23), User(99)]

In [80]:
#1.15 Grouping Records Together Based on a Field
rows = [
{'address': '5412 N CLARK', 'date': '07/01/2012'},
{'address': '5148 N CLARK', 'date': '07/04/2012'},
{'address': '5800 E 58TH', 'date': '07/02/2012'},
{'address': '2122 N CLARK', 'date': '07/03/2012'},
{'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
{'address': '1060 W ADDISON', 'date': '07/02/2012'},
{'address': '4801 N BROADWAY', 'date': '07/01/2012'},
{'address': '1039 W GRANVILLE', 'date': '07/04/2012'},
]

In [81]:
from itertools import groupby
rows.sort(key=itemgetter('date'))

In [91]:
for date, items in groupby(rows, key=itemgetter('date')):
    print(date)
    for i in items:
        print(' ', i)

07/01/2012
  {'address': '5412 N CLARK', 'date': '07/01/2012'}
  {'address': '4801 N BROADWAY', 'date': '07/01/2012'}
07/02/2012
  {'address': '5800 E 58TH', 'date': '07/02/2012'}
  {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'}
  {'address': '1060 W ADDISON', 'date': '07/02/2012'}
07/03/2012
  {'address': '2122 N CLARK', 'date': '07/03/2012'}
07/04/2012
  {'address': '5148 N CLARK', 'date': '07/04/2012'}
  {'address': '1039 W GRANVILLE', 'date': '07/04/2012'}


In [87]:
from collections import defaultdict
rows_by_date = defaultdict(list)
for row in rows:
    rows_by_date[row['date']].append(row)

In [89]:
for r in rows_by_date['07/01/2012']:
    print(r)

{'address': '5412 N CLARK', 'date': '07/01/2012'}
{'address': '4801 N BROADWAY', 'date': '07/01/2012'}


In [92]:
# 1.16 Filtering Sequence Elements
mylist = [1, 4, -5, 10, -7, 2, 3, -1]
[n for n in mylist if n > 0]

[1, 4, 10, 2, 3]

In [94]:
 #Sometimes list comprehension could produce a large result. In that case, we can use generators
pos = (n for n in mylist if n > 0)
pos
for x in pos:
    print(x)

1
4
10
2
3


In [98]:
values = ['1', '2', '-3', '-', '4', 'N/A', '5']
def is_int(val):
    try:
        x = int(val)
        return True
    except ValueError:
        return False
ivals=list(filter(is_int,values))
print(ivals)

['1', '2', '-3', '4', '5']


In [99]:
addresses = [
'5412 N CLARK',
'5148 N CLARK',
'5800 E 58TH',
'2122 N CLARK'
'5645 N RAVENSWOOD',
'1060 W ADDISON',
'4801 N BROADWAY',
'1039 W GRANVILLE',
]
counts = [ 0, 3, 10, 4, 1, 7, 6, 1]

In [101]:
from itertools import compress
more5 = [n > 5 for n in counts]
list(compress(addresses, more5))
#compress could be useful if you want to apply the result from one sequence to another

['5800 E 58TH', '4801 N BROADWAY', '1039 W GRANVILLE']

In [102]:
#1.17 Extracting a subset of a dictionary
prices = {
'ACME': 45.23,
'AAPL': 612.78,
'IBM': 205.55,
'HPQ': 37.20,
'FB': 10.75
}
p1 = { key:value for key, value in prices.items() if value > 200 }
tech_names = { 'AAPL', 'IBM', 'HPQ', 'MSFT' }
p2 = { key:value for key,value in prices.items() if key in tech_names }

In [104]:
print(p1)
print(p2)

{'AAPL': 612.78, 'IBM': 205.55}
{'AAPL': 612.78, 'IBM': 205.55, 'HPQ': 37.2}


In [106]:
# 1.18 Mapping Names to sequence elements
from collections import namedtuple
Subscriber = namedtuple('Subscriber', ['addr', 'joined'])
sub = Subscriber('jonesy@example.com', '2012-10-19')
sub

Subscriber(addr='jonesy@example.com', joined='2012-10-19')

In [108]:
addr, join=sub
print(addr)
print(join)

jonesy@example.com
2012-10-19


In [114]:
Stock = namedtuple('Stock', ['name', 'shares', 'price'])
s=Stock('ACME',100,123.45)
s=s._replace(shares=75)
s
#Namedtuple may not be efficient if you want to change various instance attributes for an efficient data structure

Stock(name='ACME', shares=75, price=123.45)

In [118]:
# 1.19 Transforming and reducing Data at the same time.
nums = [1,2,3,4,5]
s= sum(x * x for x in nums)
import os
files = os.listdir ('C:\\Users\\Sherry Yang\\Documents\\Python Scripts\\')
if any(name.endswith('.py') for name in files):
    print ('python')
else:
    print('no')

no


In [119]:
s = ('ACME', 50, 123.45)
print(','.join(str(x) for x in s))

ACME,50,123.45


In [122]:
portfolio = [
{'name':'GOOG', 'shares': 50},
{'name':'YHOO', 'shares': 75},
{'name':'AOL', 'shares': 20},
{'name':'SCOX', 'shares': 65}
]
min_shares = min(s['shares'] for s in portfolio)
print(min_shares)
min_shares = min(portfolio, key=lambda s: s['shares'])
print(min_shares)

20
{'name': 'AOL', 'shares': 20}


In [123]:
#1.20 Combining Multiple Mappings into a Single Mapping
a = {'x': 1, 'z': 3 }
b = {'y': 2, 'z': 4 }

In [124]:
from collections import ChainMap
c = ChainMap(a,b)
print(c['x']) # Outputs 1 (from a)
print(c['y']) # Outputs 2 (from b)
print(c['z']) # Outputs 3 (from a)

1
2
3


In [125]:
values=ChainMap()
values['x']=1
values=values.new_child()
values['x']=2
values

ChainMap({'x': 2}, {'x': 1})

In [127]:
values=values.parents
values['x']

1

In [131]:
#Another way is to use update
a = {'x': 1, 'z': 3 }
b = {'y': 2, 'z': 4 }
merged=dict(b)
merged.update(a)
merged['x']

1

In [133]:
a['x']=13
merged['x']

1

Chapter 2 Strings and Text

In [134]:
#2.1 Splitting Strings on any of Multiple Delimit
import re

In [135]:
line = 'asdf fjdk; afed, fjek,asdf, foo'
re.split(r'[;,\s]\s*', line)

['asdf', 'fjdk', 'afed', 'fjek', 'asdf', 'foo']

In [136]:
fields = re.split(r'(;|,|\s)\s*', line)
fields

['asdf', ' ', 'fjdk', ';', 'afed', ',', 'fjek', ',', 'asdf', ',', 'foo']

In [137]:
values=fields[::2]
delimiters=fields[1::2]+['']
''.join(v+d for v,d in zip(values,delimiters))

'asdf fjdk;afed,fjek,asdf,foo'