In [5]:
### limited history of few items
from collections import deque

def search(lines, pattern, history=5):
    """Yield each line containing ``pattern`` with the lines that preceded it.

    Args:
        lines: Iterable of strings to scan (for example an open text file).
        pattern: Substring to look for in each line.
        history: Maximum number of preceding lines kept as context.

    Yields:
        ``(line, previous_lines)`` tuples, where ``previous_lines`` is a deque
        (bounded by ``history``) holding the lines seen just before ``line``.
    """
    previous_values = deque(maxlen=history)
    for line in lines:
        if pattern in line:
            # Hand out a snapshot: the working deque keeps changing as the scan
            # continues, which would silently alter any result a caller stored.
            yield line, previous_values.copy()
        previous_values.append(line)

# Print every line containing "python", preceded by up to two lines of context.
# The recorded output suggests the lines keep their trailing newline, so the
# ' ' separator ends up at the start of the following printed line.
with open("data/sample_file1.txt") as f:
    for match, context in search(f, "python", 2):
        print(*context, match, sep=' ', end=' ')
        print('-' * 20)

line 1 python is easy
 --------------------
line 1 python is easy
 line 2 python is easy
 --------------------
line 1 python is easy
 line 2 python is easy
 line 3 python is easy
 --------------------
line 2 python is easy
 line 3 python is easy
 line 4 python is easy
 --------------------
line 3 python is easy
 line 4 python is easy
 line 5 python is easy
 --------------------
line 4 python is easy
 line 5 python is easy
 line 6 python is easy
 --------------------
line 5 python is easy
 line 6 python is easy
 line 7 python is easy --------------------


In [None]:
### Finding the cheapest and the most expensive stocks

In [7]:
# Sample holdings: one dict per stock with its ticker name, share count and price.
portfolio = [
   {'name': 'IBM', 'shares': 100, 'price': 91.1},
   {'name': 'AAPL', 'shares': 50, 'price': 543.22},
   {'name': 'FB', 'shares': 200, 'price': 21.09},
   {'name': 'HPQ', 'shares': 35, 'price': 31.75},
   {'name': 'YHOO', 'shares': 45, 'price': 16.35},
   {'name': 'ACME', 'shares': 75, 'price': 115.65}
]

In [9]:
### finding the cheapest and the most expensive stocks from these

import heapq

# nsmallest always returns a list, so the cheapest stock arrives wrapped in a
# one-element list
smallest_stock = heapq.nsmallest(1, portfolio, key=lambda stock: stock['price'])

print(smallest_stock)

[{'name': 'YHOO', 'shares': 45, 'price': 16.35}]


In [11]:
# getting the largest 3 stocks

# the three most expensive stocks, highest price first
largest_3 = heapq.nlargest(3, portfolio, key=lambda stock: stock['price'])

for stock in largest_3:
    print(stock)

{'name': 'AAPL', 'shares': 50, 'price': 543.22}
{'name': 'ACME', 'shares': 75, 'price': 115.65}
{'name': 'IBM', 'shares': 100, 'price': 91.1}


In [14]:
### Implementing a priority queue

import heapq

class PriorityQueue:
    """Queue that always pops the value with the highest priority first.

    Entries live in a heapq-managed list as ``(-priority, index, val)`` tuples.
    Negating the priority makes the min-heap return the largest priority first;
    the running index breaks ties, so values with equal priority come out in
    insertion order and the values themselves are never compared.
    """

    def __init__(self):
        self._index = 0
        self.queue = []

    def push(self, val, priority):
        """Insert ``val`` with the given ``priority``."""
        entry = (-priority, self._index, val)
        heapq.heappush(self.queue, entry)
        self._index += 1

    def pop(self):
        """Remove and return the value with the highest priority."""
        _, _, val = heapq.heappop(self.queue)
        return val

In [17]:
class Item:
    """Minimal wrapper around a name; defines no comparison methods."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        template = "Item({!r})"
        return template.format(self.name)


In [18]:
# Fresh, empty queue used by the push/pop cell that follows.
pq = PriorityQueue()

In [20]:
# queue three items, each using its own name as the priority ...
for priority in (10, 30, 90):
    pq.push(Item(priority), priority)

# ... then drain them: the highest priority comes out first
for _step in range(3):
    print(pq.pop())

Item(90)
Item(30)
Item(10)


In [30]:
from collections import defaultdict
import unittest

pairs = [('a', 1), ('a', 2), ('b', 3)]

# defaultdict creates the missing list on first access to a key
x = defaultdict(list)
for key, value in pairs:
    x[key].append(value)

# a plain dict needs setdefault to get the same effect
y = {}
for key, value in pairs:
    y.setdefault(key, []).append(value)

In [36]:
# Convert the defaultdict to a plain dict so both lines print in the same form.
print(dict(x))
print(y)

{'a': [1, 2], 'b': [3]}
{'a': [1, 2], 'b': [3]}


In [39]:
class TestList(unittest.TestCase):
    """Checks that the defaultdict and setdefault approaches build equal mappings."""
    def test_list_eq(self):
        """x (a defaultdict) and y (a plain dict) must hold the same keys and lists."""
        self.assertDictEqual(x, y) # Note: x and y are module-level names set in an earlier cell

# Running unittest inside a Jupyter kernel: argv=[''] stops unittest from
# parsing the kernel's own command-line arguments, and exit=False stops it
# from calling sys.exit() once the tests have finished.
unittest.main(argv=[''], verbosity=2, exit=False)

test_list_eq (__main__.TestList) ... ok

----------------------------------------------------------------------
Ran 1 test in 0.002s

OK


<unittest.main.TestProgram at 0x105ac3220>

In [55]:
# Two small dicts that share the keys 'x' and 'y' but only one identical item ('y': 2).
dict_a = {
   'x' : 1,
   'y' : 2,
   'z' : 3
}

dict_b = {
   'x' : 11,
   'y' : 2,
   'w' : 10
}


In [56]:
# keys() returns a view object over each dict's keys
print(dict_a.keys())
print(dict_b.keys())

dict_keys(['x', 'y', 'z'])
dict_keys(['x', 'y', 'w'])


In [57]:
# finding keys in common: key views support set operations such as & (intersection)
print(dict_a.keys() & dict_b.keys())

{'x', 'y'}


In [58]:
# find the keys in dict_a but not in dict_b (set difference) -> {'z'}

dict_a.keys() - dict_b.keys()

{'z'}

In [59]:
# find the (key, value) pairs that are common to both dicts

dict_a.items() & dict_b.items()

{('y', 2)}

In [60]:
# iterate over every key except 'y': the set difference builds a new set, so
# dict_a itself is left unchanged (and the iteration order is that of a set)

for k in dict_a.keys() - {'y'}:
    print(k, dict_a[k])

z 3
x 1


In [61]:
# removing duplicates from a sequence while maintaining order

lst_a = ['a', 'a', 'b', 'c', 'c', 'c']

def dedupe(lst):
    """Yield the items of ``lst`` in order, skipping any item already produced.

    Items must be hashable because they are tracked in a set.
    """
    seen = set()
    for item in lst:
        if item in seen:
            continue
        seen.add(item)
        yield item

# dedupe is a generator, so the values are produced lazily as the loop consumes them
for val in dedupe(lst_a):
    print(val)

a
b
c


In [63]:
# What if the items we are trying to dedupe are not hashable?
# We accept a key function that turns an unhashable item into a hashable value.

def dedupe(items, key=None):
    """Yield the items of ``items`` in order, skipping duplicates.

    Args:
        items: Iterable to filter.
        key: Optional callable mapping an item to the hashable value used to
            detect duplicates; when omitted, the item itself is used.
    """
    seen = set()
    for item in items:
        marker = key(item) if key is not None else item
        if marker in seen:
            continue
        seen.add(marker)
        yield item

In [64]:
# data

items = list(range(8))

# a slice object names the range once and can be used wherever [2:4] would be
a = slice(2, 4)

print(items[a.start:a.stop])

print(items[a])

[2, 3]
[2, 3]


In [65]:
# Sample records: one dict per user with first name, last name and uid.
rows = [
    {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003},
    {'fname': 'David', 'lname': 'Beazley', 'uid': 1002},
    {'fname': 'John', 'lname': 'Cleese', 'uid': 1001},
    {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}
]

In [66]:
from operator import itemgetter

# sorted() returns new lists, so `rows` keeps its original order
rows_by_fname = sorted(rows, key=lambda row: row['fname'])
rows_by_uid = sorted(rows, key=lambda row: row['uid'])

print(rows_by_uid, rows_by_fname, sep='\n')

[{'fname': 'John', 'lname': 'Cleese', 'uid': 1001}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'Big', 'lname': 'Jones', 'uid': 1004}]
[{'fname': 'Big', 'lname': 'Jones', 'uid': 1004}, {'fname': 'Brian', 'lname': 'Jones', 'uid': 1003}, {'fname': 'David', 'lname': 'Beazley', 'uid': 1002}, {'fname': 'John', 'lname': 'Cleese', 'uid': 1001}]


In [70]:
### Chain Map - works with the original dict

from collections import ChainMap

# can be used to enforce versioning
# i.e. the value for a key comes from the first dict in the chain that contains it

a = dict(x=1, z=3)
b = dict(x=2, z=4, w=1)
c = dict(x=3, w=2, q=10)

m = ChainMap(a, b, c)

# every lookup is answered by the first mapping in the chain that has the key
for key, expected in (('x', 1), ('z', 3), ('w', 1), ('q', 10)):
    assert m[key] == expected

# the ChainMap holds references to the original dicts, so a change made to
# one of them is visible through the chain

a['x'] = 10

assert m['x'] == 10

In [None]:
# group records together by a field

In [74]:
# Sample records: each row pairs an address with a date string.
rows = [
    {'address': '5412 N CLARK', 'date': '07/01/2012'},
    {'address': '5148 N CLARK', 'date': '07/04/2012'},
    {'address': '5800 E 58TH', 'date': '07/02/2012'},
    {'address': '2122 N CLARK', 'date': '07/03/2012'},
    {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
    {'address': '1060 W ADDISON', 'date': '07/02/2012'},
    {'address': '4801 N BROADWAY', 'date': '07/01/2012'},
    {'address': '1039 W GRANVILLE', 'date': '07/04/2012'},
]


In [75]:
from operator import itemgetter
from itertools import groupby

by_date = itemgetter('date')

# groupby only merges adjacent records, so order the rows by date first
# (this sorts `rows` in place)
rows.sort(key=by_date)

# walk the groups; note that groupby's key argument is a callable
for date, group in groupby(rows, key=by_date):
    print(date)
    for record in group:
        print(record)

          

07/01/2012
{'address': '5412 N CLARK', 'date': '07/01/2012'}
{'address': '4801 N BROADWAY', 'date': '07/01/2012'}
07/02/2012
{'address': '5800 E 58TH', 'date': '07/02/2012'}
{'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'}
{'address': '1060 W ADDISON', 'date': '07/02/2012'}
07/03/2012
{'address': '2122 N CLARK', 'date': '07/03/2012'}
07/04/2012
{'address': '5148 N CLARK', 'date': '07/04/2012'}
{'address': '1039 W GRANVILLE', 'date': '07/04/2012'}


In [78]:
# sorting objects without native comparison support

class User:
    """Plain record holding a user id; defines no ordering of its own."""

    def __init__(self, user_id):
        self.user_id = user_id

    def __repr__(self):
        template = "User ({!r})"
        return template.format(self.user_id)
    

users = [User(uid) for uid in (33, 21, 99)]

# sort by handing sorted() an explicit key function ...
print(sorted(users, key=lambda user: user.user_id))

# ... or, alternatively, build the key function with attrgetter
from operator import attrgetter
by_user_id = attrgetter('user_id')  # attrgetter also accepts several attribute names
print(sorted(users, key=by_user_id))


[User (21), User (33), User (99)]
[User (21), User (33), User (99)]
