In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file in it.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for fname in files:
        print(os.path.join(root, fname))

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# 1.1 Unpacking a sequence into variables

**Problem** <br> 
You have an N-element tuple or sequence that you would like to unpack into a collection
of N variables.

In [None]:
# A 2-element tuple unpacks into exactly two names with a single assignment.
p = (4,5)
x,y = p
print("x =", x)
print("y =", y)

In [None]:
data = ["ACME", 50, 91.1, (2012,12,21)]
# The number of targets must equal the number of items in the sequence;
# on a mismatch Python raises ValueError.
name, shares, price, date = data
print(shares, " ", name, "shares at ", price, " on ", date)

In [None]:
# Unpacking works on any iterable, not just tuples and lists -- here a string
# is unpacked character by character.
s = "Hello"
a,b,c,d,e = s
print(a)

In [None]:
# There is no special syntax for discarding values while unpacking; by
# convention a throwaway name such as `_` is used for the parts you don't need.
# NOTE: in IPython/Jupyter `_` normally holds the previous cell's output;
# assigning to it here shadows that shortcut.
_, shares, price, _ = data
print(shares)
print(price)

# 1.2 Unpacking Elements from Iterables of Arbitrary Lengths

**Problem**<br>
You need to unpack N elements from an iterable, but the iterable may be longer than N
elements, causing a “too many values to unpack” exception.

In [None]:
# Python star expressions: a starred target collects "the rest" of the items
# into a list. Example: given 24 grades, drop the first and the last one and
# average only the middle. For 3 or 4 items naming each one is easy; for 24,
# use *middle.

def drop_first_last(grades):
    """Return the average of ``grades`` with the first and last item dropped.

    Args:
        grades: an iterable of numbers with at least three items.

    Returns:
        The arithmetic mean of the items between the first and the last.

    Raises:
        ValueError: if there are fewer than three grades (either the
            unpacking itself fails, or there is nothing left to average).
    """
    first, *middle, last = grades
    if not middle:
        # Exactly two grades: nothing remains once both ends are dropped.
        raise ValueError("need at least three grades to average the middle ones")
    return sum(middle) / len(middle)

In [None]:
# A starred target can come last: after name and email are taken, every
# remaining item (the phone numbers) is collected into a list.
record = ('Dave', 'dave@example.com', '773-555-1212', '847-555-1212')
name, email, *phone_numbers = record
print(phone_numbers)

In [1]:
# The starred target may also come first: it collects everything except the
# final item, which is bound to `current`.
*trailing, current = [10,8,7,1,9,5,10,3]
print(current)
print(trailing)

3
[10, 8, 7, 1, 9, 5, 10]


# 1.3 Keeping the Last N Items

**Problem** <br> 
You want to keep a limited history of the last few items seen during iteration or during some other kind of processing. <br> 

Geeks for geeks link to deque -> https://www.geeksforgeeks.org/deque-in-python/  <br>
Hacker rank -> https://www.hackerrank.com/challenges/py-collections-deque/problem <br> 

In [2]:
from collections import deque #we'll be using deque to accomplish task

def search(lines, pattern, history=5):
    """Yield ``(line, previous_lines)`` for every line that contains ``pattern``.

    Args:
        lines: an iterable of strings (for example an open text file).
        pattern: substring to look for in each line.
        history: maximum number of preceding lines to keep; it is passed to
            ``deque(maxlen=...)``.

    Yields:
        A tuple of the matching line and a deque holding up to ``history``
        lines that came immediately before it, oldest first.
    """
    previous_lines = deque(maxlen=history)

    for line in lines:
        if pattern in line:
            # Yield a snapshot rather than the live deque: the working deque
            # keeps changing as iteration continues, so results collected by
            # the caller (e.g. list(search(...))) would otherwise all share
            # one object showing the final history.
            yield line, deque(previous_lines, maxlen=history)
        previous_lines.append(line)

In [None]:
## EXAMPLE USE ON A FILE (not tried this yet)##
# 'somefile.txt' is a placeholder name. When the file is missing, the cell
# prints a notice instead of raising, so "Restart & Run All" still completes.

if __name__ == '__main__':
    try:
        f = open('somefile.txt')
    except FileNotFoundError:
        print("somefile.txt not found; skipping the search() example")
    else:
        with f:
            # Print each matching line preceded by its (up to 5) previous lines.
            for line, prevlines in search(f, 'python', 5):
                for pline in prevlines:
                    print(pline, end='')
                print(line, end='')
                print('-'*20)
            
            

In [4]:
"""Using deque(maxlen=N) creates a fixed sized queue. 
When new items are added and the queue is full, 
the oldest item is automatically removed."""

# Fill a bounded queue exactly to its capacity (maxlen=3).
q = deque(maxlen=3)
q.extend((1, 2, 3))
q

deque([1, 2, 3])

In [5]:
# The queue already holds 3 items (its maxlen), so appending 4 evicts the
# oldest item, 1.
q.append(4)
q

deque([2, 3, 4])

In [6]:
# Appending once more evicts the next-oldest item, 2.
q.append(5)
q

deque([3, 4, 5])

We could do this appending and deleting with lists as well, but we use deque because: <br>
1- It is an elegant solution. <br>
2- It's efficient and runs faster. Appending or popping at either end of a deque is O(1), while inserting or removing at the front of a list is O(n). <br>

In [7]:
"""When a maxlen is not provided,
we get an unbounded queue and we can append on either end.
Time complexity is O(1)"""

# No maxlen given, so this deque is unbounded; seed it with 1, 2, 3.
m = deque([1, 2, 3])
m

deque([1, 2, 3])

In [8]:
# appendleft() adds the item at the left (front) end of the deque.
m.appendleft(4)
m

deque([4, 1, 2, 3])

In [9]:
# pop() removes and returns the rightmost item.
m.pop()

3

In [10]:
# popleft() removes and returns the leftmost item; the cell then displays
# what is left in m.
m.popleft()
m

deque([1, 2])

In [13]:
# NOTE: this cell mutates m in place, so it is not idempotent -- every re-run
# appends another 5, 6, 7 to the deque.
m.append(5)
m.append(6)
m.append(7)
m

deque([1, 2, 5, 6, 7, 5, 6, 7, 5, 6, 7])

**Important Observation** -> The cell above mutates `m` in place, so 5, 6 and 7 are appended again every time the cell is run. I ran it three times, so 5, 6, 7 appears three times in the output. <br>

# 1.4 Finding the Largest or Smallest N Items 

**Problem** <br>
You want to make a list of the largest or smallest N items in a collection. <br> 

**Links related to heapq** <br>
1- documentation -> https://docs.python.org/3/library/heapq.html <br> 

2- geeks for geeks -> https://www.geeksforgeeks.org/heap-queue-or-heapq-in-python/ <br>

3- tutorials point -> https://www.tutorialspoint.com/heap-queue-or-heapq-in-python <br> 

In [14]:
### heapq module 

import heapq

nums = [1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]

top_three = heapq.nlargest(3, nums)       # 3 largest numbers, biggest first
bottom_three = heapq.nsmallest(3, nums)   # 3 smallest numbers, smallest first
print(top_three)
print(bottom_three)

[42, 37, 23]
[-4, 1, 2]


In [15]:
# nsmallest()/nlargest() also work on richer records when given a key function.

portfolio = [
    {'name': 'IBM', 'shares': 100, 'price': 91.1},
    {'name': 'AAPL', 'shares': 50, 'price': 543.22},
    {'name': 'FB', 'shares': 200, 'price': 21.09},
    {'name': 'HPQ', 'shares': 35, 'price': 31.75},
    {'name': 'YHOO', 'shares': 45, 'price': 16.35},
    {'name': 'ACME', 'shares': 75, 'price': 115.65},
]

# Rank the holdings by share price: three cheapest, then three most expensive.
cheap = heapq.nsmallest(3, portfolio, key=lambda stock: stock['price'])
print(cheap)
expensive = heapq.nlargest(3, portfolio, key=lambda stock: stock['price'])
print(expensive)

[{'name': 'YHOO', 'shares': 45, 'price': 16.35}, {'name': 'FB', 'shares': 200, 'price': 21.09}, {'name': 'HPQ', 'shares': 35, 'price': 31.75}]
[{'name': 'AAPL', 'shares': 50, 'price': 543.22}, {'name': 'ACME', 'shares': 75, 'price': 115.65}, {'name': 'IBM', 'shares': 100, 'price': 91.1}]


In [16]:
# nlargest()/nsmallest() returned new lists; nums itself is still in its
# original order.
nums

[1, 8, 2, 23, 7, -4, 18, 23, 42, 37, 2]

In [24]:
# heapq.heapify() reorders a list in place and always returns None, so
# heapify a copy and leave the original `nums` untouched.
heap = list(nums)    # copy of nums
heapq.heapify(heap)  # heapify the copy (not nums), so heap[0] is the minimum
heap

[-4, 2, 1, 23, 7, 2, 18, 23, 42, 37, 8]

In [25]:
"""
heap[0] is always the smallest item.
heappop() returns and removes the smallest item and replaces heap[0]
with the next smallest item. Time complexity -> O(log n)

more info on page 8
"""

# To find the three smallest items, call heappop() three times: each call
# removes and returns the current smallest item.
heapq.heappop(heap)

-4

In [26]:
# Second pop: the next-smallest item.
heapq.heappop(heap)

1

In [27]:
# Third pop: the third-smallest item.
heapq.heappop(heap)

2

# 1.5 Implementing a Priority Queue

**Problem** <br>
You want to implement a queue that sorts items by a given priority and always returns the item with the highest priority on each pop operation. <br> 

Read discussion section on page 9 for more details. 

In [30]:
import heapq

class PriorityQueue:
    """Queue whose pop() always returns the item with the highest priority.

    Entries are kept in a heapq-managed list as ``(-priority, index, item)``
    tuples. heapq pops the smallest entry first, so the priority is negated
    to get highest-first ordering. ``index`` is an insertion counter, so
    entries with equal priority compare by insertion order.
    """

    def __init__(self):
        self._index = 0
        self._queue = []

    def push(self, item, priority):
        """Add ``item`` with the given ``priority`` (larger values pop sooner)."""
        entry = (-priority, self._index, item)
        heapq.heappush(self._queue, entry)
        self._index += 1

    def pop(self):
        """Remove and return the highest-priority item."""
        _neg_priority, _order, item = heapq.heappop(self._queue)
        return item

In [35]:
### How it might be used example

class Item:
    """Minimal named object used to demonstrate the priority queue."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        template = 'Item({!r})'
        return template.format(self.name)
    
# Higher priority numbers are popped first. Note that `q` is rebound here: it
# no longer refers to the deque used in section 1.3.
q = PriorityQueue()
q.push(Item('foo'), 1)
q.push(Item('bar'), 5)
q.push(Item('spam'), 4)
q.push(Item('grok'), 1)
q.pop()

Item('bar')

In [36]:
# Next-highest priority: 'spam' (priority 4).
q.pop()

Item('spam')

In [37]:
# 'foo' and 'grok' both have priority 1; 'foo' was pushed first, so it is
# popped first.
q.pop()

Item('foo')

In [38]:
q.pop() ## items with the same priority are returned in the order they were inserted

Item('grok')

# 1.6 Mapping Keys to Multiple Values in a Dictionary

**Problem** <br>
You want to make a dictionary that maps keys to more than one value (a so-called "multidict") 

In [39]:
# To map one key to several values, store the values in a container.

# Lists keep insertion order and allow duplicates ...
d = {'a': [1, 2, 3], 'b': [4, 5]}

# ... OR use sets ({1, 2, 3} is a set literal, not a dict), which drop
# duplicates.
e = {'a': {1, 2, 3}, 'b': {4, 5}}

In [42]:
#using defaultdicts

from collections import defaultdict

# defaultdict(list) creates the empty list automatically the first time a key
# is accessed, so values can be appended without initializing the key first.
d = defaultdict(list)
for key, value in [('a', 1), ('b', 2), ('c', 3)]:
    d[key].append(value)

d

defaultdict(list, {'a': [1], 'b': [2], 'c': [3]})

In [45]:
# Not idempotent: every run of this cell appends another 5 to d['a'].
d['a'].append(5)
d

defaultdict(list, {'a': [1, 5, 5, 5], 'b': [2], 'c': [3]})

In [47]:
# Same idea with sets: a missing key starts out as an empty set.
s = defaultdict(set)
for key, member in [('a', 1), ('b', 2), ('c', 3)]:
    s[key].add(member)
s

defaultdict(set, {'a': {1}, 'b': {2}, 'c': {3}})

In [48]:
# Sets ignore duplicates: adding 1 again has no effect, while 5 is new and
# is added.
s['a'].add(1)
s['a'].add(5)
s

defaultdict(set, {'a': {1, 5}, 'b': {2}, 'c': {3}})

# 1.7 Keeping Dictionaries in Order

**Problem** <br>
You want to create a dictionary and you also want to control the order of items when iterating or serializing. <br> 

In [50]:
## OrderedDict preserves original insertion order
## Ordered dicts take up more memory as they create a doubly linked list
## when a new item is inserted it's placed at the end of the list.
## subsequent reassignment of an existing key doesn't change the order. 
from collections import OrderedDict

# Build the mapping from (key, value) pairs; OrderedDict remembers this order.
k = OrderedDict([('foo', 1), ('bar', 2), ('spam', 3), ('grok', 4)])

# Iteration yields the entries in the order they were inserted.
for key, value in k.items():
    print(key, value)

foo 1
bar 2
spam 3
grok 4


In [51]:
"""useful for building mapping that you may later want 
to serialize or encode into a different format. like json """

import json
# json.dumps() writes the keys in the OrderedDict's insertion order.
json.dumps(k)

'{"foo": 1, "bar": 2, "spam": 3, "grok": 4}'