## Deck of Cards

In [1]:
import collections
from random import choice

Card = collections.namedtuple('Card', ['rank', 'suit'])


class FrenchDeck:
    """A 52-card deck that can be measured, indexed and sliced like a list.

    Only ``__len__`` and ``__getitem__`` are defined; that is enough for
    ``len(deck)``, ``deck[i]``, ``deck[a:b]`` and ``random.choice(deck)``.
    """

    # Ranks in ascending order: '2' .. '10', then J, Q, K, A.
    ranks = [str(n) for n in range(2, 11)] + ['J', 'Q', 'K', 'A']
    suits = ['spades', 'diamonds', 'clubs', 'hearts']

    def __init__(self):
        # Cards are grouped by suit and ordered by rank inside each suit.
        cards = []
        for suit in self.suits:
            for rank in self.ranks:
                cards.append(Card(rank, suit))
        self._cards = cards

    def __len__(self):
        """Return the number of cards held by the deck."""
        return len(self._cards)

    def __getitem__(self, position):
        """Return the card at ``position`` (or a list of cards for a slice)."""
        return self._cards[position]

# Weight of each suit, used to order cards that share the same rank.
suit_values = {'spades': 3, 'hearts': 2, 'diamonds': 1, 'clubs': 0}


def spades_high(card):
    """Return an integer sort key for ``card``: rank first, then suit.

    The rank's position in ``FrenchDeck.ranks`` is scaled by the number of
    suits so that the suit weight only decides between cards of equal rank.
    """
    rank_index = FrenchDeck.ranks.index(card.rank)
    suit_weight = suit_values[card.suit]
    return rank_index * len(suit_values) + suit_weight

def sorted_deck(deck):
    """Return a generator over the cards of ``deck`` in ``spades_high`` order."""
    # The sort happens right away; only the iteration over the result is lazy.
    ordered = sorted(deck, key=spades_high)
    return (card for card in ordered)
                                                       

In [2]:
# print(...) with a single argument prints the same text as the Python 2
# print statement, and matches the print(...) call used later in this notebook.
print(Card('7', 'diamonds'))
deck = FrenchDeck()
print(len(deck))
print(deck[0])
# since we implemented len and getitem, we can just use random's choice
print(choice(deck))
# also supports slicing
print(deck[:3])

# for card in sorted_deck(deck):
#     print(card)

Card(rank='7', suit='diamonds')
52
Card(rank='2', suit='spades')
Card(rank='7', suit='hearts')
[Card(rank='2', suit='spades'), Card(rank='3', suit='spades'), Card(rank='4', suit='spades')]


## Vector

In [3]:
from math import hypot

class Vector:
    """Two-dimensional vector supporting repr, abs(), truthiness, + and scalar *."""

    def __init__(self, x=0, y=0):
        self.x = x
        self.y = y

    def __repr__(self):
        # %r shows each component with its own repr.
        return 'Vector(%r, %r)' % (self.x, self.y)

    def __abs__(self):
        """Return the Euclidean length of the vector."""
        return hypot(self.x, self.y)

    def __bool__(self):
        """Return False only when both components are falsy (the zero vector)."""
        return bool(self.x or self.y)

    # Python 2 consults __nonzero__ for truth testing and ignores __bool__.
    __nonzero__ = __bool__

    def __add__(self, other):
        """Return a new Vector that is the component-wise sum with ``other``."""
        return Vector(self.x + other.x, self.y + other.y)

    def __mul__(self, scalar):
        """Return a new Vector with both components multiplied by ``scalar``."""
        return Vector(self.x*scalar, self.y*scalar)

## Generator Expressions

In [4]:
colors = 'black white'.split()
sizes = 'S M L'.split()
# The generator expression produces one label per (color, size) pair on demand.
for tshirt in ('%s, %s' % (c, s) for c in colors for s in sizes):
    print(tshirt)

black, S
black, M
black, L
white, S
white, M
white, L


## Named Tuples
* Adds field names to tuples so you can access them with the dot operator
* Also adds:
    - **._fields** which lists out the field names
    - **._asdict()** which makes a dictionary out of the tuple
    - **._make(iterable)** which makes a named tuple out of an iterable

In [5]:
from collections import namedtuple
City = namedtuple('City', 'name country population coordinates')
tokyo = City('Tokyo', 'JP', 36.933, (35.68, 139.69))
# One formatted string gives the same space-separated line that the
# multi-value print statement produced.
print('%s %s %s' % (tokyo, tokyo.population, tokyo.coordinates))

City(name='Tokyo', country='JP', population=36.933, coordinates=(35.68, 139.69)) 36.933 (35.68, 139.69)


## Bisect: binary search
* ``bisect(haystack, needle)`` returns the index of where needle should be placed in the haystack
* ``insort(seq, item)`` inserts the item in the sequence to keep the order

In [6]:
import bisect
import random

def grade(score, breakpoints=[60,70,80,90], grades='FDCBA'):
    """Return the letter grade for ``score``.

    ``breakpoints`` must be sorted ascending; ``grades`` needs one more entry
    than ``breakpoints``. A score equal to a breakpoint gets the higher grade.
    """
    return grades[bisect.bisect(breakpoints, score)]

print([grade(score) for score in [33, 99, 77, 70, 89, 90, 100]])

my_list = []
SIZE = 7
for i in range(SIZE):
    new_item = random.randrange(SIZE*2)
    # insort puts new_item at its sorted position, so my_list stays ordered.
    bisect.insort(my_list, new_item)
    # Single formatted string: same text as "print '%2d ->' % new_item, my_list".
    print('%2d -> %s' % (new_item, my_list))

['F', 'A', 'C', 'C', 'B', 'A', 'A']
 0 -> [0]
 6 -> [0, 6]
12 -> [0, 6, 12]
 2 -> [0, 2, 6, 12]
12 -> [0, 2, 6, 12, 12]
 8 -> [0, 2, 6, 8, 12, 12]
 0 -> [0, 0, 2, 6, 8, 12, 12]


## Trie with setdefault

In [7]:
def build_trie(str, trie=None):
    """Insert the characters of ``str`` into ``trie`` as a chain of nested dicts.

    Args:
        str: the word to insert, one dictionary level per character.
        trie: the nested-dict trie to extend in place. When omitted, a fresh
            empty trie is created for this call.

    Returns:
        The root of the trie (the same object that was passed in, if any).
    """
    # A ``{}`` default would be created once and shared by every call that
    # omits ``trie``, so unrelated words would pile up in one hidden trie.
    if trie is None:
        trie = {}
    sub_trie = trie
    for char in str:
        # setdefault returns the existing child or inserts an empty one.
        sub_trie = sub_trie.setdefault(char, {})
    return trie

def parse_trie(str, trie):
    """Follow ``str`` through ``trie`` and return the deepest node reached.

    Descent stops at the first character with no matching child, so the
    result is the sub-trie for the longest prefix of ``str`` that is present.
    """
    node = trie
    for ch in str:
        if ch not in node:
            break
        node = node[ch]
    return node
        

trie = {}
# Each call extends the same dict in place and returns it.
print(build_trie('abcd', trie=trie))
print(build_trie('abce', trie=trie))
print(build_trie('acbd', trie=trie))
print(parse_trie('abc', trie))

{'a': {'b': {'c': {'d': {}}}}}
{'a': {'b': {'c': {'e': {}, 'd': {}}}}}
{'a': {'c': {'b': {'d': {}}}, 'b': {'c': {'e': {}, 'd': {}}}}}
{'e': {}, 'd': {}}


## Trie extending UserDict

# Dictrie

> Note: this is Python 2.7 compatible, not Python 3 (for now)

## Trie Overview

A ***trie*** is a search tree that optimizes word dictionary traversal by organizing words in a tree, character by character. 

Given a dictionary containing **"hell", "hello",** and **"help"**, the following tree represents the words in our dictionary:
```
             h
             |
             e
             |
             l
            / \
           /   \
          p     l
          |    / \
          _   _   o
         
```

You can follow any path down the tree, keeping track of the characters, until you hit an underscore. At this point, you have a valid word. This data structure is particularly useful for efficient autocorrection or finding the shortest/longest word that starts with a given substring.

In Python, a trie can be represented with nested dictionaries like so:

```python
{'h': {'e' : {'l' : {'l': {
                            'o': {
                                  ' ': {}
                                },
                            ' ': {}
                        },
                     'p': {
                           ' ': {}
                          },
                    }
               }
      }
}
```

where, instead of an underscore, we indicate the end of a valid word with a space key mapped to an empty dictionary. 

This library extends common dictionary indexing to work with tries, for example, `trie['hel']` returns a subtrie:
```python
trie['hel']
>>> {'p': {' ': {}}, 
     'l': {' ': {}, 
           'o': {' ': {}}}}
```

and 

```python
'hello' in trie
>>> True
```

tests whether a key is contained in the trie. It also adds features like iterating over the words in the trie, etc.

### Quickstart

To start using `Dictrie`, clone this repository:

```bash
git clone https://github.com/sufyanAbbasi/dictrie
```

or download the `dictrie.py` file here: https://github.com/sufyanAbbasi/dictrie/dictrie.py

Move this file to your working directory (sorry, no `pip` yet!) and run the following:

```python
from dictrie import Dictrie

if __name__ == "__main__":
    #initialize a trie with an existing word list
    trie = Dictrie(['hell', 'help', 'hello'])
    
    #add some more words to the list
    trie.build_trie(['hellish', 'hellcat'])
    
    #or
    
    for word in ['hellish', 'hellcat']:
        trie[' '] = word
    
    #access a subtrie:
    trie['hel']
    
    #test if a key exists in the trie:
    'hel' in trie
    
    #test if a word exists in the trie:
    trie.is_word('hello')
    
    #iterate over all the words in a trie
    for word in trie:
        print word
        
    #iterate over the words that start with a given string:
    for word in trie.get_words('hell'):
        print word
    
```


### Initialization
Initialize a bare trie by:

```python
trie = Dictrie()
```

Or by supplying any number of iterables (list, set, etc.) of words:

```python
trie = Dictrie(['hell', 'help', 'hello'], {'hellish', 'hellcat'})
```

### Building a Trie
The following two methods add words to the trie:
As a function:
```python
trie.build_trie(['hell', 'help', 'hello'])
```
Through iteration:
```python
for word in ['hell', 'help', 'hello']:
    trie[' '] = word
```
Here, the key is ignored in this form and each word is automatically placed in the trie.

### Accessing a Subtrie
The Dictrie class extends dictionaries to allow indexing by word substrings. For example:
```python
trie['hel']
```
produces a subtrie of the words that start with the key:
```python
>>> {'p': {' ': {}}, 'l': {' ': {}, 'o': {' ': {}}}}
```

### Testing for Existence
The Dictrie class supports the ```in``` keyword, which checks if the sequence of characters exists in the trie:
```python
'hel' in trie
>>> True
```

To test if a valid word exists in the trie, use the ```is_word(<string>)``` method:
```python
trie.is_word('hel')
>>> False

trie.is_word('hello')
>>> True
```

### Iteration
```python
for word in trie:
    print word
```
prints every word in the trie, from shortest to longest:

```
>>> hell 
    help 
    hello
```

```trie.get_words(<string>)``` returns a generator that iterates over the words that begin with `<string>`, from shortest to longest (alphabetically among words of the same length).

```python
for word in trie.get_words('hell'):
    print word
```
prints every word that begins with **hell** from shortest to longest:

```
>>> hell 
    hello
```

### Testing Dictrie:
[github.com/dwyl/english-words](https://github.com/dwyl/english-words) is a fantastic GitHub repo with over 450,000 English words. Download the text file, [words_alpha.txt](https://github.com/dwyl/english-words/blob/master/words_alpha.txt), place it in your working directory, and run:

```python
with open('words_alpha.txt') as fp:
    for word in fp:
        trie[' '] = word.strip()
```

which builds a trie containing all of the available words. Then run:

```python
for word in trie.get_words('trie'):
    print word
```
to list all words in the dictionary that start with **trie** in size order:

```
>>> tried
    trier
    tries
    triene
    triens
    triers
    triedly
    ...
    trieciously
    triennially
    trierarchal
    trierarchic
    trienniality
    trierarchies
    triethylamine
    triethanolamine
    triethylstibine

```

## To-Do List
* Limit key type to strings
* Figure out how to better deal with iteration on a subtrie
    * Iterate on the subtrie itself or the words in the list?
* Test for robustness

## Credit
Luciano Ramalho's [**Fluent Python**](http://shop.oreilly.com/product/0636920032519.do) is an amazing resource for taking your Python skill to the next level. I would highly recommend picking it up!

In [413]:
from UserDict import UserDict
from collections import deque

class Dictrie(UserDict, object):
    """Trie of words stored as nested dictionaries, one level per character.

    A word ends where a node holds the terminator key ``' '`` mapped to an
    empty dict, so ``Dictrie(['hi'])`` stores ``{'h': {'i': {' ': {}}}}``.
    Indexing and ``in`` accept whole prefixes: ``trie['hel']`` follows the
    path h -> e -> l and returns the nested dict found there.
    """

    def __init__(self, *wordslists, **kwargs):
        """Build a trie from any number of iterables of words.

        The optional ``dict`` keyword supplies an existing nested-dict trie
        whose top-level entries are copied in before the words are added.
        """
        init_trie = kwargs.get('dict', {})
        super(Dictrie, self).__init__()
        if init_trie:
            # Copy straight into the raw storage: ``self[key] = value`` would
            # treat each value as a word to insert (see __setitem__).
            self.data.update(getattr(init_trie, 'data', init_trie))
        for words in wordslists:
            self.build_trie(words)

    def _find(self, key):
        """Return the node reached by following ``key``, or None if absent."""
        node = self.data
        for char in key:
            if char not in node:
                return None
            node = node[char]
        return node

    def is_word(self, word):
        """Return True if ``word`` was stored as a whole word, not just a prefix."""
        return word in self and ' ' in self[word]

    def get_words(self, root):
        """Yield every stored word that begins with ``root``.

        The trie is walked breadth-first with each node's children visited in
        sorted order, so words come out shortest first and alphabetically
        among words of equal length. Nothing is yielded when ``root`` is not
        a path in the trie.
        """
        if root not in self:
            # Unknown prefix: an empty result instead of a KeyError raised
            # from inside the generator.
            return
        if not root and not self.data:
            # Empty trie: the empty root node must not be mistaken for the
            # end of a word, which would yield ''.
            return
        queue = deque([root])
        while queue:
            prefix = queue.popleft()
            node = self[prefix]
            if node:
                queue.extend(prefix + key for key in sorted(node))
            else:
                # An empty node ends a word; drop the ' ' terminator.
                yield prefix.strip()

    def build_trie(self, words):
        """Insert every word from the iterable ``words``."""
        for word in words:
            self[' '] = word

    def __iter__(self):
        """Iterate over all stored words, shortest first."""
        return self.get_words('')

    def __contains__(self, key):
        """Return True if the character sequence ``key`` is a path in the trie."""
        return self._find(key) is not None

    def __getitem__(self, key):
        """Return the nested dict reached by ``key``; raise KeyError if absent."""
        node = self._find(key)
        if node is None:
            raise KeyError(key)
        return node

    def __setitem__(self, key, item):
        """Insert the word ``item``; ``key`` is ignored.

        Surrounding whitespace is stripped from ``item`` and the ``' '``
        terminator is appended before the characters are stored.
        """
        node = self.data
        for char in item.strip() + ' ':
            node = node.setdefault(char, {})

In [420]:
# with open('words_alpha.txt') as fp:
#     for word in fp:
#         trie[' '] = word.strip()

# Pre-built nested-dict trie used to seed Dictrie. It is defined here so the
# cell runs on a fresh kernel; the value matches the output printed below.
haha = {'e': {'l': {'p': {' ': {}}, 'l': {' ': {}, 'o': {' ': {}}}}}}

trie = Dictrie(dict=haha)

In [421]:
# The seeded trie prints the same nested dict as its source.
print(haha)
print(trie)

{'e': {'l': {'p': {' ': {}}, 'l': {' ': {}, 'o': {' ': {}}}}}}
{'e': {'l': {'p': {' ': {}}, 'l': {' ': {}, 'o': {' ': {}}}}}}


## Closure

In [10]:
def make_averager():
    """Return a function that keeps a running average of the values it is given.

    The running state lives in a dict captured by the closure, so the inner
    function can update it in place without rebinding an outer variable.
    """
    running = dict(count=0, total=0)

    def averager(new_value):
        # The float increment makes the division below a true division.
        running['count'] += 1.
        running['total'] += new_value
        return running['total'] / running['count']

    return averager

avg = make_averager()
# Each call folds one more value into the running average.
print(avg(10))
print(avg(11))
print(avg(12))

10.0
10.5
11.0


## Decorators

In [11]:
import time
import functools

def clock(func):
    """Decorator that prints the elapsed time, arguments and result of each call.

    The line printed is ``[<seconds>s] name(arg, ..., key=value, ...) -> result``
    and the wrapped function's return value is passed through unchanged.
    """
    # time.clock() is platform-dependent and was removed in Python 3.8;
    # timeit.default_timer is available on both Python 2 and 3.
    from timeit import default_timer

    @functools.wraps(func)
    def clocked(*args, **kwargs):
        t0 = default_timer()
        result = func(*args, **kwargs)
        elapsed = default_timer() - t0
        name = func.__name__
        arg_list = []
        if args:
            arg_list.append(', '.join(repr(arg) for arg in args))
        if kwargs:
            # Sorted so the printed order does not depend on dict ordering.
            pairs = ['%s=%r' % (k, w) for k, w in sorted(kwargs.items())]
            # Same ', ' separator as the positional arguments.
            arg_list.append(', '.join(pairs))
        arg_str = ', '.join(arg_list)
        print('[%0.8fs] %s(%s) -> %r' % (elapsed, name, arg_str, result))
        return result
    return clocked

def fib_cache(func):
    """Decorator that memoizes a one-argument function by its argument."""
    cache = {}

    def cached(n):
        # Only the first call for a given n reaches the wrapped function.
        if n not in cache:
            cache[n] = func(n)
        return cache[n]

    return cached
        

In [14]:
@clock
@fib_cache
def factorial(n):
    """Return n! recursively; every recursive call goes through both decorators."""
    if n < 2:
        return 1
    return n * factorial(n - 1)

factorial(20)

[0.00000700s] cached(1) -> 1
[0.00066700s] cached(2) -> 2
[0.00079900s] cached(3) -> 6
[0.00104000s] cached(4) -> 24
[0.00120700s] cached(5) -> 120
[0.00127900s] cached(6) -> 720
[0.00148400s] cached(7) -> 5040
[0.00161700s] cached(8) -> 40320
[0.00181600s] cached(9) -> 362880
[0.00368000s] cached(10) -> 3628800
[0.00379300s] cached(11) -> 39916800
[0.00387700s] cached(12) -> 479001600
[0.00396200s] cached(13) -> 6227020800
[0.00402700s] cached(14) -> 87178291200
[0.00423300s] cached(15) -> 1307674368000
[0.00443100s] cached(16) -> 20922789888000
[0.00478200s] cached(17) -> 355687428096000
[0.00502100s] cached(18) -> 6402373705728000
[0.00527700s] cached(19) -> 121645100408832000
[0.00559400s] cached(20) -> 2432902008176640000


2432902008176640000

## Parameterized Decorators

In [16]:
import time

DEFAULT_FMT = '[{elapsed:0.8f}s] {name}({args}) -> {result}'


def clock(fmt=DEFAULT_FMT):
    """Return a decorator that prints one line per call, laid out by ``fmt``.

    ``fmt`` is a ``str.format`` template filled from the local variables of
    the wrapper, e.g. ``{elapsed}`` (seconds), ``{name}`` (function name),
    ``{args}`` (comma-separated reprs of the arguments) and ``{result}``
    (repr of the return value). The wrapped function's return value is
    passed through unchanged.
    """
    def decorate(func):
        # wraps keeps the decorated function's __name__ and docstring.
        @functools.wraps(func)
        def clocked(*_args, **_kwargs):
            t0 = time.time()
            _result = func(*_args, **_kwargs)
            elapsed = time.time() - t0
            name = func.__name__
            shown = list(map(repr, _args))
            # Keyword arguments are shown as key=value, sorted by key.
            shown.extend('%s=%r' % item for item in sorted(_kwargs.items()))
            args = ', '.join(shown)
            result = repr(_result)
            # Local names above are part of the template contract: do not
            # rename them, fmt may refer to any of them.
            print(fmt.format(**locals()))
            return _result
        return clocked
    return decorate

In [19]:
# Alternative: calling clock with no argument uses its default format.
# @clock()
@clock('{name}: {elapsed}s')
def snooze(seconds):
    """Sleep for ``seconds`` seconds; used here to exercise the custom format."""
    time.sleep(seconds)

# Three timed calls, each printing one line in the format given above.
for i in range(3):
    snooze(.123)

snooze: 0.126451969147s
snooze: 0.125724077225s
snooze: 0.124055147171s


## Class Methods

In [34]:
from array import array
import math

class Vector2D:
    """Two-dimensional vector with read-only float components ``x`` and ``y``.

    Instances are iterable (yielding x then y), hashable, comparable by
    components, and accept format specs; a spec ending in ``'p'`` formats
    the vector in polar form as ``<magnitude, angle>``.
    """

    # array typecode used when converting to and from bytes.
    typecode = 'd'

    def __init__(self, x, y):
        # Name-mangled attributes; exposed read-only through the properties.
        self.__x = float(x)
        self.__y = float(y)

    def angle(self):
        """Return the angle of the vector in radians, via ``math.atan2(y, x)``."""
        return math.atan2(self.y, self.x)

    @property
    def x(self):
        return self.__x

    @property
    def y(self):
        return self.__y

    def __iter__(self):
        #yield self.x; yield self.y
        return (i for i in (self.x, self.y))

    def __repr__(self):
        class_name = type(self).__name__
        # *self unpacks the two components through __iter__.
        return '{}({!r}, {!r})'.format(class_name, *self)

    def __format__(self, fmt_spec=''):
        """Format each component with ``fmt_spec``.

        A trailing ``'p'`` selects polar output ``<magnitude, angle>``; the
        rest of the spec is applied to both numbers. Otherwise the output is
        ``(x, y)``.
        """
        if fmt_spec.endswith('p'):
            fmt_spec = fmt_spec[:-1]
            coords = (abs(self), self.angle())
            outer_fmt = '<{}, {}>'
        else:
            coords = self
            outer_fmt = '({}, {})'

        components = (format(c, fmt_spec) for c in coords)
        return outer_fmt.format(*components)

    def __str__(self):
        return str(tuple(self))

    def __bytes__(self):
        """Return the typecode byte followed by the packed components."""
        return (bytes([ord(self.typecode)]) +
                bytes(array(self.typecode, self)))

    def __eq__(self, other):
        """Compare component-wise against any iterable ``other``."""
        try:
            return tuple(self) == tuple(other)
        except TypeError:
            # ``other`` is not iterable (e.g. a number): let Python fall back
            # to its default comparison instead of raising.
            return NotImplemented

    def __hash__(self):
        return hash(self.x) ^ hash(self.y)

    def __abs__(self):
        """Return the Euclidean length of the vector."""
        return math.hypot(self.x, self.y)

    def __bool__(self):
        """Return False only for a vector of zero length."""
        return bool(abs(self))

    # Python 2 consults __nonzero__ for truth testing and ignores __bool__.
    __nonzero__ = __bool__

    @classmethod
    def frombytes(cls, octets):
        """Rebuild a vector from the bytes produced by ``__bytes__``."""
        typecode = chr(octets[0])
        memv = memoryview(octets[1:]).cast(typecode)
        return cls(*memv)

In [35]:
v1 = Vector2D(3,4)
# Default spec, fixed-point, scientific, then polar (spec ending in 'p').
print(format(v1))
print(format(v1, '.2f'))
print(format(v1, '.3e'))
print(format(v1, '.3p'))

(3.0, 4.0)
(3.00, 4.00)
(3.000e+00, 4.000e+00)
<5.0, 0.927>
