In [4]:
#https://www.reddit.com/r/dailyprogrammer/comments/3x3hqa/20151216_challenge_245_intermediate_ggggggg_gggg/

In [5]:
import re
from queue import PriorityQueue

def decodeGg(key, str):
    """Decode a g/G-encoded text back into plain text.

    Parameters
    ----------
    key : str
        Whitespace-separated ``char code char code ...`` pairs; it is parsed
        by ``genKeyDict`` into a ``{code: char}`` lookup.
    str : str
        The encoded text. (The name shadows the builtin and is kept only for
        interface compatibility.)

    Returns
    -------
    str
        The decoded text. ASCII letters are accumulated until they spell a
        known code, which is then replaced by its character; every other
        symbol is copied through untouched. Letters still pending at the end
        of the input that never completed a code are not emitted.
    """
    code_to_char = genKeyDict(key)
    letter = re.compile(r'[a-zA-Z]')

    pieces = []
    pending = ''
    for symbol in str:
        # Anything that is not a letter (punctuation, whitespace, digits)
        # passes straight through to the output.
        if letter.match(symbol) is None:
            pieces.append(symbol)
            continue

        # Grow the candidate code and flush it as soon as it is recognised.
        pending += symbol
        if pending in code_to_char:
            pieces.append(code_to_char[pending])
            pending = ''
    return ''.join(pieces)

class Node:
    """Binary tree node holding a value and optional left/right children.

    Nodes order themselves by ``val`` so that they can be compared when they
    sit inside tuples (e.g. ``(frequency, node)`` entries of a priority queue).
    """

    def __init__(self, val, left=None, right=None):
        """Store the payload and both child links (``None`` means no child)."""
        self.val, self.left, self.right = val, left, right

    def __lt__(self, other):
        """Order nodes by their ``val``."""
        return self.val < other.val

    def __repr__(self):
        """Show the node as the string form of its ``val``."""
        return str(self.val)

def encoding_keys(str):
    """Build a Huffman code over the ASCII letters of a text, using g/G as bits.

    Parameters
    ----------
    str : str
        Text to analyse. Only characters matching ``[a-zA-Z]`` receive a
        code. (The name shadows the builtin and is kept only for interface
        compatibility.)

    Returns
    -------
    dict
        ``{letter: code}`` where each code is a string of ``'g'`` (left
        branch) and ``'G'`` (right branch). Empty when the text contains no
        letters; a text with a single distinct letter maps it to ``'g'``.
    """
    frequency = {c: str.count(c) for c in set(str) if re.match(r'[a-zA-Z]', c)}

    # Nothing to encode: return early. Calling get() on an empty
    # PriorityQueue would block forever.
    if not frequency:
        return {}

    # A one-symbol alphabet yields a tree that is a single leaf, whose path
    # from the root is empty. Give it a one-symbol code so the encoded text
    # stays decodable.
    if len(frequency) == 1:
        (only_char,) = frequency
        return {only_char: 'g'}

    # Priority queue ordered by frequency: the least frequent node is served
    # first. Ties fall back to Node ordering.
    pending = PriorityQueue()

    # Create a leaf node for each symbol and add it to the priority queue.
    for char, freq in frequency.items():
        pending.put((freq, Node(char)))

    # While there is more than one node in the queue, merge the two least
    # frequent nodes under a new internal node whose frequency is their sum.
    while pending.qsize() > 1:
        f1, n1 = pending.get()
        f2, n2 = pending.get()
        pending.put((f1 + f2, Node(n1.val + n2.val, n1, n2)))

    # The remaining node is the root node and the tree is complete.
    _, root = pending.get()

    return huffman_tree2code(root, 'g', 'G')

def huffman_tree2code(root, zero='0', one='1'):
    """Map every leaf value of a binary tree to its root-to-leaf path code.

    Parameters
    ----------
    root : node or None
        Tree root. Nodes expose ``val``, ``left`` and ``right``; a node whose
        children are both ``None`` is a leaf.
    zero : str
        Symbol appended to the code when descending into a left child.
    one : str
        Symbol appended to the code when descending into a right child.

    Returns
    -------
    dict
        ``{leaf.val: code}``. Always a dict: empty for ``root is None``, and
        ``{root.val: zero}`` when the root itself is a leaf (a lone leaf has
        an empty path, so it is given a one-symbol code instead).
    """
    codes = {}
    if root is None:
        return codes

    # Single-symbol tree: without this the only code would be the empty string.
    if root.left is None and root.right is None:
        codes[root.val] = zero
        return codes

    # Depth-first walk with an explicit stack. The right child is pushed
    # first so that the left subtree is visited first.
    stack = [(root, '')]
    while stack:
        node, code = stack.pop()
        if node.left is None and node.right is None:
            codes[node.val] = code
            continue
        if node.right is not None:
            stack.append((node.right, code + one))
        if node.left is not None:
            stack.append((node.left, code + zero))
    return codes
    
def genKeyDict(key):
    """Parse a ``char code char code ...`` key string into ``{code: char}``.

    The string is split on whitespace and consumed two tokens at a time: the
    first token of each pair is the character, the second is its code. If a
    code appears more than once, the last pair wins. A dangling final token
    (odd token count) raises ``IndexError``.
    """
    tokens = key.split()
    code_to_char = {}
    for start in range(0, len(tokens), 2):
        code_to_char[tokens[start + 1]] = tokens[start]
    return code_to_char

def encode(str):
    """Encode a text with a code table derived from that same text.

    Parameters
    ----------
    str : str
        Text to encode. (The name shadows the builtin and is kept only for
        interface compatibility.)

    Returns
    -------
    tuple
        ``(str_key, encoded)``: ``str_key`` lists ``char code`` pairs sorted
        by character and separated by single spaces; ``encoded`` is the text
        with every character that has a code replaced by it and every other
        character copied unchanged.
    """
    codes = encoding_keys(str)

    # Characters without a code (punctuation, whitespace, ...) pass through.
    encoded = ''.join(codes[ch] if ch in codes else ch for ch in str)

    # Flatten the table into "char code char code ..." in sorted char order.
    key_tokens = []
    for ch in sorted(codes):
        key_tokens += [ch, codes[ch]]
    return " ".join(key_tokens), encoded
        

In [6]:
# Known key/text pairs and the plain text decodeGg must produce for them.
assert decodeGg(
    "H GgG d gGg e ggG l GGg o gGG r Ggg w ggg",
    "GgGggGGGgGGggGG, ggggGGGggGGggGg!",
) == 'Hello, world!'

assert decodeGg(
    "a GgG d GggGg e GggGG g GGGgg h GGGgG i GGGGg l GGGGG m ggg o GGg p Gggg r gG y ggG",
    "GGGgGGGgGGggGGgGggG /gG/GggGgGgGGGGGgGGGGGggGGggggGGGgGGGgggGGgGggggggGggGGgG!",
) == 'hooray /r/dailyprogrammer!'

In [9]:
msg = """
Here's the thing. You said a "jackdaw is a crow."
Is it in the same family? Yes. No one's arguing that.
As someone who is a scientist who studies crows, I am telling you, specifically, in science, no one calls jackdaws crows. If you want to be "specific" like you said, then you shouldn't either. They're not the same thing.
If you're saying "crow family" you're referring to the taxonomic grouping of Corvidae, which includes things from nutcrackers to blue jays to ravens.
So your reasoning for calling a jackdaw a crow is because random people "call the black ones crows?" Let's get grackles and blackbirds in there, then, too.
Also, calling someone a human or an ape? It's not one or the other, that's not how taxonomy works. They're both. A jackdaw is a jackdaw and a member of the crow family. But that's not what you said. You said a jackdaw is a crow, which is not true unless you're okay with calling all members of the crow family crows, which means you'd call blue jays, ravens, and other birds crows, too. Which you said you don't.
It's okay to just admit you're wrong, you know?
"""
# Encode the message: `key` is the "char code ..." table as a string (the form
# decodeGg takes as its first argument) and `encoded` is the g/G-encoded text.
key, encoded = encode(msg)

In [10]:
# sample from /r/dailyprogrammer
test_key = """
A gGgGgGgG B gGgGgGggGg C gGgGgGggGG H GGgGgGgggg I GggGggg L GGgGgGgggG N GGgGgGggGg S GGgGgGggGG T GGgGgGgGg W gGgGgGggg Y gGgGgGGg a GGGG b gGgGgg c gGgg d gGgGG e GGGg f GggGgG g ggggg h gggG i Gggg j GGgGgGG k GGgGgg l GGgGG m ggggG n gGGG o ggG p GggGggG r gGGg s GGgg t GgGg u GgGGG v gGgGgGGG w GgGGg x GGgGgGgGG y GggGG
"""
test_encoded = """
GGgGgGggggGGGggGGgGGGg'GGgg GgGggggGGGGg GgGggggGGggggGGGggggg. gGgGgGGgggGGgGGG GGggGGGGGggggGgGG GGGG "GGgGgGGGGGGgGggGGgGgggGgGGGGGGGgGGg GgggGGgg GGGG gGgggGGgggGGgGGg."
GggGgggGGgg GgggGgGg GggggGGG GgGggggGGGGg GGggGGGGggggGGGGg GggGgGGGGGggggGGgggGGgGGGggGG? gGgGgGGgGGGgGGgg. GGgGgGggGgggG ggGgGGGGGGg'GGgg GGGGgGGggggggGgGGGGggggGGGggggg GgGggggGGGGGGgGg.
gGgGgGgGGGgg GGggggGggggGGGGgggGgGGGGGGg GgGGggggGggG GgggGGgg GGGG GGgggGggGgggGGGggGGGGgGgGgggGGggGgGg GgGGggggGggG GGggGgGgGgGGGgGgGGGgggGGGgGGgg gGgggGGgggGGgGGgGGgg, GggGggg GGGGggggG GgGgGGGgGGgGGGGgGGGggggGGGggggg GggGGggGGgGGG, GGggGggGggGGGGggGggGgggGggGgGGggggGggGGGGGGgGGGGgGGGggGG, GggggGGG GGgggGggGgggGGGggGGGgGggGGGg, gGGGggG ggGgGGGGGGg gGggGGGGGGgGGGGgGGGGgg GGgGgGGGGGGgGggGGgGgggGgGGGGGGGgGGgGGgg gGgggGGgggGGgGGgGGgg. GggGgggGggGgG GggGGggGGgGGG GgGGgGGGGgGGGGgGg GgGgggG gGgGggGGGg "GGggGggGggGGGGggGggGgggGggGgGGggggGgg" GGgGGGgggGGgGggGGGg GggGGggGGgGGG GGggGGGGGggggGgGG, GgGggggGGGGggGGG GggGGggGGgGGG GGgggggGggGGgGGGGGgGGgGgGGgGGG'GgGg GGGgGgggGgGggggGGGGggGGg. GGgGgGgGggggGGGGgGggGG'gGGgGGGg gGGGggGGgGg GgGggggGGGGg GGggGGGGggggGGGGg GgGggggGGggggGGGggggg.
GggGgggGggGgG GggGGggGGgGGG'gGGgGGGg GGggGGGGGggGGGggggGGGggggg "gGgggGGgggGGgGGg GggGgGGGGGggggGGgggGGgGGGggGG" GggGGggGGgGGG'gGGgGGGg gGGgGGGgGggGgGGGGggGGggGGgGggggGGGggggg GgGgggG GgGggggGGGGg GgGgGGGGGGgGgGgGGggGgGGGggGggggGGggggGgg ggggggGGgggGGgGGGGggGggGGggggGGGggggg ggGGggGgG gGgGgGggGGggGgGGggGgGgGGGGggggGgGGGGGGGGGg, GgGGggggGGggggGgggggG GggggGGGgGggGGgGGGgGGGgGgGGGGGgGGgg GgGggggGGggggGGGgggggGGgg GggGgGgGGgggGggggG gGGGGgGGGGgGggGgggGGgGGGGgGggGGgGggGGGggGGgGGgg GgGgggG gGgGggGGgGGGgGGGGGGg GGgGgGGGGGGGggGGGGgg GgGgggG gGGgGGGGgGgGgGGGGGGggGGGGGgg.
GGgGgGggGGggG GggGGggGGgGGGgGGg gGGgGGGgGGGGGGggggGgGGGGggggGGGggggg GggGgGggGgGGg gGggGGGGGGgGGGGgGGGggggGGGggggg GGGG GGgGgGGGGGGgGggGGgGgggGgGGGGGGGgGGg GGGG gGgggGGgggGGgGGg GgggGGgg gGgGggGGGggGggGGGGGgGGGGGggGGGg gGGgGGGGgGGGgGgGGggGggggG GggGggGGGGgggGGggGggGGGgGGGGGg "gGggGGGGGGgGGGGgGG GgGggggGGGGg gGgGggGGgGGGGGGgGggGGgGgg ggGgGGGGGGgGGgg gGgggGGgggGGgGGgGGgg?" GGgGgGgggGGGGgGgGg'GGgg gggggGGGgGgGg ggggggGGgGGGGgGggGGgGggGGgGGGGGgGGgg GGGGgGGGgGgGG gGgGggGGgGGGGGGgGggGGgGgggGgGggGggggGGggGgGGGGgg GggggGGG GgGggggGGGGggGGgGGGg, GgGggggGGGGggGGG, GgGgggGggG.
gGgGgGgGGGgGGGGggggG, gGggGGGGGGgGGGGgGGGggggGGGggggg GGggggGggggGGGGgggGgGGGGGGg GGGG gggGGgGGGggggGGGGGgGGG ggGgGGg GGGGgGGG GGGGGggGggGGGGg? GggGgggGgGg'GGgg gGGGggGGgGg ggGgGGGGGGg ggGgGGg GgGggggGGGGg ggGGgGggggGGGGggGGg, GgGggggGGGGGGgGg'GGgg gGGGggGGgGg gggGggGGgGGg GgGgGGGGGGgGgGgGGggGgGGGggGggggGGggGG GgGGgggGgGGgGGgGggGGgg. GGgGgGgGggggGGGGgGggGG'gGGgGGGg gGgGggggGGgGggggG. gGgGgGgG GGgGgGGGGGGgGggGGgGgggGgGGGGGGGgGGg GgggGGgg GGGG GGgGgGGGGGGgGggGGgGgggGgGGGGGGGgGGg GGGGgGGGgGgGG GGGG ggggGGGGgggggGgGgGggGGGggGGg ggGGggGgG GgGggggGGGGg gGgggGGgggGGgGGg GggGgGGGGGggggGGgggGGgGGGggGG. gGgGgGggGgGgGGGGgGg GgGggggGGGGGGgGg'GGgg gGGGggGGgGg GgGGggggGGGGGGgGg GggGGggGGgGGG GGggGGGGGggggGgGG. gGgGgGGgggGGgGGG GGggGGGGGggggGgGG GGGG GGgGgGGGGGGgGggGGgGgggGgGGGGGGGgGGg GgggGGgg GGGG gGgggGGgggGGgGGg, GgGGggggGGggggGgggggG GgggGGgg gGGGggGGgGg GgGggGGgGgGGGGGGg GgGGGgGGGGGgGGGGGgGGggGGgg GggGGggGGgGGG'gGGgGGGg ggGGGgGggGGGGGggGG GgGGgGgggGgGggggG gGggGGGGGGgGGGGgGGGggggGGGggggg GGGGGGgGGGGgGG ggggGGGGgggggGgGgGggGGGggGGgGGgg ggGGggGgG GgGggggGGGGg gGgggGGgggGGgGGg GggGgGGGGGggggGGgggGGgGGGggGG gGgggGGgggGGgGGgGGgg, GgGGggggGGggggGgggggG ggggGGGGgGGGGgGGGGGgg GggGGggGGgGGG'gGgGG gGggGGGGGGgGGGGgGG gGgGggGGgGGGgGGGGGGg GGgGgGGGGGGGggGGGGgg, gGGgGGGGgGgGgGGGGGGggGGGGGgg, GGGGgGGGgGgGG ggGGgGggggGGGGggGGg gGgGggGggggGGggGgGGGGgg gGgggGGgggGGgGGgGGgg, GgGgggGggG. gGgGgGggggggGGggggGgggggG GggGGggGGgGGG GGggGGGGGggggGgGG GggGGggGGgGGG gGgGGggGgGGG'GgGg.
GggGgggGgGg'GGgg ggGGGgGggGGGGGggGG GgGgggG GGgGgGGGgGGGGGggGgGg GGGGgGgGGggggGGgggGgGg GggGGggGGgGGG'gGGgGGGg GgGGggGGgggGgGGGggggg, GggGGggGGgGGG GGgGgggGGGggGGgGGg?
"""
# as efficient as sample?
# Compare against `test_encoded`, the sample encoding defined in this cell.
# The previous operand `test` is not defined anywhere in the notebook and
# raised NameError on a fresh kernel.
assert len(test_encoded) >= len(encoded)
decoded = decodeGg(key, encoded)

# decoded correctly?
# Our own encoding must round-trip to the original message, and the sample
# key/encoding must decode to that same text.
assert decoded == msg
assert decoded == decodeGg(test_key, test_encoded)
print('tests pass')


tests pass
