In [12]:
import sys
sys.path.append('.')
sys.path.append('..')
from problem_loader import ProblemLoader
from helpers import obfuscate

data_urls = {
    'problem1': 'https://d18ky98rnyall9.cloudfront.net/_eed1bd08e2fa58bbe94b24c06a20dcdb_huffman.txt?Expires=1627948800&Signature=ZEZ81ZVRkTRzV-F32qYfvWKxfZUBDL4uG63ZCUbUojYW4d3~0yfqIG2lNqac1o-xoR6oyaigMs~nAk07oRREmVXETNcNMROenZx2bHs9cuIbBTh8NOpWg2-lLx79YCQ-T0OWRwhGEG49ilJ-mmwHmef5F~Ryxdwb8PLimC0UV4k_&Key-Pair-Id=APKAJLTNE6QMUY6HBC5A',
    'problem3': 'https://d18ky98rnyall9.cloudfront.net/_790eb8b186eefb5b63d0bf38b5096873_mwis.txt?Expires=1627948800&Signature=UMuBiuG~VCH6FQU3KCSWfUCk0zOrNhhVCW3v1rpM-cvCcBifwayrJpWwJFcT3~NBt~Yjqob48Pqr-DqlzsTdlNYO4uj2Kz6JbeT6L0dQgd6PVk7mYE4JZs4WREQlL9JJZLYWjisLiugad3NwDceXvXvW85t2OPdrNsDvhWti8BA_&Key-Pair-Id=APKAJLTNE6QMUY6HBC5A'
}

# Problem 1
In this programming problem and the next you'll code up the greedy algorithm from the lectures on Huffman coding.

This file describes an instance of the problem. It has the following format:

[number_of_symbols]

[weight of symbol #1]

[weight of symbol #2]

...

For example, the third line of the file is "6852892," indicating that the weight of the second symbol of the alphabet is 6852892.  (We're using weights instead of frequencies, like in the "A More Complex Example" video.)

**Your task in this problem is to run the Huffman coding algorithm from lecture on this data set. What is the maximum length of a codeword in the resulting Huffman code?**

### ADVICE: 
If you're not getting the correct answer, try debugging your algorithm using some small test cases. And then post them to the discussion forum!

In [13]:
values = ProblemLoader(
    data_urls['problem1'], 
    fname="weighted-graph.p", 
).fetch()
print(values[:10])

[1000, 7540662, 6852892, 3235725, 8045172, 2667794, 2595511, 7030103, 5882478, 2731795]


### Input: 
a nonnegative frequency $p_a$ for each symbol $a$ of an alphabet $\sum$.  

### Output: 
the $\sum$-tree with minimum average leaf depth, representing the prefix-free binary code with minimum average encoding length.  

`// Initialization`  
for each $a \in \Sigma$ do  
&nbsp;  $T_a$ := tree containing one node, labeled “a”  
&nbsp;  $P(T_a)$ := $p_a$  
$\mathcal{F}$ := $\{T_a\}_{a\in\Sigma}$ `// invariant: `$\forall T \in \mathcal{F}, P(T)= \sum\limits_{a \in T}^{} p_a$  
`// Main loop`  
while $\mathcal{F}$ contains at least two trees do  
&nbsp;  $T_1 := argmin_{T \in\mathcal{F}} P(T)$ `// min frequency sum`  
&nbsp;  $T_2$ := $argmin_{T \in\mathcal{F},T \neq T_1} P(T)$ `// second-smallest`  
&nbsp;  remove $T_1$ and $T_2$ from $\mathcal{F}$  
&nbsp;  `// roots of `$T_1$, $T_2$ `become left, right children of a new internal node`  
&nbsp;  $T_3$ := merger of $T_1$ and $T_2$  
&nbsp;  $P(T_3)$ := $P(T_1) + P(T_2)$ `// maintains invariant`  
&nbsp;  add $T_3$ to $\mathcal{F}$  
return the unique tree in $\mathcal{F}$ 

In [14]:
from collections import namedtuple
from heapq import heapify, heappop, heappush

Node = namedtuple('Node', ['index', 'weight', 'left', 'right'])

def P(node: Node):
  if node == None:
    return 0
  return node.weight + P(node.left) + P(node.right)

def Huffman(values):
  forest = []
  for i, a in enumerate(values):
    node = Node(index=i, weight=a, left=None, right=None)
    forest.append((P(node), node))

  heapify(forest)
  while(len(forest) > 1):
    tree_one = heappop(forest)[1]
    tree_two = heappop(forest)[1]
    node = Node(index=None, weight=0, left=tree_one, right=tree_two)
    heappush(forest, (P(node), node))

  tree = forest[0][1]
  return tree

In [15]:
tree = Huffman(values)

def max_recurse_tree(tree:Node, count=0):
  if tree == None:
    return count
  if tree.left == None and tree.right == None:
    return count - 1
  return max(max_recurse_tree(tree.left, count+1), max_recurse_tree(tree.right, count+1)) 

obfuscate(max_recurse_tree(tree))

In [16]:
from ppbtree import print_tree

print_tree(tree, nameattr='index', left_child='left', right_child='right')

                                             ┌392
                                        ┌None┤
                                        |    └614
                                   ┌None┤
                                   |    |    ┌62
                                   |    └None┤
                                   |         |    ┌280
                                   |         └None┤
                                   |              └725
                              ┌None┤
                              |    |         ┌989
                              |    |    ┌None┤
                              |    |    |    └700
                              |    └None┤
                              |         |    ┌690
                              |         └None┤
                              |              |         ┌413
                              |              |    ┌None┤
                              |              |    |    └571
                              |              └None┤
 

# Problem 2

Continuing the previous problem, what is the minimum length of a codeword in your Huffman code?

In [17]:
def min_recurse_tree(tree:Node, count=0):
  if tree == None or tree.index != None:
    return count
  if tree.left == None and tree.right == None:
    return count
  return min(min_recurse_tree(tree.left, count+1), min_recurse_tree(tree.right, count+1))

obfuscate(min_recurse_tree(tree))

# Question 3

In this programming problem you'll code up the dynamic programming algorithm for computing a maximum-weight independent set of a path graph. 

Your task in this problem is to run the dynamic programming algorithm (and the reconstruction procedure) from lecture on this data set.  The question is: of the vertices 1, 2, 3, 4, 17, 117, 517, and 997, which ones belong to the maximum-weight independent set?  (By "vertex 1" we mean the first vertex of the graph---there is no vertex 0.)   

In the box below, enter a 8-bit string, where the ith bit should be 1 if the ith of these 8 vertices is in the maximum-weight independent set, and 0 otherwise. For example, if you think that the vertices 1, 4, 17, and 517 are in the maximum-weight independent set and the other four vertices are not, then you should enter the string 10011010 in the box below.

In [18]:
values = ProblemLoader(
    data_urls['problem3'], 
    fname="weighted-graph_2.p", 
).fetch()
print(values[:10])

[1000, 4962786, 6395702, 5601590, 3803402, 6784626, 4944482, 2882725, 9310662, 5247184]


In [19]:
Path = namedtuple('Path', ['vertex', 'weight', 'next'])

path_nodes = [ Path(v, w, None) for v,w in enumerate(values) ]
i = len(path_nodes) - 1
while i > 0:
  path_nodes[i-1] = path_nodes[i-1]._replace(next=path_nodes[i])
  i -= 1

path = path_nodes[0]

### WIS  
#### Input: 
a path graph G with vertex set $\{v_1, v_2,...,v_n\}$  and a nonnegative weight $w_i$ for each vertex $v_i$.  
#### Output: 
the total weight of a maximum-weight independent set of $G$.  
$A$ := length-$(n + 1)$ array `// subproblem solutions`    
$A[0]$ := 0 `// base case #1`  
$A[1]$ := $w_1$ `// base case #2`  
for i = 2 to n do  `// use recurrence`  
$A[i]$ := $max\{A[i - 1], A[i - 2] + w_i \}$    
return $A[n]$ `// solution to largest subproblem `

In [20]:
A = [None] * (len(path_nodes) + 1)
A[0] = 0
A[1] = path_nodes[0].weight
for i in range(2, len(path_nodes) + 1):
  A[i] = max(A[i-1], A[i-2] + path_nodes[i - 1].weight)
  
print(A[len(path_nodes)])


2955353732


### WIS Reconstruction  
#### Input: 
the array $A$ computed by the WIS algorithm for a path graph $G$ with vertex set $\{v_1, v_2,...,v_n\}$ and a nonnegative weight $w_i$ for each vertex $v_i$.  
#### Output: 
a maximum-weight independent set of $G$. 
 
$S$ := $\empty$ `// vertices in an MWIS`  
$i$ := $n$  
while $i \ge 2$ do  
&nbsp; if $A[i - 1] \ge A[i - 2] + w_i$ then `// Case 1 wins`  
&nbsp;&nbsp; $i$ := $i - 1$ `// exclude `$v_i$  
&nbsp; else `// Case 2 wins`  
&nbsp;&nbsp; $S$ := $S \cup \{v_i\}$ `// include `$v_i$  
&nbsp;&nbsp; $i$ := $i - 2$ `// exclude `$v_i$  
if $i = 1$ then `// base case #2`  
&nbsp; $S$ := $S \cup \{v_1\}$  
return $S$ 

In [21]:
S = set()
i = len(path_nodes)
while i >= 2:
  if A[i - 1] >= A[i - 2] + path_nodes[i-1].weight:
    i -= 1
  else:
    S.add(path_nodes[i-1].vertex)
    i -= 2
if i == 1:
  S.add(path_nodes[0].vertex)
print(len(S), len(path_nodes))

459 1001


In [22]:
def bin_code(*seq):
  return ''.join(['1' if i in S else '0' for i in seq])

obfuscate(bin_code(1, 2, 3, 4, 17, 117, 517, 997)) 