In [1]:
import sys
sys.path.append('.')
sys.path.append('..')
from problem_loader import ProblemLoader
from helpers import obfuscate

# Download locations for the two knapsack instances, keyed by problem name.
# NOTE(review): these look like time-limited signed URLs (each carries
# `Expires=`, `Signature=` and `Key-Pair-Id=` query parameters), so they have
# probably stopped working — refresh them from the course page before re-running.
data_urls = {
    'problem1': 'https://d18ky98rnyall9.cloudfront.net/_6dfda29c18c77fd14511ba8964c2e265_knapsack1.txt?Expires=1628985600&Signature=k-eIsZc9GtX5yzAVlwKTfQMgsUDnI~xbkalhKbeK8KywM-fGsBYZI7us4rernHZJPm~K91zs0ihN3qTFfwp-lkEreNB7MfTIHSzENtqmEIhTVqKAHl77P8Fiuw7ooqDvVpNVBGp30bptiF8GoSy01Y5UlY5Z7AHJ1UrX0Sa0Hhg_&Key-Pair-Id=APKAJLTNE6QMUY6HBC5A',
    'problem2': 'https://d18ky98rnyall9.cloudfront.net/_6dfda29c18c77fd14511ba8964c2e265_knapsack_big.txt?Expires=1629072000&Signature=Ut7vbr2sDSmQ8Ys8h31shBrJ-iY0ZqhSYvS1hcCH3xuOR75Pc9hL6h1Vr47OalZc9~iVRxR50Q89R83NYUYxqTHH~iUAOLnzWUcYINeC7sZjKP5YG-hOA-TvT5ELoW9REsuRIpD4zWVB2Aw2B2Wnnl07m9vRv6pIzqHT1h7UWoY_&Key-Pair-Id=APKAJLTNE6QMUY6HBC5A'
}

# Problem 1

In this programming problem and the next you'll code up the knapsack algorithm from lecture.

Let's start with a warm-up.

This file describes a knapsack instance, and it has the following format:

[knapsack_size][number_of_items]

[value_1] [weight_1]

[value_2] [weight_2]

...

For example, the third line of the file is "50074 659", indicating that the second item has value $50074$ and size $659$, respectively.

You can assume that all numbers are positive.  You should assume that item weights and the knapsack capacity are integers.

### ADVICE: 
If you're not getting the correct answer, try debugging your algorithm using some small test cases. And then post them to the discussion forum!

In [2]:
from math import inf
from collections import namedtuple

# One knapsack item: its value and its (integer) weight/size.
WeightedItem = namedtuple('WeightedItem', ['value', 'weight'])

def process_weighted_items(data):
  """Parse the raw bytes of a knapsack file into its capacity and item list.

  The first non-blank line is the header; its first field is the knapsack
  capacity (any further header fields are ignored). Every later line made of
  exactly two whitespace-separated fields is read as ``value weight``; lines
  with any other number of fields are skipped.

  Args:
    data: UTF-8 encoded file contents as ``bytes``.

  Returns:
    A dict with ``'v'`` (list of WeightedItem, in file order) and ``'C'``
    (the capacity as an int).

  Raises:
    ValueError: if there is no header line, or a field is not an integer.
  """
  items = {'v': []}
  for raw_line in data.split(b'\n'):
    # split() with no separator tolerates tabs, repeated spaces, trailing
    # whitespace and a stray '\r'; split(' ') turned those into extra empty
    # fields and the line was then silently dropped.
    fields = raw_line.decode('utf-8').split()
    if not fields:
      continue
    if 'C' not in items:
      # Header line: only the capacity is needed.
      items['C'] = int(fields[0])
      continue
    if len(fields) == 2:
      items['v'].append(WeightedItem(value=int(fields[0]), weight=int(fields[1])))
  if 'C' not in items:
    raise ValueError('knapsack data has no header line with the capacity')
  return items

# Fetch and parse the warm-up instance (problem 1).
# NOTE(review): `fname` and `clear=True` are passed straight through to the
# project's ProblemLoader — presumably a local cache file and a "refresh the
# cache" switch; confirm in problem_loader.
items = ProblemLoader(data_urls['problem1'], fname="warmup.p",
                      preprocessor=process_weighted_items).fetch(clear=True)

# Unpack the parsed instance: the item list and the knapsack capacity.
values, C = items['v'], items['C']
print(C, values[:10])

10000 [WeightedItem(value=16808, weight=250), WeightedItem(value=50074, weight=659), WeightedItem(value=8931, weight=273), WeightedItem(value=27545, weight=879), WeightedItem(value=77924, weight=710), WeightedItem(value=64441, weight=166), WeightedItem(value=84493, weight=43), WeightedItem(value=7988, weight=504), WeightedItem(value=82328, weight=730), WeightedItem(value=78841, weight=613)]


## Knapsack  

### Input: 
item values $v_1,...,v_n$, item sizes $s_1,...,s_n$, and  a knapsack capacity $C$ (all positive integers).  

### Output: 
the maximum total value of a subset $S \subseteq \{1, 2, \ldots, n\}$ with $\sum_{i \in S} s_i \leqslant C$.  

`// subproblem solutions (indexed from 0)`  
$A$ := $(n + 1) \times (C + 1)$ two-dimensional array  
`// base case (i = 0)`  
for $c = 0$ to $C$ do  
&nbsp;  $A[0][c] = 0$  
`// systematically solve all subproblems`  
for $i = 1$ to $n$ do  
&nbsp;  for $c = 0$ to $C$ do  
`// use recurrence`  
&nbsp;&nbsp;  if $s_i > c$ then  
&nbsp;&nbsp;&nbsp;  $A[i][c] := A[i - 1][c]$  
&nbsp;&nbsp;  else  
&nbsp;&nbsp;&nbsp;  $A[i][c] := \max\{\underbrace{A[i - 1][c]}_{\textrm{Case 1}}, \underbrace{A[i - 1][c - s_i] + v_i}_{\textrm{Case 2}}\}$  
return $A[n][C]$ `// solution to largest subproblem`

In [3]:
# Bottom-up knapsack table, following the pseudocode above:
# A[i][c] is the best total value using only the first i items with capacity c.
n = len(values)

# (n + 1) x (C + 1) cells: row 0 is the "no items" base case (all zeros) and
# column C is the full knapsack capacity. Sizing the table C x C instead
# wastes memory, breaks when n > C, and never evaluates capacity C itself.
A = [[0] * (C + 1) for _ in range(n + 1)]

for i in range(1, n + 1):
  item = values[i - 1]  # items are 0-indexed, table rows are 1-indexed
  prev_row, row = A[i - 1], A[i]
  for c in range(C + 1):
    if item.weight > c:
      # Item i does not fit: inherit the answer without it.
      row[c] = prev_row[c]
    else:
      # Case 1: skip item i. Case 2: take it and fill the remaining capacity.
      row[c] = max(prev_row[c], prev_row[c - item.weight] + item.value)

# Solution to the largest subproblem: all n items, full capacity C.
obfuscate(A[n][C])

# Problem 2

This problem also asks you to solve a knapsack instance, but a much bigger one. 

This file describes a knapsack instance, and it has the following format:

[knapsack_size][number_of_items]

[value_1] [weight_1]

[value_2] [weight_2]

...

For example, the third line of the file is "50074 834558", indicating that the second item has value 50074 and size 834558, respectively.  As before, you should assume that item weights and the knapsack capacity are integers.

This instance is so big that the straightforward iterative implementation uses an infeasible amount of time and space.  So you will have to be creative to compute an optimal solution.  One idea is to go back to a recursive implementation, solving subproblems --- and, of course, caching the results to avoid redundant work --- only on an "as needed" basis.  Also, be sure to think about appropriate data structures for storing and looking up solutions to subproblems.

### ADVICE: 
If you're not getting the correct answer, try debugging your algorithm using some small test cases. And then post them to the discussion forum!

In [4]:
# Fetch and parse the big instance (problem 2), reusing the same parser as
# problem 1.
# NOTE(review): `fname` is handed to the project's ProblemLoader — presumably
# the local cache file name; confirm in problem_loader.
items = ProblemLoader(data_urls['problem2'], fname="q2.p",
                      preprocessor=process_weighted_items).fetch()

# Rebind the notebook-level item list and capacity to the big instance.
values, C = items['v'], items['C']
print(C, values[:10])

2000000 [WeightedItem(value=16808, weight=241486), WeightedItem(value=50074, weight=834558), WeightedItem(value=8931, weight=738037), WeightedItem(value=27545, weight=212860), WeightedItem(value=77924, weight=494349), WeightedItem(value=64441, weight=815107), WeightedItem(value=84493, weight=723724), WeightedItem(value=7988, weight=421316), WeightedItem(value=82328, weight=652893), WeightedItem(value=78841, weight=402599)]


I believe this is the recursive approach the problem statement suggests; note that without caching the subproblem results it is $O(2^n)$.

$n$ values
$n$ weights

we can use recursion
- $A$ weight and value array
- $W$ current weight to solve
- $i$ index in array to consider

for $i$ in $f_{\textrm{last index}}(A)$ ..0:
  - if $f_{\textrm{weight}}(A[i])  > W$ `// skip it`  
      return $f_{\textrm{recurse}}(A, W, i - 1)$
  - else `// either this weight should be included, or not.`
    - return $f_{\textrm{max total value}}($
      $$f_{\textrm{recurse}}(A, W - f_{\textrm{weight}}(A[i]), i - 1) + f_{\textrm{value}}(A[i]),$$
      $$f_{\textrm{recurse}}(A, W, i - 1))$$

In [5]:
from functools import cache as memoize

def recurse_knapsack(A, W, i):
  """Return the best total value that fits in capacity W using items A[0..i].

  Top-down, memoized form of the knapsack recurrence: each item is either
  skipped or, when it fits, taken. Only the subproblems actually reached are
  solved and cached; the cache lives for a single call.

  Args:
    A: sequence of items exposing ``.weight`` and ``.value``
      (weights are assumed to be positive integers).
    W: knapsack capacity still available.
    i: index of the last item in A that may be used; pass ``len(A) - 1`` to
      consider every item.

  Returns:
    The maximum achievable total value (0 when nothing fits or no item is
    available).

  Note:
    The recursion goes one level deeper per item, so large instances need the
    interpreter's recursion limit raised before calling this.
  """
  @memoize
  def recursive_knapsack(W, i):
    # Base case: the index has run off the front of A (no items left) or no
    # capacity is left. Stopping at i == 0 would silently ignore A[0].
    if i < 0 or W == 0:
      return 0

    item = A[i]
    skip = recursive_knapsack(W, i - 1)
    if item.weight > W:
      # Item i does not fit: the only option is to skip it.
      return skip

    take = recursive_knapsack(W - item.weight, i - 1) + item.value
    return max(skip, take)

  return recursive_knapsack(W, i)


In [6]:
from sys import setrecursionlimit
# recurse_knapsack recurses one level per item index, so the recursion limit
# has to be raised before solving the big instance.
# NOTE(review): len(values) ** 2 is a very generous bound — the depth grows
# linearly with the number of items, so a much smaller limit would probably do
# (and an overly high limit can let runaway recursion crash the kernel); confirm.
setrecursionlimit(len(values) ** 2)

# Solve problem 2 over all items (last index = len(values) - 1) at capacity C.
obfuscate(recurse_knapsack(values, C, len(values) - 1))

In [7]:
# Space-saving bottom-up variant for the big instance: a single table row,
# updated in place. A[c] is the best value achievable with capacity c using
# the items processed so far; slot C (the full capacity) is included.
A = [0] * (C + 1)

n = len(values)

# Disabled by default: this does on the order of n * C steps no matter which
# subproblems actually matter, so it takes considerably longer than the
# memoized recursion above.
RUN_BOTTOM_UP = False

if RUN_BOTTOM_UP:
  for item in values:
    # Walk capacities downwards so A[c - item.weight] still holds the value
    # from before this item was considered (each item is used at most once).
    # Capacities below item.weight cannot hold the item and stay unchanged.
    for c in range(C, item.weight - 1, -1):
      candidate = A[c - item.weight] + item.value
      if candidate > A[c]:
        A[c] = candidate

  print(A[C])