### Hill Climbing

In [1]:
import random

# Fixed default seed so that generated instances are reproducible.
SEED = 42


def problem(N, seed=SEED):
    """Generate a pseudo-random collection of integer lists.

    The generator is re-seeded with ``seed`` on every call, so the same
    ``(N, seed)`` pair always yields the same instance.

    Args:
        N: Controls the instance size: values are drawn from ``0..N-1``,
            between ``N`` and ``5 * N`` lists are produced, and each list is
            built from between ``N // 5`` and ``N // 2`` draws.
        seed: Seed passed to ``random.seed``.

    Returns:
        A list of lists of ints; each inner list holds distinct values
        (duplicate draws are collapsed, so a list may be shorter than the
        number of draws).
    """
    random.seed(seed)
    # The number of lists is drawn first, before any list content.
    list_count = random.randint(N, N * 5)
    instance = []
    for _ in range(list_count):
        draws = random.randint(N // 5, N // 2)
        distinct = {random.randint(0, N - 1) for _ in range(draws)}
        instance.append(list(distinct))
    return instance

# Print one small generated instance as a quick check of the generator.
for n in (5,):
    instance = problem(n)
    print(f"N = {instance}")

N = [[0], [1], [0], [4], [0], [1], [4], [4], [4], [1, 3], [0, 1], [2], [1], [0], [0, 2], [2, 4], [3], [3], [4], [2, 4], [0], [1], [0, 1], [3], [2, 3]]


In [2]:
from collections import Counter
from copy import copy

def hill_climbing(N, all_lists, *, max_useless_steps=10_000):
    """Stochastic hill climbing over subsets of ``all_lists``.

    Starting from the empty selection, the search repeatedly applies a random
    tweak (remove some selected lists, add some unselected ones) and keeps the
    candidate only if it scores strictly better. A selection is scored by the
    pair ``(distinct values covered, -total number of values)``, so covering
    more values comes first and using fewer values overall breaks ties.

    Args:
        N: Problem size supplied by callers. The search itself does not read
            it; it is kept for interface compatibility.
        all_lists: Iterable of candidate lists (any iterables of hashable
            items). Each is converted to a tuple and duplicates are merged.
        max_useless_steps: Stop after this many consecutive tweaks that did
            not improve the score.

    Returns:
        A set of tuples: the best selection found. The empty set when
        ``all_lists`` is empty.
    """
    all_lists = {tuple(lst) for lst in all_lists}

    def evaluate(state):
        # Distinct values covered, then (negated) total values used.
        covered = set().union(*state)
        return len(covered), -sum(len(lst) for lst in state)

    def tweak(solution):
        new_solution = set(solution)
        # Randomly drop some of the currently selected lists.
        while new_solution and random.random() < 0.7:
            new_solution.remove(random.choice(list(new_solution)))
        # Randomly add lists that were not selected before the tweak. The
        # pool does not change inside the loop, so build it once; when it is
        # empty (every list already selected, or no lists at all) there is
        # nothing to add and random.choice must not be called.
        addable = list(all_lists - solution)
        while addable and random.random() < 0.7:
            new_solution.add(random.choice(addable))
        return new_solution

    current_solution = set()
    current_score = evaluate(current_solution)
    useless_steps = 0
    while useless_steps < max_useless_steps:
        useless_steps += 1
        candidate_solution = tweak(current_solution)
        candidate_score = evaluate(candidate_solution)
        if candidate_score > current_score:
            # tweak() always returns a fresh set, so no copy is needed.
            current_solution = candidate_solution
            current_score = candidate_score
            useless_steps = 0

    return current_solution

In [None]:
# Run the hill climber on instances of growing size and report, for each,
# the total number of values in the selected lists (w) and how far that is
# above N, as a percentage of N (bloat).
for N in (5, 10, 20, 100, 500, 1000):
    solution = hill_climbing(N, problem(N, seed=42))
    weight = sum(len(_) for _ in solution)
    bloat = (weight - N) / N * 100
    print(
        f" Solution for N={N:,}: "
        + f"w={weight:,} "
        + f"(bloat={bloat:.0f}%)"
    )