In [9]:
from random import random, seed
from itertools import product
import matplotlib.pyplot as plt
import numpy as np
from math import sqrt

from icecream import ic

In [10]:
# Problem instance parameters.
UNIVERSE_SIZE = 1000  # number of elements (columns of the SETS matrix)
NUM_SETS = 200  # number of candidate sets (rows of the SETS matrix)
DENSITY = 0.2  # probability that a given element belongs to a given set

# Generator seeded from the instance parameters themselves, so identical
# parameters always yield the same random stream.
rng = np.random.Generator(
    np.random.PCG64(seed=[UNIVERSE_SIZE, NUM_SETS, int(10_000 * DENSITY)])
)

In [11]:
# DON'T EDIT THESE LINES!
# Builds the problem instance: SETS is a (NUM_SETS, UNIVERSE_SIZE) boolean
# matrix in which SETS[i, s] is True when set i contains element s.
# NOTE(review): the draws below use the global `np.random` state, not the
# seeded `rng` defined earlier, and no `np.random.seed` call is visible in this
# notebook -- so the instance presumably differs between kernel runs; confirm
# whether that is intended.

SETS = np.random.random((NUM_SETS, UNIVERSE_SIZE)) < DENSITY
# Make sure every element can be covered: an element that ended up in no set
# is added to one randomly chosen set.
for s in range(UNIVERSE_SIZE):
    if not np.any(SETS[:, s]):
        SETS[np.random.randint(NUM_SETS), s] = True
# Cost of each set: its size (number of contained elements) raised to 1.1.
COSTS = pow(SETS.sum(axis=1), 1.1)

In [12]:
def valid(solution):
    """Tell whether the selected sets together cover the whole universe.

    `solution` is used directly as a row index into SETS (a boolean mask or
    a sequence of set indices). The selected rows are OR-ed together and the
    result is True only if every element ends up covered.

    Note: `None` is a legal numpy index (it adds an axis), so `valid(None)`
    evaluates to `np.all(SETS)` instead of raising.
    """
    covered = np.logical_or.reduce(SETS[solution])
    return np.all(covered)


def cost(solution):
    """Return the sum of the costs of the sets selected by `solution`."""
    selected_costs = COSTS[solution]
    return selected_costs.sum()

In [None]:
# Pre-processing

# Indices of the sets that contain no element at all (empty rows of SETS).
tabu = [idx for idx, members in enumerate(SETS) if members.sum() == 0]
ic(tabu)

In [14]:
from numpy import ceil, floor, ndarray


def plot(solution):
    """Show the rows of SETS selected by `solution` as an image.

    A new 10x10 inch figure is created for every call.
    """
    plt.figure(figsize=(10, 10))
    selected_rows = SETS[solution]
    plt.imshow(selected_rows)
    plt.show()


def acceptable(geneset):
    """Tell whether `geneset` avoids every tabu set and is a valid cover.

    Returns False when any gene listed in `tabu` is switched on, otherwise
    the result of `valid(geneset)`. A `TypeError` raised while inspecting the
    geneset (for example when it is `None`) is reported as False.
    """
    try:
        # Materialise all lookups first so that an un-indexable geneset is
        # detected regardless of the individual gene values.
        tabu_flags = [geneset[t] for t in tabu]
        if any(tabu_flags):
            return False
        return valid(geneset)
    except TypeError:
        return False


def mutate(
    genesets,
    min_mutation,
    max_mutation,
):
    """Return a mutated copy of a population by flipping random genes.

    For every row of `genesets` a random batch of gene indices is drawn and
    those genes are flipped (selected <-> not selected). Batches keep being
    drawn and applied on top of each other until the row is a valid cover,
    so the total change of a row can exceed a single batch.

    A batch that contains a gene listed in `tabu` is discarded *before* it is
    applied, so tabu genes are never flipped by this function.

    Parameters
    ----------
    genesets : 2-D boolean array, one row per individual. It is not modified.
    min_mutation, max_mutation : bounds of the batch size, expressed as a
        multiple of UNIVERSE_SIZE (not NUM_SETS); the upper bound is
        inclusive. Indices are drawn with replacement, and a gene drawn more
        than once in a batch is still flipped only once.

    Returns
    -------
    A new array with the same shape as `genesets`; every row is valid.
    """
    new_genesets = genesets.copy()
    # The batch-size bounds do not depend on the row: compute them once.
    fewest_flips = UNIVERSE_SIZE * min_mutation
    most_flips = UNIVERSE_SIZE * max_mutation
    # Iterating a 2-D array yields row views, so the in-place flips below
    # write straight into `new_genesets`.
    for new_geneset in new_genesets:
        while True:
            mutating_genes = rng.integers(
                0,
                NUM_SETS,
                rng.integers(fewest_flips, most_flips, endpoint=True),
            )
            # Reject the batch before touching the row; previously the flip
            # was applied first, which let tabu genes be switched on.
            if any(gene in tabu for gene in mutating_genes):
                continue
            new_geneset[mutating_genes] = ~new_geneset[mutating_genes]
            if valid(new_geneset):
                break

    return new_genesets

In [None]:
# Const
# Number of independent restarts of the search; each round starts from a
# freshly drawn random population.
ROUNDS = 5
# (min, max) factors multiplied by UNIVERSE_SIZE to get the range of the
# number of gene indices drawn per mutation batch.
MUTATION_PERCENT = (0.01, 0.03)
# Number of individuals (genesets) in the population.
POPULATION_SIZE = 5

print(
    f"Mutating genes: {UNIVERSE_SIZE * MUTATION_PERCENT[0]:.0f} to {UNIVERSE_SIZE * MUTATION_PERCENT[1]:.0f}"
)

In [None]:
from IPython.display import display, clear_output
import time

# Multi-start search: every round draws a random valid population, then
# repeatedly mutates it and restarts the population from any strictly better
# individual, until too large a share of the iterations has been stale.
fig1 = plt.figure(figsize=(10, 10))
ax1 = fig1.add_subplot(111)

best_geneset_overall = None
best_cost_overall = float("inf")
best_round = 0
best_round_iterations = 0
total_iterations = 0

start = time.time()
# Named `round_idx` so that the builtin `round` is not shadowed for the rest
# of the notebook.
for round_idx in range(1, ROUNDS + 1):
    # Init: draw random genesets until each individual is a valid cover.
    genesets = np.empty((POPULATION_SIZE, NUM_SETS), dtype=bool)
    for i in range(POPULATION_SIZE):
        while True:
            geneset = rng.random(NUM_SETS) < 0.5
            if valid(geneset):
                break
        genesets[i] = geneset
    costs = np.array([cost(g) for g in genesets])
    # Remember the best individual explicitly: if the round never improves on
    # its initial population, `genesets[0]` is not necessarily that best one.
    best_idx = np.argmin(costs)
    last_best_cost = costs[best_idx]
    best_geneset = genesets[best_idx].copy()
    iters = 0
    stale_iterations = 0
    history = [costs]
    while True:
        iters += 1
        stale_iterations += 1
        population = mutate(
            genesets,
            MUTATION_PERCENT[0],
            MUTATION_PERCENT[1],
        )
        costs = np.array([cost(g) for g in population])
        best_idx = np.argmin(costs)

        if costs[best_idx] < last_best_cost:
            stale_iterations = 0
            last_best_cost = costs[best_idx]
            best_geneset = population[best_idx].copy()
            # The whole next population restarts from the improved individual.
            genesets = np.repeat(
                best_geneset[np.newaxis, :], POPULATION_SIZE, axis=0
            )

        history.append(costs)
        if iters % 100 == 0:
            ax1.cla()
            ax1.set_title(
                f"Round {round_idx}/{ROUNDS} - Best cost: {last_best_cost:.2f}"
            )
            # One scatter call for the whole history (every entry holds
            # POPULATION_SIZE costs) instead of one call per generation.
            generations = np.repeat(np.arange(len(history)), POPULATION_SIZE)
            ax1.scatter(
                generations,
                np.concatenate(history),
                color="blue",
                alpha=1 / POPULATION_SIZE,
                marker=".",
            )
            ax1.set_yscale("log")
            display(fig1)
            clear_output(wait=True)
            plt.pause(0.1)

        # Stop the round once it ran for more than UNIVERSE_SIZE iterations
        # and the current stale streak exceeds 30% of them.
        if iters > UNIVERSE_SIZE and stale_iterations / iters > 0.3:
            break
    total_iterations += iters

    if last_best_cost < best_cost_overall:
        best_cost_overall = last_best_cost
        best_geneset_overall = best_geneset
        best_round = round_idx
        best_round_iterations = iters
    if last_best_cost <= UNIVERSE_SIZE:
        print("Found optimal solution")
        break
elapsed = time.time() - start

In [None]:
# Summary of the search: timing, best cost and the sets that were selected.
minutes = int(elapsed // 60)
seconds = elapsed % 60
print(f"Elapsed time: {minutes}m {seconds:.2f}s")
print(f"Best cost: {best_cost_overall}")
print(f"Total number of iterations: {total_iterations}")
print(f"Best round: {best_round}/{ROUNDS}")
print(f"Iterations in round {best_round}: {best_round_iterations}")

# `list(np.nonzero(x))` wraps the 1-tuple returned by nonzero and printed
# "[array([...])]"; flatnonzero(...).tolist() prints the plain index list.
# `best_geneset_overall` is still None when no round has been run.
if best_geneset_overall is None:
    selected_sets = []
else:
    selected_sets = np.flatnonzero(best_geneset_overall).tolist()
print(f"Selected sets: {selected_sets}")

if best_geneset_overall is None or not valid(best_geneset_overall):
    print("Something terrible happened!")