In [49]:
%matplotlib notebook  
import numpy as np
import pandas as pd
from simanneal import Annealer
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("darkgrid")
from deap import algorithms, base, creator, tools
import random
import warnings
warnings.filterwarnings('ignore')

# <font color = "blue"> Self Assessment: Simulated Annealing for Gerrymandering </font>

In [2]:
# problem data for the gerrymandering exercise
num_districts = 10
min_voters_in_district = 150
max_voters_in_district = 350
num_cities = 18

# voter counts per city; position i in both lists describes city i
dems = [
    152, 81, 75, 34, 62, 38, 48, 74, 98,
    66, 83, 86, 72, 28, 112, 45, 93, 72,
]
reps = [
    62, 59, 83, 52, 87, 87, 69, 49, 62,
    72, 75, 82, 83, 53, 98, 82, 68, 98,
]
cities = pd.DataFrame({'dems': dems, 'reps': reps})

# same as in Lesson 4
def fitness_districts(assign, cities):
    """Score an assignment of cities to districts (larger is better).

    Parameters
    ----------
    assign : sequence of int
        assign[i] is the district label of city i; labels index an array of
        length `num_districts`.
    cities : pandas.DataFrame
        One row per city with 'dems' and 'reps' columns.

    Returns
    -------
    A single number (not a tuple): the count of districts where reps > dems,
    minus the total number of voters by which district sizes fall outside
    [min_voters_in_district, max_voters_in_district].
    """
    df = cities.groupby(assign).sum()
    fitness = sum(df['reps'] > df['dems'])
    # districts that received no city keep a total of 0 and are penalized too
    total_voters = np.zeros(num_districts, dtype=np.int32)
    total_voters[df.index] = df.sum(axis=1)
    # use the named limits rather than repeating 150 / 350 as literals
    clipped = np.clip(total_voters, min_voters_in_district, max_voters_in_district)
    fitness -= np.abs(clipped - total_voters).sum()
    return fitness

# for printing out an assignment of cities to districts
def summarize_districts(assign, cities):
    """Tabulate the outcome of an assignment, one row per district.

    Parameters
    ----------
    assign : sequence of int
        assign[i] is the district label of city i.
    cities : pandas.DataFrame
        One row per city with 'dems' and 'reps' columns.

    Returns
    -------
    pandas.DataFrame with columns 'reps', 'dems', 'total' and 'rep_win'
    (True where reps > dems).  Districts that received no city show zeros.
    """
    reps = np.zeros(num_districts, dtype=np.int32)
    dems = np.zeros(num_districts, dtype=np.int32)
    df = cities.groupby(assign).sum()
    reps[df.index] = df['reps']
    dems[df.index] = df['dems']
    # named `summary` so the builtin `dict` is not shadowed
    summary = {
        'reps': reps,
        'dems': dems,
        'total': reps + dems,
        'rep_win': reps > dems
    }
    return pd.DataFrame(data=summary)

In [31]:
# search settings: stop after max_iter moves or after max_moves_no_improve
# consecutive moves without a new best; temp is multiplied by alpha every move
max_moves_no_improve = 1000
max_iter = 10000
temp = 1000
alpha = 0.995

current_assign = np.random.randint(low=0,high=num_districts,size = num_cities) # random starting points
current_fitness = fitness_districts(current_assign,cities)

# the starting point is the best seen so far, so best_assign is always defined
# even if no move is ever accepted
best_assign = current_assign.copy()
best_fitness = current_fitness

iteration = 1
num_moves_no_improve = 0
while (num_moves_no_improve < max_moves_no_improve and iteration < max_iter):

    num_moves_no_improve += 1
    new_assign = current_assign.copy()
    # move one random city to a random district (can sometimes create no change ...)
    new_assign[np.random.randint(num_cities)] = np.random.randint(num_districts)
    new_fitness = fitness_districts(new_assign,cities)
    delta = new_fitness-current_fitness # new-current for maximization
    # worse moves (delta < 0) are accepted with probability exp(delta / temp)
    prob = np.exp(min(delta, 0) / temp)
    accept = new_fitness > current_fitness or np.random.uniform() < prob

#     # probably remove the print statement in your own code
#     print('{:3d} |   {:3d}   |  {:4d} |  {:3d} |   {:>3}  | {:0.4f} | {:2.2f}'.format(
#         new_fitness, current_fitness, delta, best_fitness, "yes" if accept else "no", prob,
#         temp))

    if accept:
        # new_assign is a fresh copy, so rebinding (not copying) is safe here
        current_assign = new_assign
        current_fitness = new_fitness
        if current_fitness > best_fitness:
            best_assign = current_assign
            best_fitness = current_fitness
            num_moves_no_improve = 0
    temp *= alpha
    iteration += 1

summarize_districts(best_assign, cities)

Unnamed: 0,reps,dems,total,rep_win
0,150,146,296,True
1,154,111,265,True
2,166,165,331,True
3,83,72,155,True
4,146,143,289,True
5,62,152,214,False
6,165,161,326,True
7,62,98,160,False
8,162,121,283,True
9,171,150,321,True


This seems to find good solutions with Republicans winning 7 or 8 districts.  However, the optimum is 9 districts, so we'll see if the simanneal package does better.

# <font color = "blue"> Self Assessment: Simulated Annealing for Gerrymandering - part 2</font>

In [5]:
# relies on the cities dataframe defined above

from simanneal import Annealer

class GerrymanderingProblem(Annealer):
    """Districting problem expressed for the `simanneal` Annealer.

    The state is an array in which entry i is the district label of city i.
    """

    # pass extra data (the cities table and the district limits) into the constructor
    def __init__(self, state, cities_df, num_districts,
                 min_voters=150, max_voters=350):
        """Store the problem data, then hand `state` to Annealer.

        cities_df     : DataFrame with 'dems' and 'reps' columns, one row per city
        num_districts : number of district labels (0 .. num_districts-1)
        min_voters, max_voters : allowed district size; sizes outside this
                        range are penalized in energy()
        """
        self.cities = cities_df
        self.num_dist = num_districts
        self.min_voters = min_voters
        self.max_voters = max_voters
        super(GerrymanderingProblem, self).__init__(state)  # important!

    def move(self):
        """Move one city to another district.  Doesn't check to make sure city actually changes.

        Deliberately returns None: newer simanneal releases treat a non-None
        return value as the *change* in energy, so returning the absolute
        energy would corrupt the search.  With None the library calls
        energy() itself.
        """
        assign = self.state
        assign[np.random.randint(len(assign))] = np.random.randint(self.num_dist)

    def energy(self):
        """Compute number of districts won - penalty for too small or too large.  Note that
        we've negated the return value so that we can minimize to find a maximum"""
        df = self.cities.groupby(self.state).sum()
        fitness = sum(df['reps'] > df['dems'])
        # size the array from this instance rather than a notebook global
        total_voters = np.zeros(self.num_dist, dtype=np.int32)
        total_voters[df.index] = df.sum(axis=1)
        clipped = np.clip(total_voters, self.min_voters, self.max_voters)
        fitness -= np.abs(clipped - total_voters).sum()
        return -fitness

# random starting assignment: one district label per city
initial_assign = np.random.randint(low=0,high=num_districts,size = num_cities)
gerrymander = GerrymanderingProblem(initial_assign, cities, num_districts)
#gerrymander.set_schedule(gerrymander.auto(minutes=.2)) #set approximate time to find results
gerrymander.Tmax = 25000.0  # Max (starting) temperature
gerrymander.Tmin = 2.5      # Min (ending) temperature
gerrymander.steps = 5000   # Number of iterations
gerrymander.updates = 100   # Number of updates (by default an update prints to stdout)
gerrymander.copy_strategy = "method"
# anneal() returns the best state and its energy; energy() is the negated
# fitness, so flip the sign to get the fitness back
best_assign, best_energy = gerrymander.anneal()
best_fitness = -best_energy

summarize_districts(best_assign, cities)

 Temperature        Energy    Accept   Improve     Elapsed   Remaining
     2.50000         -7.00     8.00%     4.00%     0:00:17     0:00:00

Unnamed: 0,reps,dems,total,rep_win
0,68,93,161,False
1,144,184,328,False
2,180,157,337,True
3,146,143,289,True
4,149,190,339,False
5,125,94,219,True
6,127,117,244,True
7,83,75,158,True
8,152,120,272,True
9,147,146,293,True


Whether using the auto temperature schedule or a manually selected temperature, this usually returns a result of 7 or 8 districts.  Still not optimal.  This search space is huge and a trajectory-based method has a tough time exploring the vast space!

# <font color = "blue"> Self Assessment: Continuous Simulated Annealing </font> 

In [29]:
# the argument to the objective function is a list that contains the values of x and y
def f(xy):
    """Objective 0.2 + sum_i (v_i**2 - 0.1*cos(6*pi*v_i)) over the entries of xy.

    Accepts any array-like (list, tuple or numpy array); a plain list used
    to fail on `xy**2`.  Returns a scalar.
    """
    xy = np.asarray(xy, dtype=float)
    obj = 0.2 + np.sum(xy**2 - 0.1*np.cos(6*np.pi*xy))
    return obj

current_x = np.random.uniform(low=-1, high=1, size=2)
current_obj = f(current_x)
best_x = current_x
best_obj = current_obj
temp = 5  # initial temperature; chosen high so uphill moves are readily accepted early on
alpha = 0.95
iteration = 1
max_iterations = 200
# save some results for plotting
trajectory = [[iteration, current_x[0], current_x[1], current_obj, best_obj]]

while (iteration < max_iterations):  # could have also used a for loop
    # Gaussian step, then clip back into the box [-1, 1] x [-1, 1]
    new_x = current_x + np.random.normal(loc=0, scale=0.1, size=2)
    new_x = np.minimum([1, 1], np.maximum([-1, -1], new_x))
    new_obj = f(new_x)
    delta = current_obj - new_obj  # current-new for minimization: negative for a worse move
    # builtin min, not np.min: np.min(delta, 0) would read the 0 as an axis
    prob = np.exp(min(delta, 0) / temp)
    accept = new_obj < current_obj or np.random.uniform() < prob
    if accept:
        current_x = new_x
        current_obj = new_obj
        if current_obj < best_obj:
            best_x = current_x
            best_obj = current_obj
        # only accepted moves are recorded for the plot
        trajectory.append(
            [iteration, current_x[0], current_x[1], current_obj, best_obj])
    temp *= alpha
    iteration += 1

trajectory = np.array(trajectory)
fig = plt.figure(figsize=(5, 3.5))
line_best, = plt.plot(trajectory[:,0],trajectory[:,4],label='Best Val')
line_curr, = plt.plot(trajectory[:,0],trajectory[:,3],label='Curr Val',color='red')
plt.xlabel('Iteration')
plt.ylabel('Function Value')
plt.legend(handles=[line_best, line_curr])
plt.title('Smallest Val. Found: {:0.4f}'.format(best_obj));
    
print("The smallest value found was {:0.4f} at x = {:0.4f} and y = {:0.4f}".format(best_obj, best_x[0], best_x[1]))

<IPython.core.display.Javascript object>

The smallest value found was 0.0039 at x = 0.0144 and y = 0.0001


There is no exact answer here.  Starting at a higher temperature, T = 5, and decreasing the temperature more slowly, alpha = 0.95, both seem to improve convergence because they allow more bad moves.  Decreasing the scale of the moves in the normal distribution also seems to help.

# <font color = "blue"> Self Assessment: Continuous Simulated Annealing with `simanneal` </font> 

In [30]:
from simanneal import Annealer

class NonConvex2D(Annealer):
    """Minimize 0.2 + sum(x**2 - 0.1*cos(6*pi*x)) over the box [-1, 1]^2 with `simanneal`.

    The state is a length-2 numpy array.
    """

    # no extra data so just initialize with state
    def __init__(self, state):
        super(NonConvex2D, self).__init__(state)  # important!

    def move(self):
        """Add a perturbation selected from normal distribution (sd = 0.1), clipped to the bounds.

        Deliberately returns None: newer simanneal releases treat a non-None
        return value as the *change* in energy, so returning the absolute
        energy would corrupt the search.  With None the library calls
        energy() itself.
        """
        xy_new = self.state + np.random.normal(loc = 0,scale =.1,size=2)
        # clip to fit bounds
        self.state = np.minimum([1,1],np.maximum([-1,-1],xy_new))

    def energy(self):
        """Objective value at the current state."""
        x = self.state
        return 0.2 + np.sum(x**2 - 0.1*np.cos(6*np.pi*x))

x0 = np.random.uniform(low=-1,high=1,size=2)
problem2D = NonConvex2D( x0 )
# we're using np.array to hold our state variable, so we'll use .copy() to clone.
# Set this before auto() so the trial run that picks the schedule copies
# states the same way the real anneal does.
problem2D.copy_strategy = "method"
problem2D.set_schedule(problem2D.auto(minutes=.5))
best_x, best_fun = problem2D.anneal()

(best_fun,best_x)

 Temperature        Energy    Accept   Improve     Elapsed   Remaining
     0.00022          0.00     0.05%     0.00%     0:00:02    -1:59:59 Temperature        Energy    Accept   Improve     Elapsed   Remaining
     0.00022          0.00     0.11%     0.05%     0:00:26     0:00:00

(6.878934761533895e-07, array([ 6.94820320e-05, -1.78409751e-04]))

Again, there isn't an exact answer.  Increasing the time for the auto schedule and decreasing the scale of the moves seems to help the search get closer to the exact optimum value.  Moving randomly is good for exploring the space, but not so good for zeroing in on a minimum.

# <font color = "blue"> Self Assessment: Gerrymandering Problem with GA </font>

In [39]:
# gerrymandering data for the GA section (same values as the simulated annealing section)
num_districts = 10
min_voters_in_district = 150
max_voters_in_district = 350
num_cities = 18

# voter counts per city; position i in both lists describes city i
dems = [152, 81, 75, 34, 62, 38,
        48, 74, 98, 66, 83, 86,
        72, 28, 112, 45, 93, 72]
reps = [62, 59, 83, 52, 87, 87,
        69, 49, 62, 72, 75, 82,
        83, 53, 98, 82, 68, 98]
cities = pd.DataFrame(dict(dems=dems, reps=reps))

# same as in Lesson 4
def fitness_districts(assign, cities):
    """Score an assignment of cities to districts (larger is better).

    Parameters
    ----------
    assign : sequence of int
        assign[i] is the district label of city i; labels index an array of
        length `num_districts`.
    cities : pandas.DataFrame
        One row per city with 'dems' and 'reps' columns.

    Returns
    -------
    A single number: the count of districts where reps > dems, minus the
    total number of voters by which district sizes fall outside
    [min_voters_in_district, max_voters_in_district].
    """
    df = cities.groupby(assign).sum()
    fitness = sum(df['reps'] > df['dems'])
    # districts that received no city keep a total of 0 and are penalized too
    total_voters = np.zeros(num_districts, dtype=np.int32)
    total_voters[df.index] = df.sum(axis=1)
    # use the named limits rather than repeating 150 / 350 as literals
    clipped = np.clip(total_voters, min_voters_in_district, max_voters_in_district)
    fitness -= np.abs(clipped - total_voters).sum()
    return fitness

def fitness_districts_tuple(assign, cities):
    """Return fitness_districts(assign, cities) wrapped in a 1-tuple."""
    score = fitness_districts(assign, cities)
    return (score,)

# for printing out an assignment of cities to districts
def summarize_districts(assign, cities):
    """Tabulate the outcome of an assignment, one row per district.

    Parameters
    ----------
    assign : sequence of int
        assign[i] is the district label of city i.
    cities : pandas.DataFrame
        One row per city with 'dems' and 'reps' columns.

    Returns
    -------
    pandas.DataFrame with columns 'reps', 'dems', 'total' and 'rep_win'
    (True where reps > dems).  Districts that received no city show zeros.
    """
    reps = np.zeros(num_districts, dtype=np.int32)
    dems = np.zeros(num_districts, dtype=np.int32)
    df = cities.groupby(assign).sum()
    reps[df.index] = df['reps']
    dems[df.index] = df['dems']
    # named `summary` so the builtin `dict` is not shadowed
    summary = {
        'reps': reps,
        'dems': dems,
        'total': reps + dems,
        'rep_win': reps > dems
    }
    return pd.DataFrame(data=summary)

In [41]:
def customGA(pop_size, cx_prob, mut_prob, max_gen, max_no_improve,
             toolbox=None, stats=None):
    """Generational GA with a hall of fame and an early-stopping rule.

    Parameters
    ----------
    pop_size : number of individuals in the population
    cx_prob : probability that a pair of offspring is mated
    mut_prob : probability that an offspring is mutated
    max_gen : maximum number of generations
    max_no_improve : stop after this many consecutive generations in which
        the best fitness value did not change
    toolbox : DEAP toolbox providing population, evaluate, select, clone,
        mate and mutate.  Defaults to the module-level `toolbox`.
    stats : DEAP Statistics object compiled each generation.  Defaults to
        the module-level `stats`.

    Returns
    -------
    (best_val, best_x, logbook): best fitness value as a scalar, the best
    individual as a plain list, and the per-generation logbook.
    """
    # fall back to the notebook-level objects so existing calls keep working
    if toolbox is None:
        toolbox = globals()['toolbox']
    if stats is None:
        stats = globals()['stats']

    pop = toolbox.population(n=pop_size)
    logbook = tools.Logbook()
    hof = tools.HallOfFame(1)

    # Evaluate the entire population
    for ind, fit in zip(pop, map(toolbox.evaluate, pop)):
        ind.fitness.values = fit

    hof.update(pop)
    # scalar, so it compares correctly with curr_best_val below
    # (fitness.values itself is a tuple)
    best_val = hof[0].fitness.values[0]
    num_no_improve = 0
    generation = 0

    while num_no_improve < max_no_improve and generation < max_gen:

        # Select the next generation individuals
        selected = toolbox.select(pop, len(pop))
        # Clone the selected individuals
        offspring = list(map(toolbox.clone, selected))

        # Apply crossover and mutation on the offspring
        for child1, child2 in zip(offspring[::2], offspring[1::2]):
            if random.random() < cx_prob:
                toolbox.mate(child1, child2)
                del child1.fitness.values
                del child2.fitness.values

        for mutant in offspring:
            if random.random() < mut_prob:
                toolbox.mutate(mutant)
                del mutant.fitness.values

        # Evaluate the individuals with an invalid fitness
        invalid_ind = [ind for ind in offspring if not ind.fitness.valid]
        for ind, fit in zip(invalid_ind, map(toolbox.evaluate, invalid_ind)):
            ind.fitness.values = fit
        num_evals = len(invalid_ind)

        # The population is entirely replaced by the offspring
        pop[:] = offspring

        # track the best value and reset counter if there is a change
        hof.update(pop)
        curr_best_val = hof[0].fitness.values[0]
        num_no_improve += 1
        if curr_best_val != best_val:
            best_val = curr_best_val
            num_no_improve = 0

        # record stats
        record = stats.compile(pop)
        logbook.record(gen=generation, evals=num_evals, **record)

        # increment generation
        generation += 1

    best_x = list(hof[0])

    return best_val, best_x, logbook

In [72]:
# warning - We're using a large population here (2000) to really explore the search space
# this will take a few minutes to run.  A pop size of 200 will usually give a solution 
# with 8 districts.  Note also that the random number seed is fixed just before the
# call to customGA so that running this code again will produce the same answer

individual_size = num_cities

# create a maximization problem (positive weight)
creator.create("FitnessGerry", base.Fitness, weights=(1.0, ))
# tells us what kind of individuals we'll have - lists of integers
creator.create("Individual", list, fitness=creator.FitnessGerry)

# configure toolbox
toolbox = base.Toolbox()
# define how we create an individual and a population
def create_individual(nd, nc):
    """Return nc district labels drawn uniformly from range(nd)."""
    return random.choices(range(nd), k=nc)
toolbox.register("indices", create_individual, num_districts, individual_size)
toolbox.register("individual", tools.initIterate, creator.Individual,
                 toolbox.indices)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
# Fitness
toolbox.register("evaluate", fitness_districts_tuple, cities=cities)
# Selection
toolbox.register("select", tools.selTournament, tournsize=8)
# Crossover
toolbox.register("mate", tools.cxOnePoint)
# Mutation
toolbox.register("mutate", tools.mutUniformInt, low=0, up=num_districts-1, indpb=0.15)

# configure statistics to collect
stats = tools.Statistics(key=lambda ind: ind.fitness.values)
stats.register("avg", np.mean)
stats.register("std", np.std)
stats.register("min", np.min)
stats.register("max", np.max)

# define search parameters
pop_size = 2000
crossover_prob = .8
mutation_prob = .25
max_gen = 500
max_no_improve = 100

# get solution - random seed added for reproducibility
random.seed(15)
best_fitness, best_assign, log = customGA(pop_size, crossover_prob, mutation_prob,
                                     max_gen, max_no_improve)

# plot search convergence
fig = plt.figure(figsize=(5, 3.5))
line_best, = plt.plot(log.select('gen'), log.select('max'), label='Best')
line_avg, = plt.plot(log.select('gen'),
                     log.select('avg'),
                     color='red',
                     label='Avg')
plt.xlabel('Generation')
plt.ylabel('Fitness')
plt.legend(handles=[line_best, line_avg])
# best_fitness is the value returned by customGA (the old name best_dist was never defined here)
plt.title('Most Dist. Found: {:d}'.format(int(best_fitness)));

<IPython.core.display.Javascript object>

Text(0.5, 1.0, 'Most Dist. Found: 9')

In [74]:
# district-by-district breakdown of the best assignment returned by customGA
summarize_districts(best_assign,cities)

Unnamed: 0,reps,dems,total,rep_win
0,146,143,289,True
1,83,75,158,True
2,98,72,170,True
3,154,152,306,True
4,144,131,275,True
5,111,226,337,False
6,180,157,337,True
7,155,131,286,True
8,167,160,327,True
9,83,72,155,True


# <font color = "blue"> Self Assessment: Experiment with continuous GA </font>

In [88]:
# unfold to see code
def f(x):
    """Sum of 0.1 + v**2 - 0.1*cos(6*pi*v) over the entries v of x."""
    total = 0
    for value in x:
        total = total + (0.1 + value**2 - 0.1 * np.cos(6 * np.pi * value))
    return total


def f_tuple(x):
    """Return f(x) as a 1-tuple; this version is for DEAP since it expects a tuple."""
    value = f(x)
    return (value,)


# every decision variable is kept inside [low_bound, high_bound]
low_bound, high_bound = -1.0, 1.0

# number of decision variables = dimension
individual_size = 20


def NonConvex2D_Explore(pop_size=20,
                        crossover_prob=.8,
                        mutation_prob=0.2,
                        max_gen=2000,
                        max_no_improve=200,
                        tournsize = 3,
                        indpb=0.1,
                        sigma=0.3):
    """Run customGA on the continuous objective f and plot the convergence.

    Parameters
    ----------
    pop_size, crossover_prob, mutation_prob, max_gen, max_no_improve :
        passed straight through to customGA
    tournsize : number of individuals drawn for each selection tournament
    indpb : probability of mutation of each variable of a mutated individual
    sigma : standard deviation (scale) of the Gaussian mutation

    Returns
    -------
    (best_val, best_x, log) exactly as returned by customGA.

    Side effect: rebinds the module-level names `toolbox` and `stats`.
    """
    # customGA looks up `toolbox` and `stats` in module scope.  Without this
    # declaration the objects built below would be locals that customGA never
    # sees, and it would silently run whatever toolbox an earlier cell left behind.
    global toolbox, stats

    # create a minimization problem
    creator.create("NonConvex2D", base.Fitness, weights=(-1.0, ))
    # tells us what kind of individuals we'll have - lists of floats
    creator.create("Individual", list, fitness=creator.NonConvex2D)

    # configure toolbox
    toolbox = base.Toolbox()

    # define how we create an individual and a population
    def create_individual(n):
        return np.random.uniform(low_bound, high_bound, size=n).tolist()

    toolbox.register("indices", create_individual, individual_size)
    toolbox.register("individual", tools.initIterate, creator.Individual,
                     toolbox.indices)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)

    # Fitness
    toolbox.register("evaluate", f_tuple)
    # Selection
    toolbox.register("select", tools.selTournament, tournsize=tournsize)
    # Crossover
    toolbox.register("mate", tools.cxOnePoint)
    # Mutation
    toolbox.register("mutate",
                     tools.mutGaussian,
                     mu=0,
                     sigma=sigma,
                     indpb=indpb)

    # add decorator (a wrapper) to the mutation operator to clip to bounds;
    # parameters are named lower/upper so the builtins min/max stay usable
    def checkBounds(lower, upper):
        def decorator(func):
            def wrapper(*args, **kargs):
                offspring = func(*args, **kargs)
                for child in offspring:
                    for i in range(len(child)):
                        child[i] = min(upper, max(lower, child[i]))
                return offspring

            return wrapper

        return decorator

    toolbox.decorate("mutate", checkBounds(low_bound, high_bound))

    # configure statistics to collect
    stats = tools.Statistics(key=lambda ind: ind.fitness.values)
    stats.register("avg", np.mean)
    stats.register("std", np.std)
    stats.register("min", np.min)
    stats.register("max", np.max)

    # get solution
    best_val, best_x, log = customGA(pop_size, crossover_prob, mutation_prob,
                                     max_gen, max_no_improve)

    # plot search convergence
    fig = plt.figure(figsize=(5, 3.5))
    line_best, = plt.plot(log.select('gen'), log.select('min'), label='Best')
    line_avg, = plt.plot(log.select('gen'),
                         log.select('avg'),
                         color='red',
                         label='Avg')

    plt.xlabel('Generation')
    plt.ylabel('Function')
    plt.legend(handles=[line_best, line_avg])
    # format the float directly; int(best_val) truncated every value below 1 to 0
    plt.title('Min Val Found: {:.2f}'.format(best_val))

    # only the first two coordinates are printed, whatever individual_size is
    print(
        'The smallest value of f found is {:0.5f} at x = {:0.5f}, y = {:0.5f}'.
        format(best_val, best_x[0], best_x[1]))
    return best_val, best_x, log

In [93]:
# consider these to be the base values of the parameters
val, x, log = NonConvex2D_Explore(pop_size=20,
                                  crossover_prob=0.8,
                                  mutation_prob=0.2,
                                  max_gen=2000,
                                  max_no_improve=200,
                                  tournsize = 3,
                                  indpb=0.1,
                                  sigma=0.3)
# we could have just typed 
# NonConvex2D_Explore(20, 0.8, 0.2, 2000, 200, 3, 0.1, 0.3)
# but did it as above for clarity

<IPython.core.display.Javascript object>

The smallest value of f found is 0.01744 at x = -0.00268, y = 0.00369


* Experiment with `tournsize` in line 23.  Tournament selection works by first choosing `tournsize` individuals from the population and then choosing the best of those to be selected.  Small values for `tournsize` allow more of the weaker (less optimal) individuals to be selected.  Large values make it hard for less optimal individuals to be selected.  How does `tournsize` affect convergence? What happens if you make `tournsize` the same as the population size?

In [90]:
# tournsize raised from the base value of 3 to 10; everything else is unchanged.
# This seems to have little effect.
val, x, log = NonConvex2D_Explore(pop_size=20,
                                  crossover_prob=0.8,
                                  mutation_prob=0.2,
                                  max_gen=2000,
                                  max_no_improve=200,
                                  tournsize = 10,
                                  indpb=0.1,
                                  sigma=0.3)

<IPython.core.display.Javascript object>

The smallest value of f found is 0.01118 at x = 0.00215, y = -0.00236


In [92]:
# increasing tournsize to 20 (the population size): every tournament now draws as many
# individuals as the population holds, so selection pressure is very high and the
# selected parents are dominated by the best individuals
# (NOTE(review): the draw appears to be with replacement, so not every parent is
# guaranteed to be the single best - confirm against the DEAP docs)
# the search seems to do OK though
val, x, log = NonConvex2D_Explore(pop_size=20,
                                  crossover_prob=0.8,
                                  mutation_prob=0.2,
                                  max_gen=2000,
                                  max_no_improve=200,
                                  tournsize = 20,
                                  indpb=0.1,
                                  sigma=0.3)

<IPython.core.display.Javascript object>

The smallest value of f found is 0.00303 at x = 0.00331, y = 0.00205


* Experiment with `indpb` at line 37.  This parameter is the probability that an element of an individual will undergo a mutation.  What happens if you increase it?

In [96]:
# increasing indpb from 0.1 to 0.9 means that about 90% of the entries in a mutated
# vector will change.  The search is a bit more random this way and seems to have
# more trouble getting very close to the optimum
val, x, log = NonConvex2D_Explore(pop_size=20,
                                  crossover_prob=0.8,
                                  mutation_prob=0.2,
                                  max_gen=2000,
                                  max_no_improve=200,
                                  tournsize = 3,
                                  indpb=0.9,
                                  sigma=0.3)

<IPython.core.display.Javascript object>

The smallest value of f found is 0.00616 at x = 0.00116, y = -0.00129


* Experiment with the scale parameter sigma.  Large values mean larger moves.  How does this parameter affect convergence?

In [98]:
# increased sigma from 0.3 to 1 so mutations are much more varied now (moves that
# leave [-1, 1] are clipped back to the bounds).  It might have a small negative
# effect on local convergence
val, x, log = NonConvex2D_Explore(pop_size=20,
                                  crossover_prob=0.8,
                                  mutation_prob=0.2,
                                  max_gen=2000,
                                  max_no_improve=200,
                                  tournsize = 3,
                                  indpb=0.1,
                                  sigma=1)

<IPython.core.display.Javascript object>

The smallest value of f found is 0.01377 at x = -0.01339, y = 0.00266


* What kind of search are you getting if you simply set crossover_prob = 0 and mut_prob = 1?  Is this effective?  What happens if you turn off mutation and only allow crossover?

In [103]:
# crossover_prob = 0, mutation_prob = 1  (mutation only, no crossover)
# this is more or less pure random search now and it doesn't converge to the global optimum
val, x, log = NonConvex2D_Explore(pop_size=20,
                                  crossover_prob=0,
                                  mutation_prob=1,
                                  max_gen=2000,
                                  max_no_improve=200,
                                  tournsize = 3,
                                  indpb=0.1,
                                  sigma=0.3)

<IPython.core.display.Javascript object>

The smallest value of f found is 1.08823 at x = -0.00381, y = -0.00907


In [102]:
# crossover_prob = 1, mutation_prob = 0  (crossover only, no mutation)
# no mutations means that we're pretty much mixing the original population repeatedly 
# so it may never converge to the global optimum unless some mix of the original population
# is (0,0,0,...,0)
val, x, log = NonConvex2D_Explore(pop_size=20,
                                  crossover_prob=1,
                                  mutation_prob=0,
                                  max_gen=2000,
                                  max_no_improve=200,
                                  tournsize = 3,
                                  indpb=0.1,
                                  sigma=0.3)

<IPython.core.display.Javascript object>

The smallest value of f found is 4.02953 at x = 0.03524, y = -0.74991
