In [1]:
# execute to import notebook styling for tables and width etc.
# Downloads the course stylesheet from GitHub, so this cell needs network
# access every time it is run.
from IPython.core.display import HTML
import urllib.request
# NOTE(review): the response object is never closed explicitly.
response = urllib.request.urlopen('https://raw.githubusercontent.com/DataScienceUWL/DS775v2/master/ds755.css')
# NOTE(review): the HTML object is built but not displayed -- the statement ends
# with `;` and is not the cell's last expression -- so the stylesheet is
# presumably not applied. Confirm whether that is intended.
HTML(response.read().decode("utf-8"));

import warnings
# Suppress all warnings raised after this point.
warnings.filterwarnings('ignore')

In [15]:
# imports
%matplotlib notebook
from scipy.optimize import minimize
import babel.numbers as numbers
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("darkgrid")
import pandas as pd
import array
import random
import json
from deap import algorithms, base, creator, tools
import numpy as np
from mpl_toolkits.basemap import Basemap

import warnings
warnings.filterwarnings('ignore')

# Simulated Annealing for TSP with 48 cities

Adapt our "homemade" simulated annealing code from Section 2.1 in the lesson to solve the 48 capitals TSP problem from last week.  The json file `Caps48.json` in the data folder has both the distance matrix and the coordinates of the cities for plotting.  Note that the distance of a typical random tour is something like 80,000,000 meters and we're looking for something under 19,000,000 meters.  Your initial temperature has to be large enough to allow large moves on this scale and you'll have to increase `max_moves_no_improve` as well.  You should plot the progress of the search, like in the lesson, and you should plot the final result on the map of the United States as we did last week (that code is given in a cell below). Include a value for the random seed so that your results are reproducible.  Report a search that gives total distance < 18,000,000 meters.  A tour that achieves the optimum distance is shown in the json file.  

Put your code to find the solution in the cell below.  Your output should include a convergence plot like in the lesson.

<font color = "blue"> *** 4 points -  answer in cell below *** (don't delete this cell) </font>

In [29]:
# load data (this may have to be adapted for different problems)
# The JSON file is parsed once into `tsp`; the two entries used by the search
# are then pulled out into module-level names read by the functions below.
with open("data/Caps48.json", "r") as tsp_data:
    tsp = json.load(tsp_data)
# Indexed as distance_matrix[from_city][to_city] by tour_distance().
distance_matrix = tsp["DistanceMatrix"]
# Taken from the file's "TourSize" entry; used below as the number of cities
# when building the initial random permutation and picking reversal indices.
individual_size = tsp["TourSize"]


# define objective function
def tour_distance(individual, dist_mat):
    """Return the length of the closed tour ``individual``.

    individual: sequence of city indices in visiting order.
    dist_mat:   nested sequence where ``dist_mat[a][b]`` is the distance from
                city ``a`` to city ``b``.

    The total is the closing leg (last city back to the first) plus every
    consecutive leg along the tour, returned as a bare number.
    """
    # start with the leg that closes the loop
    total = dist_mat[individual[-1]][individual[0]]
    # then add each consecutive leg in visiting order
    for position in range(len(individual) - 1):
        here = individual[position]
        there = individual[position + 1]
        total += dist_mat[here][there]
    return total


def sub_tour_reversal(tour):
    """Return a copy of ``tour`` with one randomly chosen segment reversed.

    Two distinct positions ``i < j`` are drawn with ``np.random.choice`` and the
    cities at positions ``i..j`` (inclusive) are put in reverse order; the rest
    of the tour is left untouched. The input is not modified.

    The tour length is taken from ``tour`` itself rather than from the global
    ``individual_size``, so the function works for any tour of two or more
    cities. For a tour of ``individual_size`` cities the random draw is the
    same call as before, so seeded runs are unaffected.

    tour:    sequence (list or 1-D array) of city indices.
    returns: a new list of Python ints.
    raises:  ValueError (from ``np.random.choice``) if the tour has fewer than
             two cities.
    """
    tour = list(tour)
    num_cities = len(tour)
    # reverse a random tour segment
    i, j = np.sort(np.random.choice(num_cities, 2, replace=False))
    reversed_tour = tour[:i] + tour[i:j + 1][::-1] + tour[j + 1:]
    # plain ints keep the tour usable as list indices and printable with '{:d}'
    return [int(city) for city in reversed_tour]



def random_reversals(max_moves_no_improve, init_temp=80, alpha=0.99, seed=None,
                     verbose=True, return_trajectory=False):
    """Search for a short tour with random sub-tour reversals and
    simulated-annealing style acceptance.

    Starts from a random permutation of ``individual_size`` cities and proposes
    one ``sub_tour_reversal`` per iteration. Improving moves are always
    accepted; a worsening move is accepted with probability
    ``exp(delta / temp)``, where ``delta = current_dist - new_dist`` is
    negative. The temperature is multiplied by ``alpha`` after every move. The
    search stops after ``max_moves_no_improve`` consecutive moves that did not
    improve the best tour found so far.

    Reads the module-level ``distance_matrix`` and ``individual_size``.

    max_moves_no_improve: stopping threshold described above.
    init_temp:  starting temperature. NOTE: with the default of 80 and tour
                lengths in the tens of millions, ``exp(delta / temp)`` is
                effectively 0 for every worsening move, so the search behaves
                like a greedy hill climb. Pass a value on the scale of typical
                deltas to get real annealing. The default is kept only so
                existing calls behave as before.
    alpha:      geometric cooling factor applied after each move.
    seed:       if not None, passed to ``np.random.seed`` for reproducibility.
    verbose:    print the starting line and one table row per move.
    return_trajectory: if True, also return the per-iteration history.

    returns: ``(best_tour, best_dist, iteration)``; when ``return_trajectory``
             is True, ``(best_tour, best_dist, iteration, trajectory,
             trajectory_best)``, where each trajectory is a list of
             ``[iteration, distance]`` pairs starting at iteration 0.
    """
    # Random Number Seed
    # pass seed=<int> to get the same result every time the search is run
    if seed is not None:
        np.random.seed(seed)

    # initialize with a random tour
    current_tour = np.random.permutation(np.arange(individual_size)).tolist()
    current_dist = tour_distance(current_tour, distance_matrix)
    best_tour = current_tour
    best_dist = current_dist
    temp = init_temp
    iteration = 0
    num_moves_no_improve = 0

    if verbose:
        # '{:g}' prints 80 as '80' but also accepts a float temperature
        print('Starting Distance {:d} and Starting Temp {:g}\n'.format(
            current_dist, temp))
        print('New | Current | Delta | Best | Accept |  Prob  | Temp')

    # these two variables just save info for plotting but aren't
    # essential for the algorithm; the starting tour is recorded as iteration 0
    trajectory = [[iteration, current_dist]]
    trajectory_best = [[iteration, best_dist]]

    while num_moves_no_improve < max_moves_no_improve:

        num_moves_no_improve += 1
        new_tour = sub_tour_reversal(current_tour)
        new_dist = tour_distance(new_tour, distance_matrix)
        delta = current_dist - new_dist

        if delta >= 0:
            prob = 1.0
        elif temp > 0:
            prob = float(np.exp(delta / temp))
        else:
            # on very long runs the temperature underflows to 0.0; treat that
            # as "never accept a worse tour" instead of dividing by zero
            prob = 0.0
        # the uniform draw only happens for non-improving moves (short-circuit)
        accept = new_dist < current_dist or np.random.uniform() < prob

        if verbose:
            print('{:3d} |   {:3d}   |  {:4d} |  {:3d} |   {:>3}  | {:0.4f} | {:2.2f}'.format(
                new_dist, current_dist, delta, best_dist, "yes" if accept else "no", prob,
                temp))

        if accept:
            current_tour = new_tour
            current_dist = new_dist
            if current_dist < best_dist:
                best_tour = current_tour
                best_dist = current_dist
                num_moves_no_improve = 0
        temp *= alpha
        iteration += 1
        trajectory.append([iteration, current_dist])
        trajectory_best.append([iteration, best_dist])

    if return_trajectory:
        return best_tour, best_dist, iteration, trajectory, trajectory_best
    return best_tour, best_dist, iteration

# Run the search; it stops once 1000 consecutive moves have failed to improve
# the best tour. `iterations` is the total number of moves tried.
best_tour, best_dist, iterations = random_reversals(1000)

Starting Distance 72451892 and Starting Temp 80

New | Current | Delta | Best | Accept |  Prob  | Temp
73615339 |   72451892   |  -1163447 |  72451892 |    no  | 0.0000 | 80.00
72562499 |   72451892   |  -110607 |  72451892 |    no  | 0.0000 | 79.20
72991167 |   72451892   |  -539275 |  72451892 |    no  | 0.0000 | 78.41
72432644 |   72451892   |  19248 |  72451892 |   yes  | 1.0000 | 77.62
71200344 |   72432644   |  1232300 |  72432644 |   yes  | 1.0000 | 76.85
70178754 |   71200344   |  1021590 |  71200344 |   yes  | 1.0000 | 76.08
70543503 |   70178754   |  -364749 |  70178754 |    no  | 0.0000 | 75.32
70135405 |   70178754   |  43349 |  70178754 |   yes  | 1.0000 | 74.57
69250586 |   70135405   |  884819 |  70135405 |   yes  | 1.0000 | 73.82
71353951 |   69250586   |  -2103365 |  69250586 |    no  | 0.0000 | 73.08
68916142 |   69250586   |  334444 |  69250586 |   yes  | 1.0000 | 72.35
70168161 |   68916142   |  -1252019 |  68916142 |    no  | 0.0000 | 71.63
69368429 |   68916142   

31560295 |   24264465   |  -7295830 |  24264465 |    no  | 0.0000 | 0.00
28362955 |   24264465   |  -4098490 |  24264465 |    no  | 0.0000 | 0.00
25055111 |   24264465   |  -790646 |  24264465 |    no  | 0.0000 | 0.00
30576223 |   24264465   |  -6311758 |  24264465 |    no  | 0.0000 | 0.00
28452682 |   24264465   |  -4188217 |  24264465 |    no  | 0.0000 | 0.00
25919702 |   24264465   |  -1655237 |  24264465 |    no  | 0.0000 | 0.00
25679831 |   24264465   |  -1415366 |  24264465 |    no  | 0.0000 | 0.00
26490014 |   24264465   |  -2225549 |  24264465 |    no  | 0.0000 | 0.00
25753749 |   24264465   |  -1489284 |  24264465 |    no  | 0.0000 | 0.00
24422909 |   24264465   |  -158444 |  24264465 |    no  | 0.0000 | 0.00
27368217 |   24264465   |  -3103752 |  24264465 |    no  | 0.0000 | 0.00
27429645 |   24264465   |  -3165180 |  24264465 |    no  | 0.0000 | 0.00
25875781 |   24264465   |  -1611316 |  24264465 |    no  | 0.0000 | 0.00
24836617 |   24264465   |  -572152 |  24264465 |    n

22125188 |   19768935   |  -2356253 |  19768935 |    no  | 0.0000 | 0.00
19977377 |   19768935   |  -208442 |  19768935 |    no  | 0.0000 | 0.00
21403157 |   19768935   |  -1634222 |  19768935 |    no  | 0.0000 | 0.00
25457521 |   19768935   |  -5688586 |  19768935 |    no  | 0.0000 | 0.00
22140200 |   19768935   |  -2371265 |  19768935 |    no  | 0.0000 | 0.00
20704311 |   19768935   |  -935376 |  19768935 |    no  | 0.0000 | 0.00
22721616 |   19768935   |  -2952681 |  19768935 |    no  | 0.0000 | 0.00
20174681 |   19768935   |  -405746 |  19768935 |    no  | 0.0000 | 0.00
21753299 |   19768935   |  -1984364 |  19768935 |    no  | 0.0000 | 0.00
24732956 |   19768935   |  -4964021 |  19768935 |    no  | 0.0000 | 0.00
21045489 |   19768935   |  -1276554 |  19768935 |    no  | 0.0000 | 0.00
19640183 |   19768935   |  128752 |  19768935 |   yes  | 1.0000 | 0.00
19876539 |   19640183   |  -236356 |  19640183 |    no  | 0.0000 | 0.00
22705009 |   19640183   |  -3064826 |  19640183 |    no  

20326156 |   19218776   |  -1107380 |  19218776 |    no  | 0.0000 | 0.00
20114994 |   19218776   |  -896218 |  19218776 |    no  | 0.0000 | 0.00
20346512 |   19218776   |  -1127736 |  19218776 |    no  | 0.0000 | 0.00
23120318 |   19218776   |  -3901542 |  19218776 |    no  | 0.0000 | 0.00
21166851 |   19218776   |  -1948075 |  19218776 |    no  | 0.0000 | 0.00
23282116 |   19218776   |  -4063340 |  19218776 |    no  | 0.0000 | 0.00
23360807 |   19218776   |  -4142031 |  19218776 |    no  | 0.0000 | 0.00
20359331 |   19218776   |  -1140555 |  19218776 |    no  | 0.0000 | 0.00
22951576 |   19218776   |  -3732800 |  19218776 |    no  | 0.0000 | 0.00
24531225 |   19218776   |  -5312449 |  19218776 |    no  | 0.0000 | 0.00
19827092 |   19218776   |  -608316 |  19218776 |    no  | 0.0000 | 0.00
20153212 |   19218776   |  -934436 |  19218776 |    no  | 0.0000 | 0.00
22600398 |   19218776   |  -3381622 |  19218776 |    no  | 0.0000 | 0.00
19290547 |   19218776   |  -71771 |  19218776 |    no 

24818519 |   19048979   |  -5769540 |  19048979 |    no  | 0.0000 | 0.00
20681962 |   19048979   |  -1632983 |  19048979 |    no  | 0.0000 | 0.00
20949181 |   19048979   |  -1900202 |  19048979 |    no  | 0.0000 | 0.00
24896728 |   19048979   |  -5847749 |  19048979 |    no  | 0.0000 | 0.00
22945346 |   19048979   |  -3896367 |  19048979 |    no  | 0.0000 | 0.00
26452306 |   19048979   |  -7403327 |  19048979 |    no  | 0.0000 | 0.00
21640140 |   19048979   |  -2591161 |  19048979 |    no  | 0.0000 | 0.00
20080295 |   19048979   |  -1031316 |  19048979 |    no  | 0.0000 | 0.00
19327207 |   19048979   |  -278228 |  19048979 |    no  | 0.0000 | 0.00
19336401 |   19048979   |  -287422 |  19048979 |    no  | 0.0000 | 0.00
20833775 |   19048979   |  -1784796 |  19048979 |    no  | 0.0000 | 0.00
25360414 |   19048979   |  -6311435 |  19048979 |    no  | 0.0000 | 0.00
21031778 |   19048979   |  -1982799 |  19048979 |    no  | 0.0000 | 0.00
24818519 |   19048979   |  -5769540 |  19048979 |    

22077353 |   17894750   |  -4182603 |  17894750 |    no  | 0.0000 | 0.00
18454476 |   17894750   |  -559726 |  17894750 |    no  | 0.0000 | 0.00
22199745 |   17894750   |  -4304995 |  17894750 |    no  | 0.0000 | 0.00
19075115 |   17894750   |  -1180365 |  17894750 |    no  | 0.0000 | 0.00
18109075 |   17894750   |  -214325 |  17894750 |    no  | 0.0000 | 0.00
22345826 |   17894750   |  -4451076 |  17894750 |    no  | 0.0000 | 0.00
22246841 |   17894750   |  -4352091 |  17894750 |    no  | 0.0000 | 0.00
19733098 |   17894750   |  -1838348 |  17894750 |    no  | 0.0000 | 0.00
19792947 |   17894750   |  -1898197 |  17894750 |    no  | 0.0000 | 0.00
24479720 |   17894750   |  -6584970 |  17894750 |    no  | 0.0000 | 0.00
19952375 |   17894750   |  -2057625 |  17894750 |    no  | 0.0000 | 0.00
22750933 |   17894750   |  -4856183 |  17894750 |    no  | 0.0000 | 0.00
21441508 |   17894750   |  -3546758 |  17894750 |    no  | 0.0000 | 0.00
21879955 |   17894750   |  -3985205 |  17894750 |    

Plot the tour on the map of the U.S.  Code to do this is below.

<font color = "blue"> *** 1 points -  answer in cell below *** (don't delete this cell) </font>

In [30]:
# Build the Basemap used for plotting and load the city coordinates.
# NOTE(review): the name `map` shadows Python's built-in map() for every cell
# run after this one; plot_tour() reads this global, so renaming it means
# updating that function too.
# The llcrnr*/urcrnr* arguments are presumably the lower-left and upper-right
# corners of the plotted region in degrees, and 'lcc' a Lambert conformal
# conic projection -- confirm against the Basemap documentation.
map = Basemap(llcrnrlon=-119,
              llcrnrlat=22,
              urcrnrlon=-64,
              urcrnrlat=49,
              projection='lcc',
              lat_1=32,
              lat_2=45,
              lon_0=-95)

# read the coordinates of the 48 capitals
# (the same Caps48.json file is already parsed into `tsp` by the solver cell)
with open('./data/Caps48.json', 'r') as json_file:
    capitals = json.load(json_file)

# One row per city; plot_tour() uses column 0 as x and column 1 as y.
xy = np.array(capitals['Coordinates'])

def plot_tour(best_tour, xy, best_dist):
    """Draw the cities and the closed tour on the module-level Basemap ``map``.

    best_tour: sequence of city indices in visiting order.
    xy:        2-D array with one row per city (column 0 = x, column 1 = y).
    best_dist: tour length shown in the title. It is printed as
               ``int(best_dist)`` followed by "km"; no unit conversion is
               done here.
    """
    figure = plt.figure()
    figure.set_size_inches(6, 4)

    # outlines come from the shape file registered under the name "states"
    map.readshapefile('./data/st99_d00', name='states', drawbounds=True)

    # every city as a red dot, with no connecting line
    city_x, city_y = xy[:, 0], xy[:, 1]
    map.plot(city_x, city_y, c='r', marker='o', markersize=4, linestyle='')

    # append the first city again so the drawn route returns to its start
    closed_tour = np.append(best_tour, best_tour[0])
    map.plot(xy[closed_tour, 0], xy[closed_tour, 1],
             c='b', linewidth=1, linestyle='-')

    plt.title('Best Distance {:d} km'.format(int(best_dist)))
        
# The distance matrix is in meters, but plot_tour() labels its title in km,
# so convert before plotting (plot_tour truncates the value with int()).
plot_tour(best_tour, xy, best_dist / 1000)
print('The minimum distance found is {:d} after {:d} iterations'.format(
    int(best_dist), iterations))

<IPython.core.display.Javascript object>

The minimum distance found is 17894750 after 5186 iterations


Based on the plot of the tour, do you think you have found a nearly optimal tour?  Explain why or why not.

<font color = "blue"> *** 1 points -  answer in cell below *** (don't delete this cell) </font>

Based on the plot, it does look like I'm pretty close to the optimal distance because the contour of the route seems efficient and minimizes the area of the shape.

# Knapsack with simanneal package

The knapsack problem is a classical combinatorial optimization problem that will be good for practicing with the ideas of discrete local search and multistart.  Given a set of items, each with a weight and a value, determine which items to include in a collection so that the total weight is less than or equal to a given limit and the total value is as large as possible.  In the 0-1 version of the knapsack problem, the decision variables are binary (or boolean) and represent whether or not to include each item in the collection.  We'll start with 20 items and you need to determine the collection of items that maximizes the value and keeps the total weight up to 50 (that is $\leq 50$).

In [9]:
# generate random weights and values for a knapsack problem
# A fixed seed is used so everyone gets the same 20 items; the seed is reset
# afterwards so later random draws are not tied to it.
import numpy as np
num_items = 20
np.random.seed(seed=123)
# NOTE(review): numpy documents `high` as exclusive, so values presumably fall
# in 5..49 and weights in 1..9 -- confirm.
values = np.random.randint(low=5, high=50, size=num_items)
weights = np.random.randint(low=1, high=10, size=num_items)
# Upper limit on the total weight of the chosen items.
max_weight = 50
np.random.seed() # use system clock to reset the seed so future random numbers will appear random

Use the `simanneal` package to apply simulated annealing to finding a good solution to this knapsack problem. Show your solution below.  We suggest using lists of booleans to represent the items included in the knapsack as we did last week.

<font color = "blue"> *** 9 points -  answer in cell below *** (don't delete this cell) </font>

Do you think you've found the knapsack with highest possible value (the global max)?  Why or why not?

<font color = "blue"> *** 1 points -  answer in cell below *** (don't delete this cell) </font>

# 48 Capital TSP with Genetic Algorithm

Use the DEAP genetic algorithm described in the lesson to approximate a solution to the 48 state capital TSP introduced last week.  The distance matrix (in meters) and city coordinates are in `data/Caps48.json`.  Experiment with the algorithm parameters until you can find a tour of length $\leq$ 19,000,000 meters (19,000 kilometers).  Uncomment the random.seed() line and possibly try different seed values so that, if all the other parameters are the same, running the algorithm again will produce the same results.

Put your code in the cell below. Make sure it prints out both the best tour and the tour distance.  Feel free to divide distances by 1000 to display results in kilometers.

<font color = "blue"> *** 4 points -  answer in cell below *** (don't delete this cell) </font>

 Make a plot of the best tour.

<font color = "blue"> *** 1 points -  answer in cell below *** (don't delete this cell) </font>

Based on the plot of the tour, do you think you have found a nearly optimal tour?  Explain why or why not.

<font color = "blue"> *** 1 points -  answer in cell below *** (don't delete this cell) </font>

# 48 Capital TSP with Genetic Algorithm and Local Search

Genetic algorithms are great for exploring a large solution space, but not so good at refining the details when close to an optimal solution.  For this reason genetic algorithms are often combined with local search.  The idea is that at each generation some or all of the individuals in the population are replaced by the result of a local search.  We'll explore this by using the 2-opt local search for TSP to refine the three worst tours in each generation.  Create a new customGA() algorithm called customGA_TSP_LS() and include this code at the beginning of the while loop:

```
# replace 3 worst individuals with local searches
pop.sort(key=lambda x:x.fitness.values,reverse=True)
num_loc_search = 3
for i in range(num_loc_search):
    best_tour, best_dist, iterations = two_opt(list(pop[i]),distance_matrix)
    pop[i] = creator.Individual(best_tour)
    pop[i].fitness.values = (best_dist,)
```
This finds the three worst tours and does a 2-opt local search on each and then replaces the results in the population.

You'll also need this version of the 2-opt search that uses the distance matrix:

In [None]:
def sub_tour_reversal_ij(tour, i, j):
    """Return ``tour`` with the segment at positions ``i..j`` (inclusive) reversed.

    tour:    list or 1-D array of city indices.
    i, j:    segment end points, used with ``i <= j`` by two_opt().
    returns: a new integer numpy array; ``tour`` itself is not modified.
    """
    n = len(tour)
    before = tour[:i]
    # walk backwards from j down to i; the negative stop also covers i == 0,
    # where a stop of -1 would mean "last element" instead of "before the first"
    flipped = tour[j:i - n - 1:-1]
    after = tour[j + 1:]
    pieces = (before, flipped, after)
    return np.concatenate(pieces).astype(int)

def tour_distance(individual, dist_mat):
    """Return the closed-tour length of ``individual`` as a one-element tuple.

    individual: sequence of city indices in visiting order.
    dist_mat:   nested sequence where ``dist_mat[a][b]`` is the distance from
                city ``a`` to city ``b``.

    Note: an earlier cell in this notebook defines a function with the same
    name that returns a bare number; running this cell replaces it.
    """
    # the leg from the last city back to the first closes the loop
    total = dist_mat[individual[-1]][individual[0]]
    # pair every city with its successor; zip stops at the shorter sequence
    for origin, destination in zip(individual, individual[1:]):
        total += dist_mat[origin][destination]
    return (total,)

def two_opt(start_tour, dist_mat):
    """Improve ``start_tour`` by repeated 2-opt passes until a pass finds nothing better.

    Each pass tries reversing the segment between every pair of positions
    ``i < j``. A reversal that shortens the tour is adopted immediately, and
    the rest of the pass continues from the improved tour. Passes repeat until
    one completes with no improvement.

    start_tour: sequence of city indices.
    dist_mat:   distance matrix passed through to tour_distance().
    returns:    ``(best_tour, best_dist, iterations)``, where ``iterations``
                counts every reversal that was evaluated. ``best_tour`` is
                ``start_tour`` itself if no reversal helped.
    """
    n = len(start_tour)
    best_tour = start_tour
    best_dist = tour_distance(start_tour, dist_mat)[0]
    iterations = 0

    found_better = True
    while found_better:
        found_better = False
        for i in range(n - 1):
            for j in range(i + 1, n):
                iterations += 1
                candidate = sub_tour_reversal_ij(best_tour, i, j)
                candidate_dist = tour_distance(candidate, dist_mat)[0]
                if candidate_dist < best_dist:
                    # keep the shorter tour and schedule another full pass
                    best_tour, best_dist = candidate, candidate_dist
                    found_better = True
    return best_tour, best_dist, iterations

Put your customGA_TSP_LS function in the next cell. You can run this version of the algorithm with a much smaller population and for far fewer iterations.  Find a tour of length $\leq$ 17,500 km. Use a random number seed for reproducibility.  Put your code in the next cell and print out the best distance and tour.


<font color = "blue"> *** 6 points -  answer in cell below *** (don't delete this cell) </font>

Plot the best tour below. 

<font color = "blue"> *** 1 points -  answer in cell below *** (don't delete this cell) </font>

Is this a good tour?  Explain.

<font color = "blue"> *** 1 points -  answer in cell below *** (don't delete this cell) </font>

# Knapsack problem with GA

Use the DEAP framework to build a genetic algorithm to solve the knapsack problem (same as in problem 2).  

The individuals should be lists of booleans:
```
def create_individual(n):
    return random.choices([True, False], k=n)
```

Use tournament selection and one point crossover.  For mutation flip booleans at random with this:
```
toolbox.register("mutate", tools.mutFlipBit, indpb=.1)
```

Here is a non-tuple version of a fitness function to get you started.  Notice that it includes a penalty term that penalizes if the knapsack is over the maximum weight.

```
def knapsack_value(x, values, weights, max_tot_weight):
    # x is a vector of booleans of which items to include
    tot_value = sum(values[x])
    penalty = sum(values)*min( max_tot_weight - sum(weights[x]), 0) 
    return tot_value+penalty
```
    
Put your code in the cell below.  Use random.seed() to make reproducible results.

<font color = "blue"> *** 9 points -  answer in cell below *** (don't delete this cell) </font>

Do you think you've found the knapsack with highest possible value (the global max)?  Why or why not?

<font color = "blue"> *** 1 points -  answer in cell below *** (don't delete this cell) </font>

# 30 dimensional Rastrigin Function

30 dimensions means that each individual or potential solution is a list of 30 real numbers each between -5.12 and 5.12.  Use either `simanneal` or `DEAP` (or both) to find the global optimum value (it's zero and happens when $x_1 = x_2 = \ldots = x_{30} = 0$). Use random number seeds to make your search reproducible - `random.seed()` and/or `numpy.random.seed`.

<font color = "blue"> *** 10 points -  answer in cell below *** (don't delete this cell) </font>

In [None]:
def rastrigin(x):
    """Evaluate the Rastrigin function at ``x``.

    Computes ``sum(x_i**2 + 10 - 10*cos(2*pi*x_i))`` over the components of
    ``x``. The value is 0 when every component is 0.

    x: a single vector of length n (=dim). Any sequence of numbers is
       accepted -- a plain list as well as a numpy array -- because the input
       is converted with ``np.asarray`` first.
    returns: the function value as a numpy float.
    """
    x = np.asarray(x, dtype=float)
    # axis=0 keeps the reduction over the vector's components, as before
    return np.sum(x**2 + 10 - 10 * np.cos(2 * np.pi * x), axis=0)