In [None]:
import numpy as np
import pandas as pd
import folium
import math
import random
from math import radians
import requests
from tqdm.notebook import tqdm
import time

# Seed every random source the notebook draws from: NumPy supplies the initial
# weights, while the standard-library `random` module drives the annealing
# perturbations and acceptance test.
np.random.seed(10)
random.seed(10)

<h1>Simulated Annealing optimization</h1>

<h2>Reading dataset</h2>
For the purpose of the experiment, the results are only shown for a particular cluster.

In [None]:
# Load the cluster dataset. Later cells read its CLUSTER_hdbscan, geolocation_lat/lng,
# centroid_lat/lng, geo_distance, road_distance and road_duration columns.
c1 = pd.read_csv('clusters_new_features.csv')

In [None]:
# Keep only the rows whose CLUSTER_hdbscan label is 86; the rest of the notebook works on this subset.
is_target_cluster = c1['CLUSTER_hdbscan'] == 86
c_250 = c1[is_target_cluster]

<h2>Plotting cluster on a map</h2>

In [None]:
# Draw the selected cluster on an interactive map: one dot per customer,
# the bounding box of all customers, and the cluster centroid.
cust_lat = c_250.geolocation_lat
cust_lng = c_250.geolocation_lng

mapf = folium.Map(
    location=[cust_lat.mean(), cust_lng.mean()],
    zoom_start=10.5,
    tiles='OpenStreetMap',
    height=550,
)

# Small cyan marker per customer; the popup shows its coordinates and cluster label.
circles = c_250.apply(
    lambda row: folium.CircleMarker(
        location=[row.geolocation_lat, row.geolocation_lng],
        radius=1,
        popup=f"{row.geolocation_lat}, {row.geolocation_lng}\n{row.CLUSTER_hdbscan}",
        color='darkcyan',
        fill=True,
        fill_color='darkcyan',
        fill_opacity=0.5,
    ).add_to(mapf),
    axis=1,
)

# Rectangle spanning the min/max latitude and longitude of the customers.
south_west = (cust_lat.min(), cust_lng.min())
north_east = (cust_lat.max(), cust_lng.max())
rect = folium.Rectangle(bounds=[south_west, north_east]).add_to(mapf)

# Larger tomato marker at the centroid stored with the cluster.
centroid_location = [c_250.centroid_lat.values[0], c_250.centroid_lng.values[0]]
cluster = folium.CircleMarker(
    location=centroid_location,
    radius=5,
    color='tomato',
    fill=True,
    fill_color='tomato',
    fill_opacity=0.5,
).add_to(mapf)

mapf

<h2>Simulated Annealing optimizer algorithm</h2>
Functions to apply the optimizer to a custom dataset.

In [None]:
def cost_function(position, haversine_distances, road_distances, road_durations, lambda_reg=0.01, epsilon=1e-6):
    """Score a candidate solution; lower is better.

    The cost is the mean weighted distance/duration over all observations,
    plus an L2 penalty on the weights, plus a log barrier that keeps the
    weights positive.

    Parameters
    ----------
    position : sequence of 5 numbers
        ``(latitude, longitude, alpha, beta, gamma)``. Only the three weights
        are used here; the coordinates influence the cost through the distance
        inputs, which the caller computes for that location.
    haversine_distances, road_distances, road_durations : array-like
        Per-customer values, all of the same length (NumPy arrays or pandas
        Series).
    lambda_reg : float
        Strength of the L2 penalty on ``alpha``, ``beta`` and ``gamma``.
    epsilon : float
        Offset added to each weight inside the logarithm.

    Returns
    -------
    float
        The cost. ``inf`` when any ``weight + epsilon`` is not strictly
        positive (outside the domain of the log barrier). NaN if any
        per-customer input is NaN.
    """
    _, _, alpha, beta, gamma = position
    weights = np.array([alpha, beta, gamma], dtype=float)

    # The log barrier is undefined for non-positive arguments; treat such
    # candidates as infinitely bad instead of letting np.log emit NaN.
    if np.any(weights + epsilon <= 0):
        return np.inf

    weighted = np.asarray(
        alpha * haversine_distances + beta * road_distances + gamma * road_durations,
        dtype=float,
    )
    # Average over the observations (not over the 5 entries of `position`), so
    # the data term does not scale with how many customers are evaluated.
    data_term = weighted.mean() if weighted.size else 0.0

    regularisation = lambda_reg * (alpha**2 + beta**2 + gamma**2)
    log_penalty = -np.sum(np.log(weights + epsilon))

    return data_term + regularisation + log_penalty

# Key and base URL for the Google Maps Directions API, read by get_distance_duration.
# The key below is a placeholder: supply your own before running, and avoid
# committing a real key with the notebook.
maps_api_key = 'ENTER YOUR API KEY'
endpoint = 'https://maps.googleapis.com/maps/api/directions/json?'


def get_distance_duration(origin, destination, mode = 'driving', timeout=10):
    """Query the Directions API for the route between two coordinates.

    Parameters
    ----------
    origin, destination : sequence
        ``(latitude, longitude)`` pairs.
    mode : str
        Travel mode forwarded to the API.
    timeout : float
        Seconds to wait for the HTTP response before giving up, so one stalled
        request cannot hang an optimisation run.

    Returns
    -------
    tuple or None
        ``(distance_km, duration_hours)`` for the first leg of the first route,
        or ``None`` when the request fails or the API reports a non-OK status.
    """
    # Let requests build and encode the query string instead of concatenating it.
    params = {
        'origin': f'{origin[0]},{origin[1]}',
        'destination': f'{destination[0]},{destination[1]}',
        'mode': mode,
        'key': maps_api_key,
    }

    try:
        response = requests.get(endpoint, params=params, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Report only the exception type: its message can contain the full
        # request URL, which includes the API key.
        print(f'Error: Unable to calculate road distance and duration. ({type(exc).__name__})')
        return None

    status = data.get('status')
    if status == 'OK':
        leg = data['routes'][0]['legs'][0]
        road_distance_meters = leg['distance']['value']
        road_duration_seconds = leg['duration']['value']
        return road_distance_meters/1000, road_duration_seconds/3600

    print(f'Error: Unable to calculate road distance and duration. (status={status})')
    return None


def haversine(latlon1, latlon2):
    """Return the great-circle distance in kilometres between two points.

    Both arguments are ``(latitude, longitude)`` pairs in degrees. The Earth
    is modelled as a sphere of radius 6,371,000 m.
    """
    earth_radius_m = 6371000

    (lat_a, lon_a), (lat_b, lon_b) = latlon1, latlon2
    phi_a, phi_b = radians(lat_a), radians(lat_b)

    # Half-angle differences feed the squared-sine terms of the formula.
    half_dphi = radians(lat_b - lat_a) / 2
    half_dlambda = radians(lon_b - lon_a) / 2

    sin_sq_lat = np.sin(half_dphi) ** 2
    sin_sq_lon = np.sin(half_dlambda) ** 2
    a = sin_sq_lat + np.cos(phi_a) * np.cos(phi_b) * sin_sq_lon

    central_angle = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    return earth_radius_m * central_angle / 1000  # metres -> kilometres


def simulated_annealing(initial_solution, objective_function, haversine_distances, road_distances, road_durations, initial_temp, cooling_rate, iterations, customer_locations=None, step_size=0.1):
    """Minimise ``objective_function`` with simulated annealing.

    Parameters
    ----------
    initial_solution : sequence of numbers
        Starting point ``(latitude, longitude, alpha, beta, gamma)``.
    objective_function : callable
        Called as ``objective_function(solution, haversine, road_dist, road_dur)``
        and expected to return a scalar cost.
    haversine_distances, road_distances, road_durations : array-like
        Features matching ``initial_solution``; used only for the initial cost.
        Features of every candidate are recomputed via ``update_features``.
    initial_temp : float
        Starting temperature.
    cooling_rate : float
        Factor the temperature is multiplied by after each iteration.
    iterations : int
        Number of candidates to evaluate.
    customer_locations : array-like of (lat, lon), optional
        Customers used to recompute features for each candidate. When omitted,
        the module-level ``customer_locations`` variable is used, which matches
        how this function behaved before the parameter existed.
    step_size : float
        Half-width of the uniform perturbation added to every component.

    Returns
    -------
    tuple
        ``(best_solution, best_cost, step_cost)``, where ``step_cost`` is a
        list of dicts recorded each time a new best solution is found.
    """
    if customer_locations is None:
        # Backward compatibility: fall back to the notebook-level variable.
        customer_locations = globals().get('customer_locations')
        if customer_locations is None:
            raise ValueError(
                "customer_locations was not passed and no module-level "
                "'customer_locations' is defined"
            )

    current_solution = initial_solution
    current_cost = objective_function(current_solution, haversine_distances, road_distances, road_durations)
    best_solution = current_solution
    best_cost = current_cost
    temperature = initial_temp

    step_cost = []

    for i in tqdm(range(iterations)):
        # Neighbouring candidate: jitter every component independently.
        new_solution = [x + random.uniform(-step_size, step_size) for x in current_solution]
        new_have, new_road_dist, new_road_dur = update_features(new_solution, customer_locations)
        new_cost = objective_function(new_solution, new_have, new_road_dist, new_road_dur)

        delta_cost = new_cost - current_cost

        if delta_cost < 0:
            accept = True
        elif temperature > 0:
            # Metropolis criterion. A NaN or infinite delta makes the
            # comparison false, so unusable candidates are rejected.
            accept = random.random() < math.exp(-delta_cost / temperature)
        else:
            # Temperature has decayed to zero: accept improvements only and
            # avoid dividing by zero.
            accept = False

        if accept:
            current_solution = new_solution
            current_cost = new_cost

        if current_cost < best_cost:
            best_solution = current_solution
            best_cost = current_cost
            step_cost.append({'iteration': i, 'current_soln': best_solution, 'current_cost': best_cost})

        temperature *= cooling_rate

    return best_solution, best_cost, step_cost

def update_features(particle, customer_locations):
    """Recompute the per-customer features for a candidate warehouse location.

    Parameters
    ----------
    particle : sequence
        Candidate solution; its first two entries are used as the origin
        ``(latitude, longitude)``.
    customer_locations : iterable of (lat, lon)
        Customer coordinates.

    Returns
    -------
    tuple of pandas.Series
        ``(haversine_km, road_distance, road_duration)``, each with one entry
        per customer in input order. When the road lookup for a customer
        fails, its road distance and duration are NaN, so all three series
        keep the same length and stay aligned.
    """
    origin = (particle[0], particle[1])
    hav, dist, dur = [], [], []

    for loc in customer_locations:
        destination = tuple(loc)
        hav.append(haversine(origin, destination))

        data = get_distance_duration(origin, destination)
        if data:
            road_distance, road_duration = data[0], data[1]
        else:
            # Keep the row so indices still refer to the same customer.
            road_distance, road_duration = np.nan, np.nan
        dist.append(road_distance)
        dur.append(road_duration)

    return pd.Series(hav, dtype=float), pd.Series(dist, dtype=float), pd.Series(dur, dtype=float)

In [None]:
# Precomputed features of the cluster relative to its stored centroid.
haversine_distances = np.array(c_250['geo_distance'])
road_distances = np.array(c_250['road_distance'])
road_durations = np.array(c_250['road_duration'])
latitude = c_250['centroid_lat'].values[0]
longitude = c_250['centroid_lng'].values[0]

# (lat, lng) pairs of every customer in the cluster, one row per customer.
customer_locations = np.c_[np.array(c_250.geolocation_lat), np.array(c_250.geolocation_lng)]

# Starting point: the cluster centroid plus random positive weights. Defined in
# this cell so the baseline cost can be evaluated without running later cells.
initial_solution = np.array([
    latitude,
    longitude,
    np.random.uniform(0, 0.5),
    np.random.uniform(0, 0.5),
    np.random.uniform(0, 0.5),
])

cost = cost_function(initial_solution, haversine_distances, road_distances, road_durations)
cost

<h2>Cost and performance vs. number of iterations</h2>

In [None]:
initial_temperature = 250  # temperature every run starts from
cooling_rate = 0.99  # factor applied to the temperature after each iteration
sim_ann_results = []

# Repeat the search with iteration budgets 2, 4, ..., 24 and record the cost
# reached and the wall-clock time of each run.
for i in range(2, 25, 2):
    # Each run starts at the centroid with freshly drawn weights.
    alpha = np.random.uniform(0, 0.5)
    beta = np.random.uniform(0, 0.5)
    gamma = np.random.uniform(0, 0.5)
    initial_solution = np.array([latitude, longitude, alpha, beta, gamma])
    num_iterations = i

    # Time the annealing call only.
    st = time.time()
    best_solution, best_cost, step_cost = simulated_annealing(
        initial_solution,
        cost_function,
        haversine_distances,
        road_distances,
        road_durations,
        initial_temperature,
        cooling_rate,
        num_iterations,
    )
    et = time.time()
    elapsed_time = et - st

    sim_ann_results.append(
        {
            'iteration': i,
            'execution_time': elapsed_time,
            'step_cost': step_cost,
            'best_soln': best_solution,
            'best_cost': best_cost,
        }
    )

<h2>Plotting new warehouse location on map</h2>

In [None]:
# Show the most recently optimised warehouse location next to the original
# centroid, on top of the customers and their bounding box.
cust_lat = c_250.geolocation_lat
cust_lng = c_250.geolocation_lng

mapf = folium.Map(
    location=[cust_lat.mean(), cust_lng.mean()],
    zoom_start=11.5,
    tiles='OpenStreetMap',
    height=650,
)

# Small cyan marker per customer; the popup shows its coordinates and cluster label.
circles = c_250.apply(
    lambda row: folium.CircleMarker(
        location=[row.geolocation_lat, row.geolocation_lng],
        radius=1,
        popup=f"{row.geolocation_lat}, {row.geolocation_lng}\n{row.CLUSTER_hdbscan}",
        color='darkcyan',
        fill=True,
        fill_color='darkcyan',
        fill_opacity=0.5,
    ).add_to(mapf),
    axis=1,
)

# Rectangle spanning the min/max latitude and longitude of the customers.
south_west = (cust_lat.min(), cust_lng.min())
north_east = (cust_lat.max(), cust_lng.max())
rect = folium.Rectangle(bounds=[south_west, north_east]).add_to(mapf)

# Tomato marker: the centroid stored with the cluster (the starting location).
initial_location = [c_250.centroid_lat.values[0], c_250.centroid_lng.values[0]]
cluster1 = folium.CircleMarker(
    location=initial_location,
    radius=5,
    color='tomato',
    fill=True,
    fill_color='tomato',
    fill_opacity=0.5,
).add_to(mapf)

# Pink marker: latitude/longitude of the solution returned by the last annealing run.
optimised_location = [best_solution[0], best_solution[1]]
cluster2 = folium.CircleMarker(
    location=optimised_location,
    radius=5,
    color='#FA6AE2',
    fill=True,
    fill_color='#FA6AE2',
    fill_opacity=0.5,
).add_to(mapf)

mapf

<h2>Bootstrapping algorithm</h2>

In [None]:
def bootstrap_results(num_boot=2, sa_iterations=4):
    """Run simulated annealing on bootstrap resamples of the cluster.

    Parameters
    ----------
    num_boot : int
        Number of bootstrap resamples to draw.
    sa_iterations : int
        Iteration budget passed to each annealing run.

    Returns
    -------
    tuple of lists
        ``(score, position, perf)``: the best cost, the best solution and the
        elapsed seconds of every resample, in order.
    """
    # simulated_annealing looks up the notebook-level `customer_locations` when
    # it is not handed one explicitly, so point that variable at each resample
    # for the duration of the run and restore it afterwards.
    global customer_locations
    saved_locations = globals().get('customer_locations')

    score, position, perf = [], [], []
    try:
        for i in tqdm(range(num_boot)):
            # Vary the seed per draw: a constant random_state would return the
            # same resample every time. Offsetting by `i` stays reproducible.
            sub_sample = c_250.sample(n= len(c_250), replace= True, random_state= 12 + i)
            customer_locations = np.c_[np.array(sub_sample.geolocation_lat), np.array(sub_sample.geolocation_lng)]
            base_solution = np.array([
                sub_sample.centroid_lat.values[0],
                sub_sample.centroid_lng.values[0],
                np.random.uniform(0,0.5),
                np.random.uniform(0,0.5),
                np.random.uniform(0,0.5),
            ])
            haversine_distances = np.array(sub_sample['geo_distance'])
            road_distances = np.array(sub_sample['road_distance'])
            road_durations = np.array(sub_sample['road_duration'])

            start = time.time()
            best_solution, best_cost, step_cost = simulated_annealing(base_solution, cost_function, haversine_distances, road_distances, road_durations, initial_temperature, cooling_rate, sa_iterations)
            stop = time.time()

            position.append(best_solution)
            score.append(best_cost)
            perf.append(stop-start)
    finally:
        customer_locations = saved_locations

    return score, position, perf

score, position, perf = bootstrap_results(100)