In [1]:
import ast
import json
import scipy.stats as stat
%run data_plot.ipynb
%run metrics.ipynb
%run discrete_genetic_algorithm.ipynb
%run ant_colony_optimization.ipynb
%run dijkstras_algorithm.ipynb
%run simulated_annealing.ipynb

In [2]:
def parse_map_data(results):
    """
    parse_map_data(results)
    translates the routes found by a search algorithm into lists of geographic
    coordinates laid out in the structure used for the map visualization.
    every node ID in a route is looked up (as a string key) in the module-level
    geographic_coords mapping, which is defined outside this cell.
    results: dictionary with a 'possible_paths' entry and a 'best_path' entry; the
    keys of both are string representations of node-ID sequences (e.g. "[1, 2, 151]")
    returns: {'best_route': {'sensor_id': [...], 'coordinates': [...]},
    'possible_routes': [[...], ...]}
    """
    def _route_coordinates(path):
        # intermediate nodes are emitted twice, the first and last node once
        # (presumably so every leg of the route has its own start and end point -- TODO confirm)
        last_position = len(path) - 1
        points = []
        for position, node in enumerate(path):
            point = geographic_coords[str(node)]
            points.append(point)
            if 0 < position < last_position:
                points.append(point)
        return points

    # the dictionary keys are stringified sequences; turn them back into python objects
    decoded_paths = [ast.literal_eval(encoded) for encoded in results['possible_paths'].keys()]
    possible_routes = [_route_coordinates(path) for path in decoded_paths]

    # the best path is handled after the alternatives (the original intent is that
    # it is drawn last so it appears on top of the other routes)
    best_path = ast.literal_eval(list(results['best_path'].keys())[0])
    best_route = {
        # every node of the best path except the destination keeps its sensor ID
        'sensor_id': list(best_path[:-1]),
        'coordinates': _route_coordinates(best_path),
    }

    return {'best_route': best_route, 'possible_routes': possible_routes}

In [3]:
def load_solutions():
    """
    load_solutions()
    reads the solutions that were already stored in 'solutions.json' so that new
    solutions can be merged into them before the file is written again.
    a missing file, an empty file and a file that does not contain valid JSON
    are all treated as "no solutions stored yet".
    returns: the object decoded from the file, or an empty dict when nothing
    usable could be read
    """
    try:
        with open('solutions.json', 'r') as file:
            return json.load(file)
    except FileNotFoundError:
        # first run: nothing has been saved yet
        return {}
    except ValueError:
        # json.JSONDecodeError (empty / malformed file) and UnicodeDecodeError
        # are both subclasses of ValueError
        return {}

In [4]:
def save_solution(solution, method):
    """
    save_solution(solution, method)
    saves the best solution found by an algorithm in the solutions json file.
    solutions are grouped by method first and by source node second, and each
    stored solution has a format like...
    {"source node": "Node-5", 
    "routing path": "(Node-2, 1 Mbps), (Node-1, 4Mbps), (BS-2, 2 Mbps)",
    "end-to-end transmission rate": "1 Mbps"}
    an existing entry for the same method and source node is overwritten.
    solution: a sequence whose first element is the solution dictionary; it must
    contain the 'source node' key
    method: the method that was used to get that best solution.
    returns: nothing
    """
    # fetch existing solutions
    existing_solutions = load_solutions()
    best_solution = solution[0]
    # create the bucket for this method on first use, then add / replace the entry
    existing_solutions.setdefault(method, {})[best_solution['source node']] = best_solution
    # serialize BEFORE opening the file: opening with 'w' truncates it, so a value
    # that cannot be serialized would otherwise destroy every stored solution
    payload = json.dumps(existing_solutions)
    # the context manager guarantees the file is flushed and closed
    with open("solutions.json", 'w') as file:
        file.write(payload)

    return

In [5]:
def find_optimal_route(origin, method, **kwargs):
    """
    find_optimal_route(origin, method, **kwargs)
    the main function of this assessment. It searches for best route 
    from a node to either of the 2 base stations. The search is done using
    the algorithm specified as one of the arguments. Supported algorithms 
    include: 'discrete genetic algorithm', 'ant colony optimization', 
    'dijkstras algorithm' and 'simulated annealing'. Each of the algorithms
    returns a results object (intended for visualization on a map) and a
    best-solution list that is stored in the solutions file.
    origin: the node whose best path to a base station you are seeking
    method: the algorithm you want to use to perform the search
    **kwargs: any additional arguments you want to pass in for
    optimization of the various algorithms
    Expected **kwargs and their fallback values:
    discrete genetic algorithm: pop_length=100000, generations=10, mutation_rate=0.1, crossover_type='single-point', selection_limit=20
    ant colony optimization: Q=1, alpha = 0.6, evaporate=0.45, ants_count=20, max_iterations=10
    dijkstras algorithm: None
    simulated annealing: init_temp=100, final_temp=0.1, cooling_rate=0.95, improvement_checker_count=50
    More details on the **kwargs provided in the algorithms' respective modules
    returns: nothing when the search ran and the solution was saved; the string
    "unknown algorithm" when method is not one of the supported names
    """
    # run the algorithm passed in as a parameter and fetch the visualization results together with the solutions file data
    if method == "discrete genetic algorithm":
        results, solutions = discrete_genetic_algorithm(
            origin,
            pop_length=kwargs.get("pop_length", 100000),
            generations=kwargs.get("generations", 10),
            mutation_rate=kwargs.get("mutation_rate", 0.1),
            crossover_type=kwargs.get("crossover_type", "single-point"),
            selection_limit=kwargs.get("selection_limit", 20),
        )
    elif method == "ant colony optimization":
        # the ACO tuning parameters belong to this branch (they used to be
        # attached to the dijkstra branch, so user supplied values were ignored)
        results, solutions = ant_colony_optimization(
            origin,
            Q=kwargs.get("Q", 1),
            alpha=kwargs.get("alpha", 0.6),
            evaporate=kwargs.get("evaporate", 0.45),
            ants_count=kwargs.get("ants_count", 20),
            max_iterations=kwargs.get("max_iterations", 10),
        )
    elif method == "dijkstras algorithm":
        # dijkstra takes no tuning parameters; it previously ran ant colony
        # optimization by mistake, storing ACO results under the dijkstra key
        results, solutions = dijkstra_algorithm(origin)
    elif method == "simulated annealing":
        results, solutions = simulated_annealing(
            origin,
            init_temp=kwargs.get("init_temp", 100),
            final_temp=kwargs.get("final_temp", 0.1),
            cooling_rate=kwargs.get("cooling_rate", 0.95),
            improvement_checker_count=kwargs.get("improvement_checker_count", 50),
        )
    else:
        return "unknown algorithm"
    # save the best path in the solutions file
    save_solution(solutions, method)
    # parse the plotting results and call the map visualization function
    # (currently disabled, so `results` is not used)
    #coordinates = parse_map_data(results)
    #plot_route(coordinates)

    return

In [6]:
#find_optimal_route(43, 'discrete genetic algorithm', pop_length=100000)
#find_optimal_route(1, 'ant colony optimization')
#find_optimal_route(43, 'simulated annealing')

In [7]:
def get_missing_solutions():
    """
    get_missing_solutions()
    for each algorithm recorded in the solutions json file, it collects the
    sensors that have no stored solution under the key "Node-<sensor>".
    the sensors are all keys of x_y_data except the last two (the last two
    entries are expected to be the base stations).
    note that an algorithm with no entry at all in the solutions file is not
    reported, and neither is an algorithm that has a solution for every sensor.
    returns: a dictionary mapping an algorithm name to the list of sensors
    that are still missing a solution for that algorithm
    """
    missing_solutions = {}
    # all nodes except the trailing 2 base stations
    sensors = list(x_y_data.keys())[:-2]
    solutions = load_solutions()

    for algorithm, stored in solutions.items():
        for sensor in sensors:
            if stored.get(f"Node-{sensor}") is None:
                # create the list for this algorithm the first time a gap is found
                missing_solutions.setdefault(algorithm, []).append(sensor)

    return missing_solutions

In [8]:
# read the solutions file once and reuse the result for both lines of the report
missing_solutions = get_missing_solutions()
# .get() keeps the cell working once every DGA solution has been found
# (the key is absent from the dictionary in that case)
print(len(missing_solutions.get('discrete genetic algorithm', [])))
print(missing_solutions)

9
{'discrete genetic algorithm': [71, 76, 93, 132, 133, 136, 137, 146, 147]}


In [9]:
def run_all():
    """
    run_all()
    executes the find_optimal_route() function for all sensors using all algorithms
    for purposes of storing the best solutions for each in the solutions file.
    a failure for one sensor is reported and skipped so that the remaining
    sensors and algorithms are still processed.
    returns: nothing
    """
    # algorithms to be executed
    methods = ['discrete genetic algorithm', 'ant colony optimization', 'dijkstras algorithm', 'simulated annealing']
    # all sensors except the 2 base stations
    sensors = list(x_y_data.keys())[:-2]
    # for each method, find the best path for each sensor
    for method in methods:
        for sensor in sensors:
            print(f"Sensor {sensor}")
            # a search can fail when no best path is found (typical with DGA); that must
            # not stop the batch. Only Exception is caught so that interrupting the
            # kernel (KeyboardInterrupt) still stops this long running loop.
            try:
                find_optimal_route(sensor, method)
            except Exception as error:
                print(f"skipped sensor {sensor} ({method}): {error!r}")

    return

In [10]:
#run_all()

### Algorithm Performance Evaluation

The various algorithms that we implemented were evaluated against the following metrics:
- Execution time
- Solution quality

The performances were then visualized in a bar chart and table respectively.

#### Execution Time

This metric is the time, in seconds, that an algorithm takes to execute. To keep the comparison fair, the same problem is passed to every algorithm; for example, finding the best path from node 43 to any base station. The time library was imported to capture the execution start time and stop time, and the execution time is calculated as follows:

`
execution time = execution stop time – execution start time
`

This information is recorded against the algorithm name in a dictionary. After all the executions, the dictionary is used to build a bar chart which shows execution time in seconds as in the figure below.

![Alt](https://livebournemouthac-my.sharepoint.com/:i:/r/personal/s5721725_bournemouth_ac_uk/Documents/Search%20and%20Optimisation/search-and-optimization-projects/assessment/images/metrics%20chart.png?csf=1&web=1&e=VWAkUY)

In [11]:
def get_algorithm_time_metrics():
    """
    get_algorithm_time_metrics()
    compares the execution time of the supported algorithms on one shared problem.
    a single start node is drawn at random from all nodes that are not base
    stations; every algorithm is then run from that node through
    find_optimal_route(), wrapped between start_execution() and
    stop_execution(method). Those two helpers and plot_algorithm_execution_time()
    are defined outside this cell (the original notes place them in the metrics
    module, where the duration is stored against the algorithm name).
    after all algorithms ran, the recorded durations are plotted.
    returns: nothing
    """
    separator = "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
    # the base stations are destinations, so they can never be the start node
    base_station_ids = [x_y_base_station_1[0], x_y_base_station_2[0]]
    candidates = []
    for node in x_y_data.keys():
        if node not in base_station_ids:
            candidates.append(node)
    # draw one random start node shared by every algorithm
    start_node = choice(np.array(candidates), 1, replace=False)[0]
    # algorithms whose time performance will be compared
    for method in ('discrete genetic algorithm',
                   'ant colony optimization',
                   'dijkstras algorithm',
                   'simulated annealing'):
        print(separator)
        print(f"executing {method}...")
        # time only the search itself
        start_execution()
        find_optimal_route(start_node, method)
        stop_execution(method)
        print(separator)
    # plot the recorded results
    plot_algorithm_execution_time()

    return

In [12]:
#get_algorithm_time_metrics()

In [13]:
def get_algorithm_accuracy_metrics(trials=4):
    """
    get_algorithm_accuracy_metrics()
    ranks the algorithms by the quality of the solutions they produce.
    in every trial a random non base station node is drawn, all algorithms are
    run from it with their default settings, and the 'absolute path cost' of
    each first solution is ranked (highest cost gets rank 1). The per-trial
    ranks are summed per algorithm and the sums are ranked again to obtain the
    overall rank, which is handed to show_algorithm_accuracy_rank() for display.
    Rank ties could exist in some scenarios.
    trials (optional): number of trials used to build the aggregated rank
    returns: nothing
    """
    separator = "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
    # the base stations are destinations, so they can never be the start node
    base_station_ids = [x_y_base_station_1[0], x_y_base_station_2[0]]
    # algorithms whose solution quality will be compared
    methods = ['discrete genetic algorithm', 'ant colony optimization', 'dijkstras algorithm', 'simulated annealing']
    # running total of the per-trial ranks, one slot per algorithm
    rank_totals = np.zeros(len(methods))
    for trial_index in range(trials):
        trial_label = trial_index + 1
        candidates = np.array([node for node in x_y_data.keys() if node not in base_station_ids])
        start_node = choice(candidates, 1, replace=False)[0]
        print(f"trial {trial_label}...")
        print(separator)
        path_costs = []
        for method in methods:
            print(f"executing {method}; trial {trial_label}...")
            # only the solutions list is needed here; it carries the absolute
            # path cost that decides the quality of the solution
            if method == "discrete genetic algorithm":
                _, solutions = discrete_genetic_algorithm(start_node)
            elif method == "dijkstras algorithm":
                _, solutions = dijkstra_algorithm(start_node)
            elif method == "ant colony optimization":
                _, solutions = ant_colony_optimization(start_node)
            elif method == "simulated annealing":
                _, solutions = simulated_annealing(start_node)
            path_costs.append(float(solutions[0]['absolute path cost']))
        # negate the costs so that the highest cost receives rank 1
        # NOTE(review): rankdata returns fractional ranks for ties and int() truncates
        # them (e.g. 2.5 -> 2) -- confirm this is the intended tie handling
        trial_ranks = [int(rank) for rank in stat.rankdata([-cost for cost in path_costs])]
        rank_totals += trial_ranks
        print(separator)
    # rank the summed ranks to get the overall order (lowest sum is rank 1)
    final_ranks = [int(rank) for rank in stat.rankdata(rank_totals)]
    overall_performance = [[rank, name] for rank, name in zip(final_ranks, methods)]
    # tabulate the overall rank
    show_algorithm_accuracy_rank(overall_performance)

    return

In [14]:
#get_algorithm_accuracy_metrics()

In [15]:
# retry only the (algorithm, sensor) pairs that still have no stored solution
for algorithm, sensors in get_missing_solutions().items():
    for sensor in sensors:
        print(f"Sensor {sensor}")
        # a failed search must not stop the remaining retries, but it is reported
        # instead of being silently dropped; catching Exception (not a bare except)
        # keeps the kernel interruptible during this long running loop
        try:
            find_optimal_route(sensor, algorithm)
        except Exception as error:
            print(f"skipped sensor {sensor} ({algorithm}): {error!r}")


Sensor 71
**********************************Generation 1**********************************
********************************************Performance**********************************
*****************************************************************************************
No Solution Found to base station ID 151
**********************************Generation 1**********************************
********************************************Performance**********************************
*****************************************************************************************
No Solution Found to base station ID 152
Sensor 76
**********************************Generation 1**********************************
********************************************Performance**********************************
*****************************************************************************************
No Solution Found to base station ID 151
**********************************Generation 1****************************