imports

In [None]:
# imports 
import json
from collections import deque

import numpy as np
import pandas as pd

loading data

In [None]:
# location dataframe
location_df = pd.read_csv('SaO_Optilandia_resub_locations.csv')

# links dataframe
links_df = pd.read_csv('SaO_Optilandia_resub_links.csv')

# extract lorry data from json; the context manager closes the file handle
# as soon as parsing is done instead of leaving it open for the whole session
with open('SaO_Optilandia_resub_depot_lorries.json', 'r') as lorry_file:
    lorry_data = json.load(lorry_file)

# running row label so every lorry gets a unique index in lorry_df
k = 0

# one single-row dataframe per lorry
lorry = []

# lorry_data maps each key to a list of lorry records
for depot_lorries in lorry_data.values():
    for lorry_record in depot_lorries:
        # each record becomes one row labelled with the running counter
        lorry.append(pd.DataFrame(lorry_record, index=[k]))
        k += 1

# lorry dataframe (all single-row frames stacked)
lorry_df = pd.concat(lorry)

In [None]:
# amount still required at each location = capacity minus current level
location_df['required'] = location_df['capacity'].sub(location_df['level'])

# preview the first rows flagged as customers
location_df.loc[location_df['is_customer'].eq(True)].head()

In [None]:
# the text before the first '-' in lorry_id is stored as the lorry's depot
lorry_df['depot'] = lorry_df['lorry_id'].map(lambda lorry_id: lorry_id.split('-', 1)[0])
lorry_df.head()

In [None]:
# row positions of the locations flagged as depots
depot_locations = np.flatnonzero(location_df['is_depot'])

# row positions of the locations flagged as customers
customer_locations = np.flatnonzero(location_df['is_customer'])

imports + visualising the map

In [None]:
# imports
import networkx as nx
from scipy.spatial.distance import pdist, squareform
import matplotlib.pyplot as plt 

# pairwise straight-line distance between every pair of locations
euclidean = squareform(pdist(location_df[['x', 'y']]))

# one weighted edge (node i, node j, distance) per row of links_df
# NOTE(review): the ids in links_df are used directly as positions into
# `euclidean`, i.e. ids are assumed to equal row positions of location_df - confirm
edges = [(i, j, euclidean[i, j]) for _, (i, j) in links_df.iterrows()]

# drawing position of every node: row label -> array([x, y])
pos = {k: v.values for k, v in location_df[['x', 'y']].iterrows()}

# depots and customers are labelled with their own node number
depot_labels = {i: i for i in depot_locations}
customer_labels = {i: i for i in customer_locations}

# build the undirected road network: nodes first, then weighted edges
G = nx.Graph()
G.add_nodes_from(location_df['id'].to_numpy())
G.add_weighted_edges_from(edges)

# resize figure
plt.figure(figsize=(16, 8))

# sketch graph
nx.draw(G, pos=pos, node_size=40)

# label depot nodes, then customer nodes
for labels in (depot_labels, customer_labels):
    nx.draw_networkx_labels(G, pos, labels)

# mark depot nodes (large, red)
nx.draw_networkx_nodes(G, pos=pos, nodelist=depot_locations, node_color='r', node_size=400, alpha=0.9)

# mark customer nodes (smaller, translucent green)
nx.draw_networkx_nodes(G, pos=pos, nodelist=customer_locations, node_color='g', node_size=200, alpha=0.3)

clustering nodes ... (nearest-neighbour approach)

In [None]:
# cluster dict: depot node -> list of customer nodes assigned to that depot
cluster = {124: [], 127: [], 167: [], 523: []}

# assign every customer to the depot with the smallest straight-line distance
for node in customer_locations:
    # Compare the depot columns only. Looking the minimum distance up again in
    # the whole distance row (as done previously) can match a non-depot node
    # that happens to be equally far away, which either yields several indices
    # or an index that is not a key of `cluster`. Ties between depots resolve
    # to the first depot in `cluster`.
    nearestDepot = min(cluster, key=lambda depot: euclidean[node, depot])
    cluster[nearestDepot].append(node)

# print allocated nodes to relative cluster points (depot locations)
print(cluster)

# `nodes` used to be a scratch list that was emptied at the end of this cell;
# keep the (empty) name defined in case a later cell still refers to it
nodes = []

(breadth-first-search used for pathfinding)

In [None]:
# Routing from A to B using an iterative Breadth-First-Search (pathfinding)
def routing(currState, toState, edges):
    """Find a route from ``currState`` to ``toState`` with breadth-first search.

    The search works purely on ``edges``; no other notebook variable is read.
    Edges are treated as undirected. BFS minimises the number of edges
    traversed, not the summed weight, so the route is not necessarily the
    shortest by distance.

    Parameters
    ----------
    currState : hashable
        Start node.
    toState : hashable
        Destination node.
    edges : sequence of (node, node, weight)
        Edge list; the position of an edge in this sequence is its edge index.

    Returns
    -------
    nodeOrder : list
        Nodes the route departs from, in travel order. The start node is
        included, ``toState`` itself is not.
    edgeTraversed : list of int
        Indices into ``edges`` of the traversed edges, in travel order.
    routeWeight : list
        Weight (third item) of each traversed edge, in travel order.

    All three lists are empty when ``currState == toState``.

    Raises
    ------
    ValueError
        If ``toState`` cannot be reached from ``currState``.
    """
    # already at the destination: nothing to traverse
    if currState == toState:
        return [], [], []

    # adjacency[node] -> [(edge index, node as stored in the edge, neighbour), ...]
    # built once, in edge order, so neighbours are explored in edge-index order
    adjacency = {}
    for edgeIdx, edge in enumerate(edges):
        nodeA, nodeB = edge[0], edge[1]
        adjacency.setdefault(nodeA, []).append((edgeIdx, nodeA, nodeB))
        adjacency.setdefault(nodeB, []).append((edgeIdx, nodeB, nodeA))

    # cameFrom[node] -> (edge used to discover node, node it was discovered from);
    # doubles as the visited set. The start node has no predecessor.
    cameFrom = {currState: None}
    queue = deque([currState])
    found = False

    while queue and not found:
        # take the oldest queued node (first in, first out)
        q = queue.popleft()
        for edgeIdx, origin, neighbour in adjacency.get(q, ()):
            if neighbour in cameFrom:
                continue
            cameFrom[neighbour] = (edgeIdx, origin)
            if neighbour == toState:
                # stop as soon as the destination is discovered
                found = True
                break
            queue.append(neighbour)

    if not found:
        raise ValueError(f'no route from {currState} to {toState} in the given edges')

    # walk the predecessor links back from the destination to the start
    nodeOrder = []
    edgeTraversed = []
    step = cameFrom[toState]
    while step is not None:
        edgeIdx, origin = step
        edgeTraversed.append(edgeIdx)
        nodeOrder.append(origin)
        step = cameFrom[origin]

    # the walk collected everything destination-first; flip to travel order
    nodeOrder.reverse()
    edgeTraversed.reverse()

    # adding route weight (distance between nodes) to each traversal made
    routeWeight = [edges[edgeIdx][2] for edgeIdx in edgeTraversed]

    # return the order in which nodes were visited, the edges traversed and their weights
    return nodeOrder, edgeTraversed, routeWeight

In [None]:
# test: run the search once and reuse the result for all three printouts
testRoute = routing(124, 10, edges)
# test: gives the node order in routing
print(f'node order: {testRoute[0]}')
# test: gives the traversed edges in routing
print(f'edge order: {testRoute[1]}')
# test: gives the weight routes for traversed
print(f'weights: {testRoute[2]}')

genetic algorithm for exploring solution set

In [None]:
# routes dict: lorry id -> [depot the lorry starts from, lorry capacity]
routes = {
    lorry_df['lorry_id'][i]: [lorry_df['depot'][i], lorry_df['capacity'][i]]
    for i in lorry_df.index
}

In [None]:
# display the routes dict built so far (lorry id -> [depot, capacity])
routes

In [None]:
# imports 
import random

# function to generate a random path order (consider as genome) for each depot # returns a dict
def randomPathArrangement(cluster):
    """Return a random visiting order of the customers of every depot.

    Parameters
    ----------
    cluster : dict
        Maps each depot to the list of customers assigned to it.

    Returns
    -------
    dict
        Same keys as ``cluster``; each value is a new, randomly shuffled copy
        of that depot's customer list. ``cluster`` itself is not modified.
    """
    # Iterate the keys of the argument itself rather than a notebook-level
    # depot list, so the function works for any cluster dict it is given.
    return {
        depot: random.sample(customers, len(customers))
        for depot, customers in cluster.items()
    }

In [None]:
# for test: show one random customer ordering per depot
print(randomPathArrangement(cluster))

In [None]:
# function to generate multiple initial solutions (population) # returns genomes (individual solution) in a list
def population_init(cluster, size):
    """Build a population of ``size`` independent random genomes.

    Each genome is one call to ``randomPathArrangement(cluster)``.
    """
    return [randomPathArrangement(cluster) for _ in range(size)]

In [None]:
# for test: build a small population of 4 random genomes from the
# depot -> customers cluster dict (second argument = number of genomes)
tempPop = population_init(cluster, 4)

In [None]:
# for test: print every genome of the test population
for i in tempPop:
    print(f'\ntempPop:\n{i}') 

In [None]:
# return the population together with the total route weight of each genome
def fitness(population, edges):
    """Compute the total travelled weight of every genome in ``population``.

    For each depot of a genome, consecutive entries of that depot's list are
    joined with ``routing`` and the weights of the traversed edges are summed.
    Only the listed entries are joined; the depot key is not added as a stop.

    Parameters
    ----------
    population : iterable of dict
        Genomes, each mapping a depot to an ordered list of nodes.
    edges : sequence
        Edge list forwarded unchanged to ``routing``.

    Returns
    -------
    population
        The argument, returned unchanged.
    weightIndex : dict
        Position of the genome in ``population`` -> its total weight.
    """
    weightIndex = {}

    for genomeIdx, genome in enumerate(population):
        # total weight per depot (chromosome) of this genome
        depotTotals = []
        for depot, stops in genome.items():
            # weight of each leg between neighbouring stops
            legDistances = [
                sum(routing(stops[pos], stops[pos + 1], edges)[2])
                for pos in range(len(stops) - 1)
            ]
            depotTotals.append(sum(legDistances))
        # weight of the whole genome = sum over its depots
        weightIndex[genomeIdx] = sum(depotTotals)

    return population, weightIndex

In [None]:
# for test: keep only the second item returned by fitness(), i.e. the dict of
# total route weight per genome index of the test population
tempPopWeights = fitness(tempPop, edges)[1]

In [None]:
# selection() uses weighted selection probability for returning 2 cluster arrangement (2 genomes)
def selection(population, popWeights):
    """Pick two genomes by roulette-wheel (weight-proportional) selection.

    Parameters
    ----------
    population : sequence
        Candidate genomes.
    popWeights : dict or sequence
        Either a dict mapping each genome's position in ``population`` to its
        weight (the form returned by ``fitness``), or a sequence of weights in
        population order.

    Returns
    -------
    list
        Two genomes drawn with replacement, so the same genome may appear twice.

    Raises
    ------
    KeyError
        If ``popWeights`` is a dict that lacks a position of ``population``.
    """
    if isinstance(popWeights, dict):
        # random.choices() iterates its `weights` argument, and iterating a
        # dict yields its KEYS (0, 1, 2, ...). Passing the dict straight through
        # would therefore weight genomes by position instead of by fitness, and
        # genome 0 could never be drawn. Extract the values in population order.
        weights = [popWeights[idx] for idx in range(len(population))]
    else:
        weights = popWeights

    # NOTE(review): the weights coming from fitness() are total route weights,
    # so a larger weight makes a genome MORE likely to be selected. If shorter
    # routes are meant to be preferred, the weights need inverting - confirm.
    selectionPair = random.choices(population=population, weights=weights, k=2)
    # return the selectionPair (in form list)
    return selectionPair

In [None]:
# for test: draw two genomes from the test population using the
# per-genome weights computed for it
tempSelectionPair = selection(tempPop, tempPopWeights)