# Initiate Spark Session

In [1]:
import findspark
findspark.init()
findspark.find()
import math
import pyspark
findspark.find()
from pyspark import SparkContext, SparkConf
from pyspark.sql import SparkSession
spark=SparkSession.builder.appName("PySpark_Testing").getOrCreate()
sc = spark.sparkContext

# 1) Giant Route

### Import instances + remove header + map to key(node_id,[Latitude,Longitude, Demand]) 

In [2]:
data_RDD = sc.textFile("CVRP_instances").filter(lambda line: "Node_id" not in line)\
        .map(lambda x: (x.split('\t')[0],[float(item) for item in x.split('\t')[1:]])).persist()

data_RDD.take(4)

[('Depot', [35.53, 35.77, 0.0]),
 ('Customer 1', [35.571807, 35.826632, 534.0]),
 ('Customer 2', [35.605405, 35.791175, 129.0]),
 ('Customer 3', [35.5833333, 35.7833333, 53.0])]

### Create ([Latitude,Longitudem, demand],node_id) list

In [3]:
xy_list = data_RDD.map(lambda x: (x[1][0:3], x[0])).collect()

xy_list[0:4]

[([35.53, 35.77, 0.0], 'Depot'),
 ([35.571807, 35.826632, 534.0], 'Customer 1'),
 ([35.605405, 35.791175, 129.0], 'Customer 2'),
 ([35.5833333, 35.7833333, 53.0], 'Customer 3')]

### create the distance matrix

In [4]:
distance_rdd = data_RDD.mapValues(lambda source: [(node[1],\
                                    math.sqrt((source[0]-node[0][0])**2 + (source[1]-node[0][1])**2),node[0][2])\
                                             for node in xy_list]).persist()
distance_rdd.take(4)

[('Depot',
  [('Depot', 0.0, 0.0),
   ('Customer 1', 0.07039182248670033, 534.0),
   ('Customer 2', 0.0783217380425092, 129.0),
   ('Customer 3', 0.05497470125230167, 53.0),
   ('Customer 4', 0.08095761593574287, 74.0),
   ('Customer 5', 0.10532689117694036, 155.0),
   ('Customer 6', 0.027443130087508025, 62.0),
   ('Customer 7', 0.11272853999320188, 351.0),
   ('Customer 8', 0.0701604806782233, 45.0),
   ('Customer 9', 0.09953560631753315, 54.0),
   ('Customer 10', 0.11434936766331112, 66.0),
   ('Customer 11', 0.10215311160214088, 76.0),
   ('Customer 12', 0.10926667570672825, 830.0),
   ('Customer 13', 0.21260641977606906, 26.0),
   ('Customer 14', 0.14051757505023674, 30.0),
   ('Customer 15', 0.2215103748653724, 193.0),
   ('Customer 16', 0.13967325450850976, 49.0),
   ('Customer 17', 0.12716280796679041, 128.0),
   ('Customer 18', 0.2625040251310427, 304.0),
   ('Customer 19', 0.2566448683628738, 21.0),
   ('Customer 20', 0.1552709531399853, 63.0),
   ('Customer 21', 0.1472058462

### Nearest Neighbor method(constructing giant route)

In [5]:
def NNM(distance_rdd):
    '''
    Find the Giant Route for given the an RDD of the distance matrix.
    
    Starts from depot and find the nearest node from the current node based on their eculidean distances.
    
    Returns a RDD from the (key, value1,value2) shape, where the key is the node_id and the value is the 
    distance traveled to the node and the demand of boxes in that node
    
    The returned Rdd is the Giant Route
    
    '''
    # The nodes sequence of the route based on the lowest cost(distance)
    route_nodes = ['Depot']
    
    # Starting node (depot)
    source_node = "Depot"
    
    #Demand by node:
    demand = [float(0)]
    
    # Distance traveld(depot)
    route_distance = [float(0)]
    
    # number of nodes (customers + depot) in the problem
    number_of_nodes = distance_rdd.count()
    
    # number of nodes already visited(depot already visited)
    nodes_visited = 1
    
    while nodes_visited <= number_of_nodes:

        current_node = route_nodes[-1]

        # Ordering the distances from the source to the rest of nodes in ascending order
        current_distances =  distance_rdd.filter(lambda x: x[0] == current_node)\
                                .flatMap(lambda x: x[1]).sortBy(lambda x: x[1]).collect()
        # Finding the closest node and not visited yet
        for node in current_distances:
            if node[0] not in route_nodes:
                #Add the found node to the route
                route_nodes.append(node[0])
                #Add the distance traveld
                route_distance.append(node[1])
                # Add the demand of the node
                demand.append(node[2])
                break
            else:
                continue
        nodes_visited += 1
        print("{}\n".format(route_nodes))
        

    # Adding the route from the last node back to the depot
    last_node_rdd = distance_rdd.filter(lambda x: x[0] == route_nodes[-1]).flatMap(lambda x: x[1])\
                    .filter(lambda x: x[0]=='Depot').collect()
    
    route_nodes.insert(len(route_nodes), last_node_rdd[0][0])
    route_distance.insert(len(route_distance), last_node_rdd[0][1])
    demand.insert(len(demand), float(0))
    
    
    output = sc.parallelize(zip(route_nodes, route_distance, demand))
    
    return output
     
giant_route = NNM(distance_rdd)   
giant_route.collect()

['Depot', 'Customer 6']

['Depot', 'Customer 6', 'Customer 1']

['Depot', 'Customer 6', 'Customer 1', 'Customer 4']

['Depot', 'Customer 6', 'Customer 1', 'Customer 4', 'Customer 8']

['Depot', 'Customer 6', 'Customer 1', 'Customer 4', 'Customer 8', 'Customer 3']

['Depot', 'Customer 6', 'Customer 1', 'Customer 4', 'Customer 8', 'Customer 3', 'Customer 2']

['Depot', 'Customer 6', 'Customer 1', 'Customer 4', 'Customer 8', 'Customer 3', 'Customer 2', 'Customer 5']

['Depot', 'Customer 6', 'Customer 1', 'Customer 4', 'Customer 8', 'Customer 3', 'Customer 2', 'Customer 5', 'Customer 10']

['Depot', 'Customer 6', 'Customer 1', 'Customer 4', 'Customer 8', 'Customer 3', 'Customer 2', 'Customer 5', 'Customer 10', 'Customer 24']

['Depot', 'Customer 6', 'Customer 1', 'Customer 4', 'Customer 8', 'Customer 3', 'Customer 2', 'Customer 5', 'Customer 10', 'Customer 24', 'Customer 26']

['Depot', 'Customer 6', 'Customer 1', 'Customer 4', 'Customer 8', 'Customer 3', 'Customer 2', 'Customer 5', 'Custo

[('Depot', 0.0, 0.0),
 ('Customer 6', 0.027443130087508025, 62.0),
 ('Customer 1', 0.044167710309679586, 534.0),
 ('Customer 4', 0.012428595133803295, 74.0),
 ('Customer 8', 0.016383234265551577, 45.0),
 ('Customer 3', 0.031043583468728, 53.0),
 ('Customer 2', 0.02342332597604172, 129.0),
 ('Customer 5', 0.03603866097678819, 155.0),
 ('Customer 10', 0.023976469131216, 66.0),
 ('Customer 24', 0.037902128726495773, 481.0),
 ('Customer 26', 0.056933789132637476, 50.0),
 ('Customer 30', 0.033078502701911476, 91.0),
 ('Customer 7', 0.02779818256649216, 351.0),
 ('Customer 9', 0.017002027232068794, 54.0),
 ('Customer 12', 0.019830708030731534, 830.0),
 ('Customer 11', 0.018629632578234002, 76.0),
 ('Customer 17', 0.03635184077869945, 128.0),
 ('Customer 16', 0.01255072623396791, 49.0),
 ('Customer 14', 0.013924461533573433, 30.0),
 ('Customer 20', 0.015021004127555746, 63.0),
 ('Customer 21', 0.025835495737457186, 27.0),
 ('Customer 27', 0.05449025800086292, 37.0),
 ('Customer 25', 0.0403943

# 2) partitioning the giant route into clusters based on the following constraints:
1. Capacity constraint: Cmax = 1000 boxes
2. Traveling constraint: Dmax = 300 km

In [6]:
def solve(giant_route,distance_rdd, num_vehicle, max_capacity,max_dist):
    '''
    Takes the giant route Rdd and the distance matrix as input.
    '''
    total_net_profits = []
    giant_route = giant_route.collect()
    clusters = []
    cluster = []
    capacity = 0
    dis = 0
    
    for node in giant_route:
        # Send a new vehicle if the capacity or dis is max
        if capacity+node[2] >= max_capacity or dis+node[1] >= max_dist:
            # add the route of the vehicle to the clusters
            clusters.append(cluster)
            # add net profit of the tour
            total_net_profits.append(-dis)
            # start a new route
            cluster = [node[0]]
            capacity = node[2] 
            dis = node[1]
            
        else:
            cluster.append(node[0])
            dis += node[1]
            capacity += node[2]
    
    # sending vehicle to the last node if not included:
    clusters.insert(len(clusters),[giant_route[-2][0]])
    
    # If number of vehicles usedexceed 10
    if len(clusters) > num_vehicle:
        print("More vehicles needed to solve the problem")
        return
        
    
    #Adding distances from and to depot to the cost
    costs_from_depot = []
    for cluster in clusters:
        if cluster[0] != 'Depot':
            from_depot_cost = distance_rdd.filter(lambda x: x[0] == 'Depot').flatMap(lambda x: x[1])\
                    .filter(lambda x: x[0]==cluster[0]).collect()[0][1]
            costs_from_depot.append(-from_depot_cost)
        else:
            costs_from_depot.append(float(0))
            
    costs_to_depot = []
    for cluster in clusters:
        to_depot_cost = distance_rdd.filter(lambda x: x[0] == cluster[-1]).flatMap(lambda x: x[1])\
                    .filter(lambda x: x[0]=='Depot').collect()[0][1]
        costs_to_depot.append(-to_depot_cost)
        
    
    # Updating total net profit:
    total_net_profits.append(0)
    total_net_profits = [a + b + c for a, b, c in zip(costs_from_depot,costs_to_depot,total_net_profits)]

    # Adding depot to the begin and end of each vehicle's tour
    for tour in clusters:
        if tour[0] != "Depot":
            tour.insert(0,"Depot")
        tour.insert(len(tour), 'Depot')

    # printing the solutions:
    print("Number of Vehicle used: {}\n".format(len(clusters)))
    for i in range(len(clusters)):
        print("Net_profit: {} \ncustomers visited by vehicle {}: {}\n\n".format(total_net_profits[i],\
                                                                                str(i+1),clusters[i]))
    print("\n\nTotal Net profits: {}".format(sum(total_net_profits)))

## Solve for num_vehicle=10, max_capacity=1000,max_dist=300

In [7]:
solve(giant_route,distance_rdd, num_vehicle=10, max_capacity=1000,max_dist=300)

Number of Vehicle used: 7

Net_profit: -0.23321131728382144 
customers visited by vehicle 1: ['Depot', 'Customer 6', 'Customer 1', 'Customer 4', 'Customer 8', 'Customer 3', 'Customer 2', 'Depot']


Net_profit: -0.42620793733588375 
customers visited by vehicle 2: ['Depot', 'Customer 5', 'Customer 10', 'Customer 24', 'Customer 26', 'Customer 30', 'Depot']


Net_profit: -0.257064356109296 
customers visited by vehicle 3: ['Depot', 'Customer 7', 'Customer 9', 'Depot']


Net_profit: -0.24988012791783465 
customers visited by vehicle 4: ['Depot', 'Customer 12', 'Customer 11', 'Depot']


Net_profit: -0.6193870016196659 
customers visited by vehicle 5: ['Depot', 'Customer 17', 'Customer 16', 'Customer 14', 'Customer 20', 'Customer 21', 'Customer 27', 'Customer 25', 'Customer 29', 'Customer 28', 'Customer 13', 'Depot']


Net_profit: -0.5718381994937665 
customers visited by vehicle 6: ['Depot', 'Customer 23', 'Customer 22', 'Customer 15', 'Customer 19', 'Depot']


Net_profit: -0.52500805026208

## Solve for num_vehicle=6, max_capacity=1000,max_dist=300

In [8]:
solve(giant_route,distance_rdd, num_vehicle=6, max_capacity=1000,max_dist=300)

More vehicles needed to solve the problem


## Solve for num_vehicle=100, max_capacity=100,max_dist=300

In [9]:
solve(giant_route,distance_rdd, num_vehicle=100, max_capacity=100,max_dist=300)

Number of Vehicle used: 27

Net_profit: -0.05488626017501605 
customers visited by vehicle 1: ['Depot', 'Customer 6', 'Depot']


Net_profit: -0.18495135528308024 
customers visited by vehicle 2: ['Depot', 'Customer 1', 'Depot']


Net_profit: -0.17434382700528903 
customers visited by vehicle 3: ['Depot', 'Customer 4', 'Depot']


Net_profit: -0.17256199966480457 
customers visited by vehicle 4: ['Depot', 'Customer 8', 'Customer 3', 'Depot']


Net_profit: -0.18006680206106013 
customers visited by vehicle 5: ['Depot', 'Customer 2', 'Depot']


Net_profit: -0.24669244333066892 
customers visited by vehicle 6: ['Depot', 'Customer 5', 'Depot']


Net_profit: -0.25267520445783825 
customers visited by vehicle 7: ['Depot', 'Customer 10', 'Depot']


Net_profit: -0.32368215476749784 
customers visited by vehicle 8: ['Depot', 'Customer 24', 'Depot']


Net_profit: -0.3591107706517449 
customers visited by vehicle 9: ['Depot', 'Customer 26', 'Depot']


Net_profit: -0.2989814936817004 
customers visi