In [1]:
import json
import folium

import numpy as np
import pandas as pd

from sklearn.cluster import KMeans

In [2]:
def _read_json(path):
    """Open `path` for reading and return its parsed JSON content."""
    with open(path) as handle:
        return json.load(handle)


# Per-route data; later cells read data[route_id]['stops'][stop_id]['lat' / 'lng'].
data = _read_json('/Users/ashutoshshukla/Desktop/Current_Projects/MITRC/version_1/data/model_build_inputs/route_data.json')

# Later cells read data_actual[route_id]['actual'], a mapping keyed by stop ID.
data_actual = _read_json('/Users/ashutoshshukla/Desktop/Current_Projects/MITRC/version_1/data/model_build_inputs/actual_sequences.json')

# Later cells read data_travel_time[route_id][from_stop][to_stop].
data_travel_time = _read_json('/Users/ashutoshshukla/Desktop/Current_Projects/MITRC/version_1/data/model_build_inputs/travel_times.json')

In [3]:
# Route identifiers in the order they appear in the route-data dictionary;
# later cells select a route by its position in this list.
route_ids = list(data)

# Updates

* Got acquainted with all the data resources they have shared.


* Ran a "pseudo-baseline" model using the CLI; now familiar with the entire pipeline (how to train, test, and score).
* Did some visualization (will present next)
* Implemented and ran a TSP formulation (will discuss next)



* Concerns with IRL (discuss when suitable):
    * How will you model capacity constraints?
    * How will you model time-window constraints?
    * How will you do sequencing?
    * What are you augmenting the data with to learn the utility function?


* Clarification on sample route and actual sequence

In [4]:
# One [mean latitude, mean longitude] pair per route, in route_ids order.
locations = []

for route_id in route_ids:
    stops = data[route_id]["stops"].values()
    stop_lats = [stop['lat'] for stop in stops]
    stop_lngs = [stop['lng'] for stop in stops]
    # Arithmetic mean of the stop coordinates serves as the route's centroid.
    locations.append([sum(stop_lats) / len(stop_lats), sum(stop_lngs) / len(stop_lngs)])

In [5]:
m = folium.Map(location=[29.749907, -95.358421])

# Drop a red pin on every 20th route centroid to keep the map readable.
for centroid_lat, centroid_lng in locations[::20]:
    pin = folium.Icon(color='red', icon_color='red', icon='map-pin')
    folium.Marker(location=[centroid_lat, centroid_lng], icon=pin).add_to(m)

m

## K-means clustering to identify cities and get the route count per city

In [6]:
# Group the route centroids into five clusters; the seed is fixed so the
# cluster labels are reproducible between runs.
kmeans = KMeans(n_clusters=5, random_state=0)
kmeans.fit(locations)

# One row per route (same order as `locations`); column 0 holds the cluster label.
df = pd.DataFrame(kmeans.labels_)

# Display the fitted cluster centers as (latitude, longitude) rows.
kmeans.cluster_centers_

array([[  42.24566308,  -71.28297334],
       [  33.91840379, -117.92606151],
       [  41.98749351,  -87.97228754],
       [  47.53498246, -122.28162406],
       [  30.31341077,  -97.81325356]])

In [7]:
# Number of routes assigned to cluster label 4 (rows, columns).
# NOTE(review): which city a label corresponds to depends on the fitted
# cluster centers shown by the clustering cell - confirm after re-running.
in_cluster_4 = df[0] == 4
df.loc[in_cluster_4].shape

(214, 1)

# Observation 1

1. Keys are some sort of area-planning code used by Amazon. They may not have much relevance to the real world.

2. Cities in training data:

* location_zip = {}
* location_zip['Seattle'] = (47.60, -122.33)     # 1079 locations
* location_zip['Austin']  = (30.26, -97.74)      # 214 locations
* location_zip['Chicago'] = (41.87, -87.62)      # 1002 locations
* location_zip['LA']      = (34.05, -118.24)     # 2888 locations
* location_zip['Boston']  = (42.36, -71.05)      # 929 locations

# Observation 2: Visualizing a route


#### * Question: Do we always get feasible routes?

In [8]:
route_number = 400

# Stop IDs of the selected route, ordered by the value stored for each stop
# under 'actual' (presumably the visit position - confirm against the data spec).
actual_sequence = data_actual[route_ids[route_number]]['actual']
temp = dict(sorted(actual_sequence.items(), key=lambda item: item[1]))

# [lat, lng] of every stop, in that same order.
route_stops = data[route_ids[route_number]]['stops']
coordinates = [
    [route_stops[stop_id]['lat'], route_stops[stop_id]['lng']]
    for stop_id in temp
]

In [9]:
# Center the map on the first stop of the ordered sequence and draw the
# route as a single polyline through all stops.
start_lat, start_lng = coordinates[0]
m = folium.Map(location=[start_lat, start_lng], zoom_start=12)
my_PolyLine = folium.PolyLine(locations=coordinates, weight=3)
my_PolyLine.add_to(m)
m

In [10]:
# Row/column order of the matrix: stop IDs in the order held by `temp`.
matrix_order = list(temp)

# Square matrix where distances[a][b] is the travel-time entry from stop
# matrix_order[a] to stop matrix_order[b].
distances = []
for origin in matrix_order:
    times_from_origin = data_travel_time[route_ids[route_number]][origin]
    distances.append([times_from_origin[destination] for destination in matrix_order])

In [11]:
"""Simple travelling salesman problem between cities."""

from ortools.constraint_solver import routing_enums_pb2
from ortools.constraint_solver import pywrapcp


def create_data_model(distance_matrix=None):
    """Build the problem description consumed by the routing cells.

    Args:
        distance_matrix: Optional square matrix (list of rows) of arc costs.
            When omitted, the notebook-level `distances` matrix is used,
            which is the behaviour of the original zero-argument call.

    Returns:
        dict with keys:
            'distance_matrix': the matrix itself (not copied),
            'num_vehicles': 1 (single-vehicle problem),
            'depot': 0 (node index passed to the routing index manager
                as the depot).
    """
    if distance_matrix is None:
        # Fall back to the matrix built earlier in the notebook.
        distance_matrix = distances
    return {
        'distance_matrix': distance_matrix,
        'num_vehicles': 1,
        'depot': 0,
    }


def print_solution(manager, routing, solution):
    """Print the tour of vehicle 0 and return the nodes in visiting order.

    Args:
        manager: Object exposing `IndexToNode(index)`; the notebook passes
            a `pywrapcp.RoutingIndexManager`.
        routing: Object exposing `Start`, `IsEnd`, `NextVar` and
            `GetArcCostForVehicle`; the notebook passes a
            `pywrapcp.RoutingModel`.
        solution: Object exposing `ObjectiveValue()` and `Value(var)`; the
            notebook passes the result of `routing.SolveWithParameters`.

    Returns:
        list: node indices visited by vehicle 0, starting at the start node.
        The end node, which is printed as the last hop, is not included.
    """
    print('Objective: {} miles'.format(solution.ObjectiveValue()))
    index = routing.Start(0)
    plan_output = 'Route for vehicle 0:\n'
    route_distance = 0

    solution_collector = []

    while not routing.IsEnd(index):
        node = manager.IndexToNode(index)
        solution_collector.append(node)
        plan_output += ' {} ->'.format(node)
        previous_index = index
        index = solution.Value(routing.NextVar(index))
        # Accumulate the cost of the arc just traversed by vehicle 0.
        route_distance += routing.GetArcCostForVehicle(previous_index, index, 0)
    plan_output += ' {}\n'.format(manager.IndexToNode(index))
    # Append the total BEFORE printing; previously it was added after the
    # print call and therefore never appeared in the output.
    plan_output += 'Route distance: {}miles\n'.format(route_distance)
    print(plan_output)

    return solution_collector



# def main():
#     """Entry point of the program."""
#     # Instantiate the data problem.
#     data = create_data_model()

#     # Create the routing index manager.
#     manager = pywrapcp.RoutingIndexManager(len(data['distance_matrix']),
#                                            data['num_vehicles'], data['depot'])

#     # Create Routing Model.
#     routing = pywrapcp.RoutingModel(manager)


#     def distance_callback(from_index, to_index):
#         """Returns the distance between the two nodes."""
#         # Convert from routing variable Index to distance matrix NodeIndex.
#         from_node = manager.IndexToNode(from_index)
#         to_node = manager.IndexToNode(to_index)
#         return data['distance_matrix'][from_node][to_node]

#     transit_callback_index = routing.RegisterTransitCallback(distance_callback)

#     # Define cost of each arc.
#     routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)

#     # Setting first solution heuristic.
#     search_parameters = pywrapcp.DefaultRoutingSearchParameters()
#     search_parameters.first_solution_strategy = (
#         routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC)

#     # Solve the problem.
#     solution = routing.SolveWithParameters(search_parameters)

    
#     # Print solution on console.
#     if solution:
#         solution_collector = print_solution(manager, routing, solution)


# if __name__ == '__main__':
#     main()

In [12]:
"""Entry point of the program."""
# Instantiate the data problem.
# Bound to its own name so the route-data dictionary `data` loaded at the top
# of the notebook is not overwritten (earlier cells stay re-runnable).
tsp_data = create_data_model()

# Create the routing index manager.
manager = pywrapcp.RoutingIndexManager(len(tsp_data['distance_matrix']),
                                       tsp_data['num_vehicles'], tsp_data['depot'])

# Create Routing Model.
routing = pywrapcp.RoutingModel(manager)


def distance_callback(from_index, to_index):
    """Return the matrix entry for the arc between two routing indices."""
    # Convert from routing variable Index to distance matrix NodeIndex.
    from_node = manager.IndexToNode(from_index)
    to_node = manager.IndexToNode(to_index)
    # NOTE(review): OR-Tools documents transit callbacks as returning
    # integers; if the matrix holds floats, confirm how the installed
    # OR-Tools version converts them.
    return tsp_data['distance_matrix'][from_node][to_node]

transit_callback_index = routing.RegisterTransitCallback(distance_callback)

# Define cost of each arc.
routing.SetArcCostEvaluatorOfAllVehicles(transit_callback_index)

# Setting first solution heuristic.
search_parameters = pywrapcp.DefaultRoutingSearchParameters()
search_parameters.first_solution_strategy = (
    routing_enums_pb2.FirstSolutionStrategy.PATH_CHEAPEST_ARC)

# Solve the problem.
solution = routing.SolveWithParameters(search_parameters)


# Print solution on console.
if solution:
    solution_collector = print_solution(manager, routing, solution)
else:
    # Fail here rather than let later cells run with a missing or stale
    # `solution_collector` left over from a previous execution.
    raise RuntimeError('The routing solver did not return a solution.')

Objective: 15140 miles
Route for vehicle 0:
 0 -> 198 -> 196 -> 199 -> 200 -> 201 -> 197 -> 202 -> 191 -> 190 -> 186 -> 187 -> 189 -> 188 -> 183 -> 184 -> 185 -> 182 -> 180 -> 177 -> 178 -> 61 -> 62 -> 63 -> 64 -> 60 -> 173 -> 174 -> 176 -> 175 -> 41 -> 37 -> 36 -> 44 -> 43 -> 42 -> 59 -> 52 -> 51 -> 50 -> 49 -> 48 -> 82 -> 81 -> 88 -> 135 -> 87 -> 86 -> 85 -> 84 -> 83 -> 129 -> 130 -> 128 -> 127 -> 134 -> 133 -> 132 -> 131 -> 170 -> 169 -> 168 -> 167 -> 161 -> 162 -> 163 -> 164 -> 165 -> 166 -> 160 -> 159 -> 158 -> 157 -> 156 -> 155 -> 154 -> 153 -> 145 -> 146 -> 147 -> 148 -> 149 -> 150 -> 151 -> 144 -> 152 -> 171 -> 172 -> 65 -> 66 -> 67 -> 68 -> 69 -> 80 -> 89 -> 91 -> 90 -> 98 -> 97 -> 96 -> 95 -> 94 -> 93 -> 77 -> 76 -> 75 -> 73 -> 70 -> 71 -> 72 -> 74 -> 92 -> 78 -> 79 -> 99 -> 100 -> 101 -> 103 -> 102 -> 104 -> 105 -> 106 -> 107 -> 108 -> 111 -> 110 -> 109 -> 120 -> 119 -> 121 -> 122 -> 123 -> 118 -> 116 -> 115 -> 117 -> 112 -> 113 -> 114 -> 126 -> 125 -> 124 -> 181 -> 143 -> 1

In [13]:
# Translate solver node indices back to stop IDs.
# Row/column k of `distances` was built from matrix_order[k], so the node
# index returned by the solver is a position in `matrix_order`. Indexing it
# directly avoids searching `temp.values()` for each node, which only gave the
# right stop when those values happened to equal the row positions.
temp_tsp = [matrix_order[node] for node in solution_collector]

In [14]:

# Re-read the route data so `data` is the route dictionary here, regardless
# of what earlier cells may have bound that name to.
with open('/Users/ashutoshshukla/Desktop/Current_Projects/MITRC/version_1/data/model_build_inputs/route_data.json') as route_file:
    data = json.load(route_file)

route_number = 400

# Actual sequence: stop IDs ordered by the value stored under 'actual'.
actual_sequence = data_actual[route_ids[route_number]]['actual']
temp = dict(sorted(actual_sequence.items(), key=lambda item: item[1]))

route_stops = data[route_ids[route_number]]['stops']
coordinates = [
    [route_stops[stop_id]['lat'], route_stops[stop_id]['lng']]
    for stop_id in temp
]

# TSP co-ordinates: the same stops, in the order held by `temp_tsp`.
coordinates_tsp = [
    [route_stops[stop_id]['lat'], route_stops[stop_id]['lng']]
    for stop_id in temp_tsp
]

In [15]:
# Center the map on the sixth stop of the actual sequence, then overlay the
# actual route (default colour) and the TSP route (red) for comparison.
center_lat, center_lng = coordinates[5]
m = folium.Map(location=[center_lat, center_lng], zoom_start=13)

my_PolyLine = folium.PolyLine(locations=coordinates, weight=3)
my_PolyLine1 = folium.PolyLine(locations=coordinates_tsp, weight=3, color="red")

for route_line in (my_PolyLine, my_PolyLine1):
    m.add_child(route_line)
m