-
Notifications
You must be signed in to change notification settings - Fork 3
/
hubway.py
247 lines (206 loc) · 9.66 KB
/
hubway.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
#! python
"""
Find the best route that connects all the rental stations in the Boston Hubway
bike rental network (http://www.thehubway.com/). This is essentially a
travelling salesman problem.
Richard West
r.h.west@gmail.com
"""
from xml.etree.ElementTree import ElementTree
import urllib2
import urllib
import json
import os
import numpy
import time
import random
class Station():
def __init__(self):
self.id = 0
def __repr__(self):
return "<Station %2d>" % self.id
def __str__(self):
return self.lat+','+self.long
def prettystring(self):
return "%s at %s"%(self.name, str(self))
pass
def distancematrix(startplaces,endplaces):
geo_args = {
'origins': startplaces,
'destinations': endplaces,
'mode': 'bicycling',
'sensor': "false",
}
BASE_URL = 'http://maps.googleapis.com/maps/api/distancematrix/json'
url = BASE_URL + '?' + urllib.urlencode(geo_args)
result = json.load(urllib2.urlopen(url))
assert(result['status']!='OVER_QUERY_LIMIT')
times = numpy.zeros((len(result['origin_addresses']), len(result['destination_addresses'])), dtype=numpy.int32) # times in seconds
distances = numpy.zeros((len(result['origin_addresses']), len(result['destination_addresses'])), dtype=numpy.int32) # distances in m
for i,row in enumerate(result['rows']):
for j,element in enumerate(row['elements']):
if element['status']=='ZERO_RESULTS':
print 'ZERO_RESULTS for ' + result['origin_addresses'][i] + ' to ' + result['destination_addresses'][j]
times[i][j]=-1
distances[i][j]=-1
else:
times[i][j] = element['duration']['value']
distances[i][j] = element['distance']['value']
return times,distances,result
stations_file = urllib2.urlopen('http://thehubway.com/data/stations/bikeStations.xml')
tree = ElementTree()
tree.parse(stations_file)
stations = list()
for s in tree.iter('station'):
station = Station()
station.id = int(s.find('id').text)
station.lat = s.find('lat').text
station.long = s.find('long').text
station.name = s.find('name').text
stations.append(station)
##################### Change These #######################
number_of_hackers=2 # This partitions the problem
this_hacker=0 # hacker number 0 of 2
debug=2000 #debug is a limit for testing, make it very big for a full run
data_dir = 'data'
os.path.exists(data_dir) or os.mkdir(data_dir)
assert this_hacker < number_of_hackers
number_each=int(numpy.ceil(len(stations)/float(number_of_hackers)))
startstations=stations[this_hacker*number_each:(this_hacker+1)*number_each]
endstations=stations
number_of_rowblocks=min(debug,int(numpy.ceil(number_each/10.0)))
number_of_colblocks=min(debug,int(numpy.ceil(len(stations)/10.0)))
for idx_row in xrange(number_of_rowblocks):
for idx_col in xrange(number_of_colblocks):
startplaces_ids = '|'.join([str(s.id).rjust(2) for s in stations[this_hacker*number_each:(this_hacker+1)*number_each][idx_row*10:(idx_row+1)*10] ])
endplaces_ids = '|'.join([str(s.id).rjust(2) for s in stations[(idx_col*10):(idx_col+1)*10]])
#make a check matrix of start->end ids.
startplaces_list = [s.id for s in stations[this_hacker*number_each:(this_hacker+1)*number_each][idx_row*10:(idx_row+1)*10] ]
endplaces_list = [s.id for s in stations[(idx_col*10):(idx_col+1)*10]]
checkmatrix=numpy.empty((len(startplaces_list),len(endplaces_list)))
for idx_block_row,start in enumerate(startplaces_list):
for idx_block_col,end in enumerate(endplaces_list):
checkmatrix[idx_block_row,idx_block_col]=start*1000+end
startplaces = '|'.join([str(s) for s in stations[this_hacker*number_each:(this_hacker+1)*number_each][idx_row*10:(idx_row+1)*10] ])
endplaces = '|'.join([str(s) for s in stations[(idx_col*10):(idx_col+1)*10]])
print 'start ' + startplaces_ids +', end ' + endplaces_ids
distfilename = os.path.join(data_dir,'dist_matrix_'+str(this_hacker*number_of_rowblocks+idx_row)+'_'+str(idx_col)+'.txt')
timefilename = os.path.join(data_dir,'time_matrix_'+str(this_hacker*number_of_rowblocks+idx_row)+'_'+str(idx_col)+'.txt')
checkfilename = os.path.join(data_dir,'check_matrix_'+str(this_hacker*number_of_rowblocks+idx_row)+'_'+str(idx_col)+'.txt')
jsonfilename = os.path.join(data_dir,'json_result_'+str(this_hacker*number_of_rowblocks+idx_row)+'_'+str(idx_col)+'.js')
if os.path.exists(distfilename) and os.path.exists(timefilename) and os.path.exists(jsonfilename):
print 'skipping ' + distfilename + ' and ' + timefilename + ' and ' + jsonfilename
else:
times,distances,result = distancematrix(startplaces,endplaces)
with open(jsonfilename,'w') as f:
f.write(json.dumps(result))
#with open(timefilename,'w') as f:
# distances.tofile(f)
numpy.savetxt(distfilename,distances)
numpy.savetxt(timefilename,times)
print 'writing ' + distfilename + ' and ' + timefilename
time.sleep(numpy.random.rand()*2+12)
if not os.path.exists(checkfilename):
numpy.savetxt(checkfilename,checkmatrix)
from matplotlib import pyplot as plt
times=numpy.zeros((len(stations),len(stations)),dtype=numpy.int32)
time_matrix_list=[a for a in os.listdir(data_dir) if a.startswith('time_matrix') and a.endswith('.txt')]
for time_matrix in time_matrix_list:
row,col=time_matrix.split('.')[0].split('_')[2:4]
row=int(row)
col=int(col)
timesblock=numpy.genfromtxt(os.path.join(data_dir,time_matrix))
print row,col
times[row*10:(row+1)*10,col*10:(col+1)*10]=timesblock[0:10,0:10]
check=numpy.zeros((len(stations),len(stations)),dtype=numpy.int32)
check_matrix_list=[a for a in os.listdir(data_dir) if a.startswith('check_matrix') and a.endswith('.txt')]
for check_matrix in check_matrix_list:
row,col=check_matrix.split('.')[0].split('_')[2:4]
row=int(row)
col=int(col)
checkblock=numpy.genfromtxt(os.path.join(data_dir,check_matrix))
rows,cols=checkblock.shape
check[row*10:row*10+rows,col*10:col*10+cols]=checkblock
start=check/1000
end=check%100
plt.spy(times)
plt.spy(times==-1)
# the shape of this is discouraging: did we screw up somewhere?
# get the rows with more than one negative one
inaccessible_stations = set(((times==-1).sum(1)>1).nonzero()[0])
# add the columns with more than one negative one
inaccessible_stations = inaccessible_stations.union(((times==-1).sum(0)>1).nonzero()[0])
print "Difficulty accessing these stations, so removing them:"
for s in inaccessible_stations:
print stations[s].prettystring()
keepers = range(len(times))
for s in inaccessible_stations:
keepers.remove(s)
print "Also removing these stations, just for testing:"
for s in range(0):
if s in keepers:
print stations[s].prettystring()
keepers.remove(s)
pruned_times = times[keepers][:,keepers]
pruned_stations = [stations[i] for i in range(len(stations)) if i in keepers]
plt.spy(pruned_times==-1)
### Now for the good stuff, based on tsp.py
# Copyright 2010-2012 Google
# Licensed under the Apache License, Version 2.0
from google.apputils import app
import gflags
import os
import sys
# set OR_TOOLS_PATH in your environment to override the default
or_tools_path = '/Users/rwest/XCodeProjects/google-or-tools/trunk'
if not os.path.exists(or_tools_path):
or_tools_path = 'or_tools_path'
or_tools_path = os.getenv('OR_TOOLS_PATH',or_tools_path)
sys.path.append(os.path.join(or_tools_path,'src'))
from constraint_solver import pywraprouting
def Distance(i, j):
"""The distance from i to j"""
return pruned_times[i,j]
tsp_size = len(pruned_times)
forbidden_connections = [] # a list of tuples of forbidden connections
sorted_stations = []
if True:
# TSP of size FLAGS.tsp_size
# Second argument = 1 to build a single tour (it's a TSP).
# Nodes are indexed from 0 to FLAGS_tsp_size - 1, by default the start of
# the route is node 0.
routing = pywraprouting.RoutingModel(tsp_size, 1)
# Setting first solution heuristic (cheapest addition).
routing.SetCommandLineOption('routing_first_solution', 'PathCheapestArc')
# Disabling Large Neighborhood Search, comment out to activate it.
# routing.SetCommandLineOption('routing_no_lns', 'true')
# Setting the cost function.
# Put a callback to the distance accessor here. The callback takes two
# arguments (the from and to node inidices) and returns the distance between
# these nodes.
routing.SetCost(Distance)
for from_node,to_node in forbidden_connections:
if routing.NextVar(from_node).Contains(to_node):
print 'Forbidding connection ' + str(from_node) + ' -> ' + str(to_node)
routing.NextVar(from_node).RemoveValue(to_node)
# Solve, returns a solution if any.
assignment = routing.Solve()
if assignment:
# Solution cost.
print "total time:", assignment.ObjectiveValue()
# Inspect solution.
# Only one route here; otherwise iterate from 0 to routing.vehicles() - 1
route_number = 0
node = routing.Start(route_number)
route = ''
while not routing.IsEnd(node):
station = pruned_stations[int(node)]
route += station.prettystring() + ' -> \n'
sorted_stations.append({'name': station.name, 'lat':station.lat, 'long':station.long})
node = assignment.Value(routing.NextVar(node))
route += '0'
print route
else:
print 'No solution found.'
with open('route.json','w') as f:
json.dump(sorted_stations,f)