Large diffs are not rendered by default.

18 main.py
@@ -1,6 +1,8 @@
import node
from citiStation import CitiStation
import csv
import math
import numpy as np

def get_citiBike_stations():
""" Parse citibike data into nodes w/ features and value """
@@ -101,16 +103,18 @@ def get_k_new_stations(k, n):

new_nodes = read_new_nodes()

for node in new_nodes[:10]:

for node in new_nodes[0:100]:
# For the get near_by_venues => only 500 requests per hour.
node.calculate_desireability()
break


# existing_nodes = read_exisiting_nodes()
#
# print"%d + %d = %d"%(len(new_nodes),len(existing_nodes),len(new_nodes)+len(existing_nodes))
#
# result = 0
# for node in existing_nodes:
# test =node.get_nearby_accidents()
# if test>0:
# print node.location
# print test
# node.calculate_desireability()
# if node.feature_nearby_accident>0:
# result+=node.feature_nearby_accident
# print result
102 node.py
@@ -6,6 +6,7 @@
import numpy as np
import googlemaps
from datetime import datetime
from scipy import spatial

class Node:
def __init__(self, location, does_exist):
@@ -34,33 +35,22 @@ def calculate_desireability(self):
# self.feature_average_rack_distance = self.get_average_rack_distance()

def get_nearby_accidents(self):
    """Sum the accident results recorded close to this node.

    Every accident point stored in the ``AccidentCords`` KD-tree that
    lies within ``threshold`` of ``(self.location[0], self.location[1])``
    contributes its matching entry of ``AccidentResults`` to the total.
    The accident data is loaded on first use.

    Returns:
        int: sum of the accident results inside the search radius
        (0 when nothing is in range).
    """
    # Search radius expressed as a change in degrees (the original
    # author's estimate for two Manhattan blocks).
    threshold = 0.000042

    # Bind the helper once so that the lazy load below and the reads
    # that follow are guaranteed to operate on the same object.
    data = util.Util()
    if len(data.AccidentResults) == 0:
        data.set_Accidents()
    accident_points = data.AccidentCords
    accident_results = data.AccidentResults

    point = (self.location[0], self.location[1])

    # p=2 selects the Euclidean norm for the radius search.
    hits = accident_points.query_ball_point(point, threshold, p=2)

    return sum(int(accident_results[idx]) for idx in hits)

def get_nearby_venues(self):
"""Gets nearby venues within a 100 meter radius"""
@@ -87,37 +77,43 @@ def get_biking_popularity(self):


def get_nearby_transportation(self):
    """Count the subway and bus stops close to this node.

    Note that the search radius used here is 0.0042, not the 0.000042
    used for the accident lookup.

    Returns:
        int: number of subway stops plus number of bus stops whose
        stored point lies within the radius of
        ``(self.location[0], self.location[1])``.
    """
    radius = 0.0042

    # Bind the helper once so the lazy loads and the queries below use
    # the same object.
    data = util.Util()

    # The stop indexes are built lazily: anything that is not yet a
    # KD-tree is treated as "not loaded".
    if not isinstance(data.Subways, spatial.cKDTree):
        data.set_Subways()
    if not isinstance(data.Busses, spatial.cKDTree):
        data.set_Busses()

    pt = [self.location[0], self.location[1]]

    # p=2 selects the Euclidean norm for the radius search.
    transit = len(data.Subways.query_ball_point(pt, radius, p=2))
    transit += len(data.Busses.query_ball_point(pt, radius, p=2))
    return transit

def get_average_rack_distance(self):
    """Return the combined distance to the 4 closest existing racks.

    The existing-rack KD-tree is built on first use, then queried for
    the four nearest neighbours of
    ``(self.location[0], self.location[1])``.

    NOTE(review): despite the method name, the value returned is the
    SUM of the four nearest-neighbour distances (i.e. four times their
    mean), not the mean itself. The behaviour is kept as-is; confirm
    what downstream code expects before dividing by 4.

    Returns:
        Sum of the four distances reported by the KD-tree query.
    """
    # Bind the helper once so the lazy load and the query below use the
    # same object.
    data = util.Util()
    if not isinstance(data.Existing_Nodes, spatial.cKDTree):
        data.set_Exisiting_Nodes()

    pt = [self.location[0], self.location[1]]

    # query() returns (distances, indices); only the distances matter.
    distances, _indices = data.Existing_Nodes.query(pt, k=4)
    return sum(distances)

if __name__ == '__main__':
    # Manual smoke test: build one node and print each feature in turn.
    # print(...) with a single argument behaves the same on Python 2
    # and Python 3, unlike the bare print statement.
    n = Node((-73.9808623, 40.7587442), True)
    print(n.get_nearby_accidents())
    print(n.get_nearby_venues())
    print(n.get_pedestrian_flow())
    print(n.get_nearby_transportation())
    print(n.get_biking_popularity())
    print(n.get_average_rack_distance())
# if __name__ == '__main__':
# n = Node((-73.9808623, 40.7587442), True)
# print n.get_nearby_accidents()
# print n.get_nearby_venues()
# print n.get_pedestrian_flow()
# print n.get_nearby_transportation()
# print n.get_biking_popularity()
# print n.get_average_rack_distance()
134 util.py
@@ -6,6 +6,8 @@
import random
from datetime import datetime
from collections import defaultdict
from scipy import spatial
import node

def get_Subways():
    """Return the module-level ``Subways`` value.

    NOTE(review): no module-level ``Subways`` assignment is visible in
    this part of the file -- confirm one exists, otherwise this raises
    NameError when called.
    """
    return Subways
@@ -26,69 +28,34 @@ def __init__(self):
self.AccidentCords = []
self.AccidentResults = []
self.LocationPopularity = []
self.Existing_Nodes = []

# Setter Functions

def set_Subways(self):
    """Load the subway stops and index them in a KD-tree.

    Reads ``data/transit/subway/stops.txt`` (comma separated, first row
    is a header), takes column 5 and column 4 of every row as a
    (long, lat) pair, drops duplicate pairs and stores a
    ``scipy.spatial.cKDTree`` built from them in ``self.Subways``.

    Raises:
        IOError/OSError: if the stops file cannot be opened.
        ValueError, IndexError: if a non-blank row is malformed.
    """
    subways_locations = set()
    with open('data/transit/subway/stops.txt') as stops:
        next(stops, None)  # skip the header row; tolerate an empty file
        for line in stops:
            line = line.strip()
            if not line:
                # Ignore blank lines (e.g. a trailing newline) instead
                # of failing on the column lookup.
                continue
            fields = line.split(',')
            # Long lat
            subways_locations.add((float(fields[5]), float(fields[4])))

    self.Subways = spatial.cKDTree(list(subways_locations))

def set_Busses(self):
    """Load the bus stops and index them in a KD-tree.

    Reads ``data/transit/bus/stops.txt`` (comma separated, first row is
    a header), takes column 4 and column 3 of every row as one point
    (same long-then-lat ordering as the subway loader) and stores a
    ``scipy.spatial.cKDTree`` built from them in ``self.Busses``.
    Duplicate stops are kept.

    Raises:
        IOError/OSError: if the stops file cannot be opened.
        ValueError, IndexError: if a non-blank row is malformed.
    """
    bus_stops = []
    with open('data/transit/bus/stops.txt') as stops:
        next(stops, None)  # skip the header row; tolerate an empty file
        for line in stops:
            line = line.strip()
            if not line:
                # Ignore blank lines (e.g. a trailing newline) instead
                # of failing on the column lookup.
                continue
            fields = line.split(',')
            bus_stops.append([float(fields[4]), float(fields[3])])

    self.Busses = spatial.cKDTree(bus_stops)

def set_LocationPopularity(self):
routes = []
@@ -107,7 +74,7 @@ def set_LocationPopularity(self):
try:
location_popularity_dict = pickle.load(open("location_popularity.pkl", "rb"))
except:
gmaps = googlemaps.Client(key='AIzaSyDrfuB9hQjsZSxehG-vbXtRKJ96PA0d4Sw')
gmaps = googlemaps.Client(key='AIzaSyCHZvHuiz-OFrYSrlm06VpRNQeZfXsK_1Q')
now = datetime.now()
for route in routes[0:2400]:
directions_result = gmaps.directions(route['start'][::-1],
@@ -117,7 +84,56 @@ def set_LocationPopularity(self):
for leg in direction['legs']:
for step in leg['steps']:
location_popularity_dict[
(step['start_location']['lat'],
(step['start_location']['lng'],
step['start_location']['lat'])] += 1
pickle.dump(location_popularity_dict, open("location_popularity.pkl", "wb"))
self.LocationPopularity = location_popularity_dict

def set_Accidents(self):
    """Load the collision records and index their positions.

    Reads ``./data/accident/NYPD_Motor_Vehicle_Collisions.csv`` line by
    line, splitting each line on every comma (quoted commas are NOT
    treated specially, which is why a usable row is expected to have
    exactly 30 pieces). The first line is skipped as the header.

    A row is kept when pieces 4, 5, 14 and 15 are all non-empty and at
    least one of the two counts in pieces 14 / 15 is positive. Piece 4
    is the latitude and piece 5 the longitude, stored as (long, lat).
    NOTE(review): the original code labels pieces 14 / 15 "injured" and
    "killed" -- verify against the CSV header.

    Side effects:
        self.AccidentCords   -- ``scipy.spatial.cKDTree`` of (long, lat)
        self.AccidentResults -- list with the sum of the two counts for
                                each kept row, in the same order.
    """
    accident_cords = []
    accident_results = []

    # 'with' guarantees the file is closed even if a row fails to parse.
    with open("./data/accident/NYPD_Motor_Vehicle_Collisions.csv") as f:
        for row_number, raw_line in enumerate(f):
            if row_number == 0:
                continue  # header row

            fields = raw_line.strip().split(',')
            if len(fields) != 30:
                continue

            lat, lon = fields[4], fields[5]
            first_count, second_count = fields[14], fields[15]
            if lat == '' or lon == '' or first_count == '' or second_count == '':
                continue

            first_count = int(first_count)
            second_count = int(second_count)
            if first_count > 0 or second_count > 0:
                # long, lat
                accident_cords.append((float(lon), float(lat)))
                accident_results.append(first_count + second_count)

    self.AccidentCords = spatial.cKDTree(accident_cords)
    self.AccidentResults = accident_results

def set_Exisiting_Nodes(self):
    """Load the existing rack positions and index them in a KD-tree.

    Reads ``existing_rack.csv`` and keeps only the lines that start
    with the marker ``</div>",#Style2-point-1-map,`` and continue past
    it. Each such line is split on commas; piece 3 is read as the
    longitude and piece 4 as the latitude.

    Rows where either coordinate is missing (or that are too short to
    hold both) are skipped, so an incomplete row neither raises on
    ``float('')`` nor adds a bogus point.

    The (lon, lat) pairs are stored directly; no intermediate
    ``node.Node`` objects are created, since only their location was
    ever read back.

    Side effects:
        self.Existing_Nodes -- ``scipy.spatial.cKDTree`` of (lon, lat).
    """
    marker = "</div>\",#Style2-point-1-map,"
    locations = []

    # 'with' guarantees the file is closed even if a row fails to parse.
    with open("existing_rack.csv", "r") as f:
        for line in f:
            line = line.strip()

            # Must be strictly longer than the marker and begin with it.
            if len(line) <= len(marker) or not line.startswith(marker):
                continue

            # TODO: need to deal with multiple digit racks
            fields = line.split(',')

            # Both coordinates are required to build a point.
            if len(fields) < 5 or fields[3] == '' or fields[4] == '':
                continue

            locations.append((float(fields[3]), float(fields[4])))

    self.Existing_Nodes = spatial.cKDTree(locations)