In [1]:
import numpy as np
import pandas as pd
from shapely.geometry import LineString
from shapely.geometry import Point
from shapely.geometry import Polygon
import shapely.geometry
from math import cos, asin, sqrt

In [2]:
# Load the bus-route table and keep its SHAPE column (geometry held as text).
# NOTE(review): both CSV paths below are absolute and machine-specific --
# confirm or relocate them before running this notebook elsewhere.
routes_csv = "C:/Users/Anthony DePinho/Documents/REU 2017/TRiCAM_BostonAQ/bus_static_data/bus_routes_full.csv"
routedata = pd.read_csv(routes_csv)
shape_col = routedata["SHAPE"]
print(shape_col.shape)

# Load the bus-stop table and keep its SHAPE and TOWN columns.
stops_csv = "C:/Users/Anthony DePinho/Documents/REU 2017/TRiCAM_BostonAQ/bus_static_data/bus_stops_full.csv"
stopdata = pd.read_csv(stops_csv)
point_col = stopdata["SHAPE"]
town_col = stopdata["TOWN"]
# Show which towns appear in the stop table.
print(town_col.unique())

(698,)
['NEWTON' 'WALTHAM' 'ARLINGTON' 'WATERTOWN' 'BELMONT' 'BOSTON' 'BROOKLINE'
 'CAMBRIDGE' 'MEDFORD' 'SOMERVILLE' 'MALDEN' 'EVERETT' 'CHELSEA' 'REVERE'
 'QUINCY' 'WINTHROP']


In [3]:
def find_multi(row):
    """Tell whether a raw SHAPE string holds more than one coordinate group.

    The first 16 and the last 3 characters of ``row`` are ignored; the rest is
    searched for the separator '), (' that sits between two parenthesised
    coordinate groups.

    Returns True when the separator is present, False otherwise.
    """
    return '), (' in row[16:-3]

In [4]:
def prep_row(row):
    """Trim the fixed-width wrapper off a raw SHAPE string.

    Called before the text is handed to get_poly / get_multi so both receive
    the same format. The first 18 characters and the last 2 characters are
    dropped and the remainder is returned unchanged.

    NOTE(review): an 18-character prefix matches 'MULTILINESTRING ((' rather
    than 'MULTIPOLYGON ((' (15 characters) -- confirm against the CSV.
    """
    prefix_len, suffix_len = 18, 2
    return row[prefix_len:-suffix_len]

In [5]:
def get_poly(data):
    """Turn 'a b, c d, ...' text into a list of (float, float) tuples.

    ``data`` is split on commas; each piece is stripped of surrounding
    whitespace and split on a single space. The first two space-separated
    tokens of every piece are converted to floats and paired up, in order.

    Parse single-geometry rows with this; parse_shapes routes them here.

    NOTE(review): the printed route coordinates in this notebook look like
    (longitude, latitude) -- confirm the column order of the source data.
    """
    vertex = []
    for piece in data.split(","):
        tokens = piece.strip().split(" ")
        vertex.append((float(tokens[0]), float(tokens[1])))
    return vertex

In [6]:
def scale(num):
    """Normalise a pair of numbers to fixed-width text, then rescale and negate.

    For each of ``num[0]`` and ``num[1]``:
      * take the absolute value and convert it to text,
      * right-pad with '0' or truncate so the text is exactly 15 characters,
      * convert back to float, multiply by 10**5 and negate.

    Returns a 2-element numpy array holding the two results.
    """
    fixed_width = []
    for position in (0, 1):
        text = str(abs(num[position]))
        # ljust pads short text; the slice trims long text -- 15 chars either way
        fixed_width.append(text.ljust(15, '0')[:15])

    first, second = fixed_width
    return np.array([-1.*(float(first) * (10**5)), -1.*(float(second) * (10**5))])

In [7]:
def center(coord):
    """Shift ``coord`` so that its overall mean becomes zero.

    ``coord`` is a numpy array (the original note describes it as holding lat
    and long). The mean is taken over every element at once -- ``mean()`` is
    called without an axis -- and subtracted from each element.

    Returns the shifted values as a new array; the argument is not modified.
    """
    return coord - coord.mean()

In [8]:

def parse_shapes(one_row):
    """Parse one raw SHAPE string into coordinate data.

    The row is classified with find_multi and trimmed with prep_row. A row
    with several coordinate groups is handed to get_multi; any other row is
    handed to get_poly. Whatever the chosen parser returns is returned as is.

    NOTE(review): get_multi is not defined in any cell visible in this
    notebook -- confirm it exists before relying on the multi-group branch.
    """
    has_several_groups = find_multi(one_row)
    prepped = prep_row(one_row)
    if not has_several_groups:
        return get_poly(prepped)
    return get_multi(prepped)

In [9]:
# Build the regular grid laid over the study area.
# x and y hold the [minimum, maximum] bounds of the area.
x = [-71.20197, -70.96679]
y = [42.291441, 42.420578]

# Number of cells along each axis.
x_cell = 50
y_cell = 50

x_min, x_max = x
y_min, y_max = y

# Cell edges ("ticks"): n cells need n + 1 evenly spaced edges.
x_s = np.linspace(x_min, x_max, num=x_cell + 1)
y_s = np.linspace(y_min, y_max, num=y_cell + 1)

# 2-D coordinate matrices: x_coord[0] lists every x tick,
# y_coord[:, 0] lists every y tick.
x_coord, y_coord = np.meshgrid(x_s, y_s)


In [10]:
# Build routesaslines: one shapely LineString per row of shape_col.
# Each LineString holds the sequence of coordinate tuples returned by
# parse_shapes for that row.
#
# The rows are iterated directly rather than through `for x in range(...)`:
# a loop variable named x would overwrite the grid bounds list `x`, and
# shape_col[x] would look rows up by index label instead of by position.
routesaslines = [LineString(parse_shapes(shape_text)) for shape_text in shape_col]

# Quick look at the first route: its first vertex, the difference between the
# first components of its first two vertices, and the first vertex's components.
print(routesaslines[0].coords[0])
print(routesaslines[0].coords[0][0] - routesaslines[0].coords[1][0])
print(routesaslines[0].coords[0][0])
print(routesaslines[0].coords[0][1])

(-71.08858713142608, 42.4381500001951)
0.00022953293573380051
-71.08858713142608
42.4381500001951


In [11]:
# Show every vertex of route 5 as a plain list of coordinate tuples.
# NOTE(review): this prints the whole route and produces very long output.
print([*routesaslines[5].coords])

[(-71.05650879381709, 42.35526524447907), (-71.05657091932102, 42.35552884357215), (-71.05724677250865, 42.355480184127295), (-71.05735122971485, 42.35547266368026), (-71.0573829985682, 42.35534678569011), (-71.05743882498959, 42.35517807618312), (-71.05754314193874, 42.35506488152648), (-71.05772249655764, 42.35498083962923), (-71.05777137038378, 42.354933489499146), (-71.05853007171454, 42.35421870488789), (-71.05866186311385, 42.35409454019036), (-71.05804453223068, 42.353705852424085), (-71.0578080512638, 42.35351425256582), (-71.0578280931694, 42.353339040142515), (-71.05790459061207, 42.352917994131616), (-71.05795280808209, 42.35261846605321), (-71.05800857406429, 42.352350301608894), (-71.05817068365216, 42.35228817063492), (-71.0584141456137, 42.35215207958184), (-71.05859583120886, 42.352032856446485), (-71.0587855018297, 42.35188224527398), (-71.05896762214203, 42.35170027467003), (-71.05914235168508, 42.35148108978003), (-71.05925551002755, 42.35128462428929), (-71.05934129

In [12]:
# Zero-filled table with one slot per grid cell: first axis follows the
# intervals between x ticks, second axis the intervals between y ticks.
n_x_cells = len(x_s) - 1
n_y_cells = len(y_s) - 1
distances = np.zeros((n_x_cells, n_y_cells))

In [13]:
def distance(lon1, lon2, lat1, lat2):
    """Haversine distance between (lon1, lat1) and (lon2, lat2).

    Note the argument order: both longitudes first, then both latitudes.
    All four values are in degrees. The result is 12742 * asin(sqrt(a)),
    i.e. 2*R*asin(...) with R = 6371, so it is expressed in the same unit as
    that radius (presumably kilometres -- callers in this notebook multiply
    the result by 0.621371).
    """
    deg_to_rad = 0.017453292519943295     # Pi/180
    lat_term = cos((lat2 - lat1) * deg_to_rad) / 2
    lon_term = cos(lat1 * deg_to_rad) * cos(lat2 * deg_to_rad) * (1 - cos((lon2 - lon1) * deg_to_rad)) / 2
    hav = 0.5 - lat_term + lon_term
    return 12742 * asin(sqrt(hav))  # 2*R*asin(sqrt(hav))

In [14]:
#route is a LineString object
#x_coord, y_coord is a numpy array
#point is a specified point, in the sequence of the route

def find_x(route, x_coord, point):
    """Locate the x tick immediately to the right of a route vertex.

    Parameters
    ----------
    route : object exposing ``coords`` (a LineString in this notebook)
    x_coord : 2-D grid of x ticks; only its first row ``x_coord[0]`` is read
              and it is expected to be in increasing order
    point : index of the vertex within ``route.coords``

    Returns
    -------
    The index ``i`` of the first tick strictly greater than the vertex's x
    value, so the vertex lies between ticks ``i - 1`` and ``i``. Returns -1
    when the vertex is outside the grid on either side (below the first tick,
    or at/after the last tick).

    The low-side check matters: without it a vertex left of the grid would
    yield 0 and the caller's ``i - 1`` would wrap around to the last column.
    """
    px = route.coords[point][0]
    ticks = x_coord[0]

    # Nothing to search, or the vertex sits left of the whole grid.
    if len(ticks) == 0 or px < ticks[0]:
        return -1

    for idx in range(len(ticks)):
        if px < ticks[idx]:
            return idx

    # Vertex is at or beyond the last tick.
    return -1

def find_y(route, y_coord, point):
    """Locate the y tick immediately above a route vertex.

    Parameters
    ----------
    route : object exposing ``coords`` (a LineString in this notebook)
    y_coord : 2-D grid of y ticks; only its first column ``y_coord[i][0]`` is
              read and it is expected to be in increasing order
    point : index of the vertex within ``route.coords``

    Returns
    -------
    The row index ``i`` of the first tick strictly greater than the vertex's
    y value, so the vertex lies between ticks ``i - 1`` and ``i``. Returns -1
    when the vertex is outside the grid on either side (below the first tick,
    or at/after the last tick).

    The scan is bounded by the number of rows of ``y_coord`` (not the number
    of columns), so grids with different x and y cell counts work. The
    low-side check prevents a result of 0, which would make the caller's
    ``i - 1`` wrap around to the last row.
    """
    py = route.coords[point][1]
    n_rows = len(y_coord)

    # Nothing to search, or the vertex sits below the whole grid.
    if n_rows == 0 or py < y_coord[0][0]:
        return -1

    for idx in range(n_rows):
        if py < y_coord[idx][0]:
            return idx

    # Vertex is at or beyond the last tick.
    return -1



In [54]:
# Accumulate, per grid cell, the length in miles of a single route.
#
# The route is walked one segment at a time (vertex a-1 -> vertex a). Each
# segment's length is added to the cell that contains the segment's END
# vertex, so a segment straddling a cell edge is credited wholly to the cell
# in which it ends.
#
# Only route 10 is processed here. To cover every route, wrap the loop in
# `for route in routesaslines:` and move the reset below in front of it.
route = routesaslines[10]
vertices = list(route.coords)

# Start from zeros so that re-running this cell does not double count.
distances.fill(0.0)

# Start at 1: segment index 0 would pair vertex 0 with vertex -1, i.e. the
# LAST vertex of the route, inventing a segment that does not exist.
for a in range(1, len(vertices)):
    # Tick indices just right of / above vertex a; the vertex lies between
    # ticks (index - 1) and (index).
    x_pt = find_x(route, x_coord, a)
    y_pt = find_y(route, y_coord, a)

    # Skip vertices outside the grid. `< 1` covers the -1 sentinel and also
    # index 0, for which `index - 1` would wrap around to the last cell.
    if x_pt < 1 or y_pt < 1:
        continue

    lon1, lat1 = vertices[a - 1][0], vertices[a - 1][1]
    lon2, lat2 = vertices[a][0], vertices[a][1]

    # distance() result is scaled by 0.621371 to express it in miles.
    dist = 0.621371 * distance(lon1, lon2, lat1, lat2)

    # Add (not assign): several segments can fall inside the same cell.
    distances[x_pt - 1][y_pt - 1] += dist

print("done")

done


In [53]:
# Save the per-cell distance table to disk as CSV.
# NOTE(review): the output path is absolute and machine-specific -- confirm
# before running elsewhere.
output_csv = "C:/Users/Anthony DePinho/Documents/REU 2017/TRiCAM_BostonAQ/routemodel_3.csv"
df = pd.DataFrame(distances)
df.to_csv(output_csv)
print("written")

written


In [18]:
# Spot checks: row 26 of the distance table, then the first two vertices of
# route 0 and the length of the segment joining them (distance() scaled by
# 0.621371).
print(distances[26])
first_vertex = routesaslines[0].coords[0]
print(first_vertex)
second_vertex = routesaslines[0].coords[1]
print(second_vertex)
segment_miles = 0.621371 * distance(first_vertex[0], second_vertex[0], first_vertex[1], second_vertex[1])
print(segment_miles)

[ 0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.00036762  0.00027211  0.          0.          0.          0.          0.
  0.          0.          0.          0.          0.          0.          0.
  0.        ]
(-71.08858713142608, 42.4381500001951)
(-71.08881666436181, 42.438335374918665)
0.017350426574998735


In [19]:
# Length of route 0 measured segment by segment: each consecutive pair of
# vertices becomes its own two-point LineString and the lengths are summed.
# NOTE(review): .length is computed in coordinate units (the coordinates here
# look like degrees), not miles -- confirm this is the intended measure.
route0_vertices = list(routesaslines[0].coords)
triplengths = [
    LineString([start, end]).length
    for start, end in zip(route0_vertices, route0_vertices[1:])
]
trip = sum(triplengths)
print(trip)


0.10191286932164008


In [66]:
# Sanity check on the grid: the y tick in row 50 and the x tick in column 0.
print(y_coord[50, 0], x_coord[0, 0])

42.420578 -71.20197
