In [1]:
import requests
import xmltodict
import collections
import numpy as np
import pandas as pd
from shapely.geometry import Point

In [2]:
#Load the static bus stops table
stopdata = pd.read_csv("C:/Users/Anthony DePinho/Documents/REU 2017/TRiCAM_BostonAQ/bus_static_data/bus_stops_full.csv")
#Keep two columns for later use: SHAPE (each stop's geometry, stored as a string) and TOWN
point_col, town_col = stopdata["SHAPE"], stopdata["TOWN"]
#Sanity check: list the distinct TOWN values and the number of rows
print(town_col.unique())
print(town_col.size)

['NEWTON' 'WALTHAM' 'ARLINGTON' 'WATERTOWN' 'BELMONT' 'BOSTON' 'BROOKLINE'
 'CAMBRIDGE' 'MEDFORD' 'SOMERVILLE' 'MALDEN' 'EVERETT' 'CHELSEA' 'REVERE'
 'QUINCY' 'WINTHROP']
3006


In [3]:
#Multi-part geometry detector: rows it flags are handled separately from single-part rows
def find_multi(row):
    """Return True when the separator '), (' occurs inside row[16:-3], else False.

    The first 16 and the last 3 characters of the row are ignored before
    searching (presumably the geometry type tag plus opening/closing
    parentheses -- TODO confirm against the SHAPE column format).
    """
    interior = row[16:-3]
    return '), (' in interior

In [4]:
#PREP FUNCTION: RUN ON A ROW BEFORE IT IS HANDED TO THE MULTI/POLY PARSER SO THE INPUT HAS A UNIFORM FORMAT
def prep_row(row):
    """Return row without its first 7 characters and without its last character.

    NOTE(review): a 7-character prefix matches 'POINT (' rather than
    'MULTIPOLYGON ((' -- confirm which geometry tag the SHAPE column uses.
    For input such as 'POINT (lon lat)' the result is 'lon lat'.
    """
    trim = slice(7, -1)
    return row[trim]

In [5]:
#GET_POLY FUNCTION PARSES A SINGLE COMMA-SEPARATED RUN OF COORDINATE PAIRS
#MULTI-PART ROWS GET FED INTO GET_MULTI, WHICH THEN UTILIZES THIS FUNCTION WITHIN IT
def get_poly(data):
    """Convert 'a b, c d, ...' text into a list of (float, float) tuples.

    Parameters
    ----------
    data : str
        Comma-separated vertices; each vertex is two numbers separated by
        whitespace (any amount of spaces or tabs is accepted).

    Returns
    -------
    list of (float, float)
        One tuple per vertex, values kept in the order they appear in the text.
        Only the first two numbers of a vertex are used.

    Raises
    ------
    ValueError
        If a vertex has fewer than two fields or a field is not a number.
    """
    vertices = []
    for chunk in data.split(","):
        # split() with no argument collapses runs of whitespace, so
        # '1.5   2.5' parses the same as '1.5 2.5' (split(" ") would yield '' fields).
        fields = chunk.split()
        if len(fields) < 2:
            raise ValueError("expected two coordinates per vertex, got %r" % chunk)
        vertices.append((float(fields[0]), float(fields[1])))
    return vertices

In [6]:
#SCALE FUNCTION
def scale(num):
    """Turn a pair of numbers into a 2-element array of negated, scaled magnitudes.

    For each of num[0] and num[1]: take the absolute value, render it with
    str(), force that text to exactly 15 characters (right-pad with '0' or
    truncate), parse it back to float, multiply by 10**5 and negate.

    NOTE(review): both outputs are never positive, whatever the input signs,
    and padding text that has no decimal point (e.g. an int) appends zeros to
    the integer part -- confirm callers only pass floats and want this sign.
    """
    width = 15

    def _fixed_width(value):
        # ljust pads short text with '0'; the slice trims anything longer.
        return str(abs(value)).ljust(width, '0')[:width]

    first = _fixed_width(num[0])
    second = _fixed_width(num[1])

    return np.array([-1.*(float(first) * (10**5)), -1.*(float(second) * (10**5))])

In [7]:
def parse_shapes(one_row):
    """Parse one raw geometry string into coordinate data.

    The row is classified with find_multi and trimmed with prep_row; rows
    flagged as multi-part go to get_multi, all others go to get_poly, and
    that parser's result is returned unchanged.

    NOTE(review): get_multi is not defined in the visible part of this
    notebook -- confirm it exists before a multi-part row is encountered.
    """
    multi = find_multi(one_row)
    prepped = prep_row(one_row)
    if multi:
        return get_multi(prepped)
    return get_poly(prepped)

In [30]:
#stoplist is a sequence of point objects exposing .x and .y
#x_coord, y_coord are 2-D tick arrays (as produced by np.meshgrid)
#point is the index into stoplist of the stop to locate

def find_x (stoplist, x_coord, point):
    """Locate the x tick just beyond stoplist[point].

    Parameters
    ----------
    stoplist : sequence of objects with an ``x`` attribute
    x_coord : 2-D sequence; its first row ``x_coord[0]`` holds the ascending x ticks
    point : int, index into ``stoplist``

    Returns
    -------
    int
        Index i (>= 1) of the first tick strictly greater than the stop's x,
        so the stop lies in grid column i - 1. Returns -1 when the stop is
        outside the grid: before the first tick, or at/after the last tick.
    """
    x_value = stoplist[point].x
    for tick_index, tick in enumerate(x_coord[0]):
        if x_value < tick:
            # tick_index == 0 means the stop is before the first tick; report it
            # as off-grid, otherwise a caller computing tick_index - 1 would get
            # -1 and silently wrap to the last column.
            return tick_index if tick_index > 0 else -1
    return -1

def find_y (stoplist, y_coord, point):
    """Locate the y tick just beyond stoplist[point].

    Parameters
    ----------
    stoplist : sequence of objects with a ``y`` attribute
    y_coord : 2-D sequence; the first element of each row, ``y_coord[r][0]``,
        holds the ascending y ticks
    point : int, index into ``stoplist``

    Returns
    -------
    int
        Index r (>= 1) of the first tick strictly greater than the stop's y,
        so the stop lies in grid row r - 1. Returns -1 when the stop is
        outside the grid: before the first tick, or at/after the last tick.
    """
    y_value = stoplist[point].y
    # Walk the rows (first axis): the number of y ticks is len(y_coord), not
    # len(y_coord[0]), which only coincides when the grid is square.
    for tick_index, row in enumerate(y_coord):
        if y_value < row[0]:
            # tick_index == 0 means the stop is before the first tick; report it
            # as off-grid, otherwise a caller computing tick_index - 1 would get
            # -1 and silently wrap to the last row.
            return tick_index if tick_index > 0 else -1
    return -1



In [31]:
#Build the regular grid over the study area, using our x and y minima and maxima as bounds
#x and y each hold [min, max]; x_cell / y_cell are the number of cells along each axis
x = [-71.20197, -70.96679]
y = [42.291441, 42.420578]
x_cell = y_cell = 50

x_min, x_max = x
y_min, y_max = y

#n cells need n + 1 evenly spaced tick values per axis
x_s = np.linspace(x_min, x_max, num=x_cell + 1)
y_s = np.linspace(y_min, y_max, num=y_cell + 1)

#2-D tick arrays: x_coord[r][c] == x_s[c] and y_coord[r][c] == y_s[r]
x_coord, y_coord = np.meshgrid(x_s, y_s)

In [32]:
#Create stopsbylocationarray, which will hold the number of bus stops in each grid cell
#Note: there is one cell per gap between ticks, so the shape is (50,50), not (51,51)
#The first axis is sized from the x ticks, the second from the y ticks
stopsbylocationarray = np.zeros(shape=(len(x_s) - 1, len(y_s) - 1))

In [33]:
#This stores the stops in stopslist, a list of Points
#Each item is a Point built from the coordinates parsed out of one SHAPE string
#Iterating over the column's values (instead of point_col[x] with an index loop)
#does not depend on the index labels being 0..n-1, and no longer overwrites the
#grid-bounds list named x with a loop counter
stopslist = [Point(parse_shapes(shape)) for shape in point_col]


print("done")
print(len(stopslist))

done
3006


In [34]:
#Spot-check: x coordinate of the first parsed stop
stopslist[0].x

-71.2019628980164

In [35]:
#Loop through the list of points
#Identify where on the grid this point lies
#Add it to the corresponding grid box in the stopsbylocationarray

#Start from zero so that re-running this cell does not stack counts on top of a previous run
stopsbylocationarray.fill(0)

for i in range(len(stopslist)):

    x_pt = find_x(stopslist, x_coord, i)
    y_pt = find_y(stopslist, y_coord, i)

    #find_x/find_y give the index of the first tick beyond the stop:
    #-1 means there is no such tick, 0 means the stop is before the first tick.
    #Either way the stop is off the grid. Without the 0 check, 0 - 1 = -1 would
    #be used as an index and silently wrap around to the last row/column.
    if (x_pt <= 0) or (y_pt <= 0):
        continue

    stopsbylocationarray[x_pt-1, y_pt-1] += 1

print("done")

done


In [36]:
#Cast all data points as integers
#(np.zeros was called without a dtype, so the counts are floats until this cast)
stopsbylocationarray = stopsbylocationarray.astype(int)
#Preview the first five rows along the first axis
stopsbylocationarray[:5]

array([[0, 0, 2, 0, 0, 0, 2, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 2, 3, 4, 2, 0, 0, 0, 4, 0, 0, 2, 2, 0, 0, 2, 2, 0, 0, 0, 0,
        0, 0, 0, 0, 4, 0],
       [0, 0, 0, 2, 0, 4, 0, 1, 3, 2, 4, 2, 2, 2, 4, 1, 2, 2, 3, 1, 0, 0,
        0, 0, 4, 0, 0, 5, 0, 0, 4, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1, 2, 4],
       [2, 2, 0, 2, 6, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2,
        1, 0, 4, 0, 0, 4, 1, 3, 2, 0, 0, 0, 2, 2, 2, 3, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1, 0, 4],
       [2, 2, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        3, 2, 2, 4, 2, 0, 8, 2, 0, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 2, 0, 3, 3],
       [4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 6, 0, 0, 0, 2, 4, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 5, 8, 2, 2]])

In [15]:
#Export the per-cell stop counts to CSV
#NOTE(review): this cell's execution count (15) is lower than its neighbours',
#so the file on disk may predate the current array -- re-run before relying on it
df = pd.DataFrame(stopsbylocationarray)
df.to_csv("C:/Users/Anthony DePinho/Documents/REU 2017/TRiCAM_BostonAQ/stopsbylocation.csv")
print("written")

written


In [37]:
#Print the y tick of row 0 with the x tick of column 2, then the y tick of row 1 with the x tick of column 3
#NOTE(review): presumably the lower and upper bounds of the cell checked via stopsbylocationarray[2][0] -- confirm
print(y_coord[0][0], x_coord[0][2])
print(y_coord[1][0], x_coord[0][3])

42.291441 -71.1925628
42.29402374 -71.1878592


In [42]:
#Print the first two y tick values (y_coord[r][0] is the y tick for row r)
print(y_coord[0][0])
print(y_coord[1][0])

42.291441
42.29402374


In [38]:
#Stop count stored at first-axis index 2, second-axis index 0
#(the binning loop indexes this array as [x_pt-1][y_pt-1], so the first axis follows x)
stopsbylocationarray[2][0]

2