In [21]:
import numpy as np
import pandas as pd
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon

In [22]:
def find_multi(row):
    """Report whether a shape string contains more than one coordinate ring.

    A row counts as a "multigon" when the separator ``'), ('`` occurs in it,
    i.e. one parenthesised coordinate list closes and another one opens.
    parse_shapes uses the result to pick the matching parser.

    Args:
        row: String representation of a polygon or multigon.

    Returns:
        True if ``'), ('`` occurs in ``row``, otherwise False.
    """
    separator = '), ('
    return separator in row

In [23]:
def prep_row(row, prefix_len=16, suffix_len=3):
    """Trim the leading type label and the trailing parentheses from a raw row.

    The result should start and end with characters that belong to the
    coordinate values, so it can be handed to get_poly or get_multi. This
    function is used inside parse_shapes.

    The defaults drop the first 16 and the last 3 characters. That matches a
    row shaped like ``'MULTIPOLYGON (((' + coordinates + ')))'``.
    NOTE(review): rows with a different prefix (for example ``'POLYGON (('``,
    which needs ``prefix_len=10, suffix_len=2``) must pass their own lengths;
    confirm which layouts occur in the input data.

    Args:
        row: Raw shape string.
        prefix_len: Number of leading characters to discard.
        suffix_len: Number of trailing characters to discard.

    Returns:
        The remaining middle part of ``row``; an empty string when the row is
        too short to contain anything between prefix and suffix.
    """
    # Compute the end position explicitly instead of slicing with a negative
    # index: ``row[a:-0]`` would be empty, so ``suffix_len=0`` needs this form.
    stop = max(len(row) - suffix_len, 0)
    return row[prefix_len:stop]

In [24]:
def get_poly(data):
    """Convert the coordinate text of a single polygon into a float array.

    The input comes either from prep_row (when the original row was a single
    polygon) or from get_multi (one piece of a multigon). It is used in
    parse_shapes.

    The string is split on commas, each piece is stripped of surrounding
    white space and split on single spaces, and the first two tokens of each
    piece are converted to floats.

    Args:
        data: String of the form ``'a b, c d, ...'``.

    Returns:
        A NumPy array with one row per vertex and two columns, holding the
        first and second value of each vertex in the order they appear.
    """
    vertices = []
    for chunk in data.split(","):
        # Split each vertex once and reuse the tokens for both coordinates.
        tokens = chunk.strip().split(" ")
        vertices.append((float(tokens[0]), float(tokens[1])))
    return np.asarray(vertices)

In [25]:
def get_multi(multi):
    """Convert the coordinate text of a multigon into an array of polygons.

    The prepped string is cut at every ``'), ('`` separator. Each piece is
    parsed with get_poly, so every polygon ends up in the same format that
    single polygons use. This is used in parse_shapes.

    Args:
        multi: Prepped string such as ``'a b, c d), (e f, g h'``.

    Returns:
        A one-dimensional NumPy array of dtype ``object`` whose elements are
        the vertex arrays returned by get_poly, one per piece.
    """
    # Split straight on the separator; no placeholder character is needed.
    pieces = multi.split("), (")
    # A boundary written as ')), ((' leaves one parenthesis on each side of the
    # cut; strip them so float() in get_poly does not fail on e.g. '4)'.
    # NOTE(review): every ring is treated as its own polygon here; if some
    # rings are interior holes they are not subtracted. Confirm against data.
    polygons = [get_poly(piece.strip("() ")) for piece in pieces]
    # Build the container explicitly as a 1-D object array. np.asarray() on
    # the list would give a 3-D float array when all rings have the same
    # number of vertices (which is_multi then reads as a single polygon), and
    # recent NumPy versions reject ragged input unless dtype=object is given.
    multi_list = np.empty(len(polygons), dtype=object)
    for index, polygon in enumerate(polygons):
        multi_list[index] = polygon
    return multi_list


In [26]:
def parse_shapes(one_row):
    """Turn one raw shape row into an array of vertices.

    The row is trimmed with prep_row and checked with find_multi. Multigon
    rows are parsed by get_multi, all other rows by get_poly. The output is
    what test_point expects as one element of its cleaned data.

    Args:
        one_row: Raw shape string.

    Returns:
        The result of get_multi for multigon rows, otherwise the result of
        get_poly.
    """
    trimmed = prep_row(one_row)
    if find_multi(trimmed):
        return get_multi(trimmed)
    return get_poly(trimmed)

In [27]:
def is_multi(row):
    """Decide from its dimensions whether a cleaned array is a multigon.

    This function is used in test_point.

    Args:
        row: Object with a ``shape`` attribute, e.g. a NumPy array.

    Returns:
        True when ``row.shape`` has at most one entry (treated as a
        collection of polygons), False when it has two or more entries
        (treated as a single polygon).
    """
    dimensions = len(row.shape)
    return dimensions <= 1

In [28]:
def test_point(point, cleaned_data):
    """Find which cleaned shapes contain a given point.

    Each entry of ``cleaned_data`` is classified with is_multi. A multigon
    matches when at least one of its polygons contains the point; a single
    polygon matches when it contains the point. Containment is checked with
    shapely's ``Polygon.contains``. Used in which_polygons.

    Args:
        point: Indexable pair; ``point[0]`` and ``point[1]`` are passed to
            shapely's ``Point`` as its first and second coordinate.
        cleaned_data: Iterable of vertex arrays as produced by parse_shapes.

    Returns:
        List of positions in ``cleaned_data`` whose shape contains the point.
        Each position appears at most once, even when several polygons of the
        same multigon contain the point.
    """
    test_pt = Point(point[0], point[1])
    positive_gons = []
    for index, shape in enumerate(cleaned_data):
        # Wrap a single polygon in a list so both cases share one check.
        rings = shape if is_multi(shape) else [shape]
        # any() stops at the first match, so a multigon is recorded only once.
        if any(Polygon(ring).contains(test_pt) for ring in rings):
            positive_gons.append(index)
    return positive_gons

In [29]:
def which_polygons(zipped_pts, nice_data):
    """Run test_point for every point against a column of cleaned shapes.

    This function is used in the wrapper functions.

    Args:
        zipped_pts: Indexable collection of points supporting ``len()``.
        nice_data: Column of cleaned shape data, passed on to test_point.

    Returns:
        A list with one entry per point; each entry is the list that
        test_point returned for that point.
    """
    positions = range(len(zipped_pts))
    return [test_point(zipped_pts[pos], nice_data) for pos in positions]

In [30]:
def find_usage(found_polygons, usage_column):
    """Look up the land usage for each point from its matching polygons.

    For every point, the first polygon index in its list of matches is used
    to read the corresponding entry of the land-usage column. This function
    is used in wrapper_landuse.

    Args:
        found_polygons: Sequence with one list of polygon indices per point,
            as returned by which_polygons.
        usage_column: Column of land-usage values, indexable by polygon index.

    Returns:
        A list with one entry per point: the land-usage value of the first
        matching polygon, or None when the point matched no polygon.
    """
    usage = []
    for hits in found_polygons:
        if len(hits) == 0:
            # A point outside every polygon has no usage. Keep a placeholder
            # so the output stays aligned with the input points instead of
            # raising IndexError on hits[0].
            usage.append(None)
        else:
            usage.append(usage_column[hits[0]])
    return usage

In [43]:
def find_open(found_polygons):
    """Flag, for each point, whether it matched at least one polygon.

    This function is used in wrapper_openspace.

    Args:
        found_polygons: Sequence with one list of polygon indices per point.

    Returns:
        A list of booleans, one per point: True when the point's list of
        matches is non-empty, False when it is empty.
    """
    return [len(hits) != 0 for hits in found_polygons]

In [44]:
def wrapper_landuse(zipped_points, uncleaned_data_col, cross_checked_col):
    """Return the land usage for each of the given points.

    The raw shape column is parsed with parse_shapes, every point is tested
    against the parsed shapes with which_polygons, and the matches are
    translated into land-usage values with find_usage.

    Args:
        zipped_points: Collection of points to check.
        uncleaned_data_col: Column of raw shape strings; it must provide an
            ``apply`` method (e.g. a pandas Series).
        cross_checked_col: Column that is cross-checked to obtain the land
            usage of a matching polygon.

    Returns:
        The list produced by find_usage, one entry per point.
    """
    parsed_shapes = uncleaned_data_col.apply(parse_shapes)
    hits_per_point = which_polygons(zipped_points, parsed_shapes)
    return find_usage(hits_per_point, cross_checked_col)

In [50]:
def wrapper_openspace(zipped_points, uncleaned_data_col):
    """Report for each point whether it lies inside an open-space polygon.

    The raw shape column is parsed with parse_shapes, every point is tested
    against the parsed shapes with which_polygons, and find_open converts the
    matches into booleans.

    Args:
        zipped_points: Collection of points to check.
        uncleaned_data_col: Column of raw shape strings; it must provide an
            ``apply`` method (e.g. a pandas Series).

    Returns:
        The list of booleans produced by find_open, one entry per point.
    """
    parsed_shapes = uncleaned_data_col.apply(parse_shapes)
    hits_per_point = which_polygons(zipped_points, parsed_shapes)
    return find_open(hits_per_point)

In [51]:
# Sample points used to exercise wrapper_landuse; each entry is a pair whose
# two values are passed to shapely's Point in this order.
some_points = [
    [-71.13794487, 42.35945246],
    [-71.13926144, 42.35927083],
    [-71.13893327, 42.35915658],
    [-71.13919666, 42.35879337],
]

# Sample points used to exercise wrapper_openspace, in the same pair format.
more_points = [
    [-71.13643, 42.32709],
    [-71.14304, 42.32893],
    [-71.13589, 42.33100],
    [-71.14608, 42.33182],
]



In [52]:
# Read the land-use table and the open-space table from CSV files; the paths
# are relative, so both files must be in the current working directory.
data_land = pd.read_csv("small_scale_data.csv")
data_open = pd.read_csv("open_space_small.csv")
# Pull out the "SHAPE" column of each table; these columns are passed to the
# wrapper functions as the raw shape strings.
shape_open = data_open["SHAPE"]
shape_col = data_land["SHAPE"]
# Pull out the "LU05_DESC" column, which wrapper_landuse receives as the
# column to cross-check for land usage.
usage_data = data_land["LU05_DESC"]

In [53]:
# Test the land-use wrapper function: print the land usage found for each
# point in some_points.
print (wrapper_landuse(some_points, shape_col, usage_data))

['Industrial', 'Industrial', 'Industrial', 'Commercial']


In [54]:
# Test the open-space wrapper function: print, for each point in more_points,
# whether it falls inside any open-space polygon.
print(wrapper_openspace(more_points, shape_open))

[True, True, False, False]
