In [1]:
import numpy as np
import pandas as pd
import time
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
from collections import Counter
from matplotlib import pyplot as plt
%matplotlib inline
import pickle

In [None]:
def find_multi(row):
    """Report whether a shape string holds more than one coordinate group.

    The check is a plain substring search for the separator '), (' that
    appears between parenthesised coordinate groups.

    Returns True when the separator is present, False otherwise.
    """
    return '), (' in row

def prep_row(row):
    """Strip the fixed-width wrapper from a raw shape string.

    Removes the first 16 characters and the last 3 characters and returns
    what is left.
    NOTE(review): presumably these are the 'MULTIPOLYGON (((' prefix and the
    ')))' suffix -- confirm against the SHAPE column.
    """
    head_len, tail_len = 16, 3
    return row[head_len:-tail_len]

def get_poly(data):
    """Parse one comma-separated coordinate string into an (N, 2) float array.

    Each comma-separated entry is stripped of surrounding whitespace and split
    on single spaces; the first two tokens are converted to floats and kept in
    the order they appear in the text.

    Returns a numpy array with one row per vertex.
    """
    coords = []
    for entry in data.split(","):
        tokens = entry.strip().split(" ")
        coords.append((float(tokens[0]), float(tokens[1])))
    return np.asarray(coords)

def get_multi(multi):
    """Split a multi-part shape string and parse every part with get_poly.

    The separators ')), ((' and '), (' are both rewritten to '&' so the text
    can be split into one coordinate string per part, e.g.
    'lon lat, ...&lon lat, ...&lon lat, ...'.
    NOTE(review): '), (' is split the same way as ')), ((', so if it marks an
    interior ring (hole) that ring is returned as a standalone part -- confirm
    this is intended.

    Returns
    -------
    numpy.ndarray
        * a stacked float array of shape (parts, vertices, 2) when every part
          has the same number of vertices;
        * otherwise a 1-D object array whose elements are the per-part
          (vertices, 2) arrays.
    """
    # Longer separator first, so its inner '), (' is not consumed separately.
    pieces = multi.replace(')), ((', "&").replace("), (", "&").split("&")

    # One (vertices, 2) array per part, kept together in order.
    polygons = [get_poly(piece) for piece in pieces]

    # Parts of equal size stack cleanly into a regular array.
    if len({poly.shape for poly in polygons}) <= 1:
        return np.asarray(polygons)

    # Ragged parts: np.asarray raises ValueError on recent NumPy releases, so
    # build the object array explicitly and fill it element by element.
    multi_list = np.empty(len(polygons), dtype=object)
    for idx, poly in enumerate(polygons):
        multi_list[idx] = poly
    return multi_list


def parse_shapes(one_row):
    """Turn one raw shape string into coordinate arrays.

    The string is trimmed with prep_row, then handed to get_multi when
    find_multi detects several coordinate groups, or to get_poly otherwise.
    The result of whichever parser ran is returned unchanged.
    """
    trimmed = prep_row(one_row)
    parser = get_multi if find_multi(trimmed) else get_poly
    return parser(trimmed)

def is_multi(row):
    """Decide from an array's dimensionality whether it holds several parts.

    Returns True when `row.shape` has exactly one dimension or more than two,
    and False otherwise (in particular for a plain two-dimensional array).
    """
    n_dims = len(row.shape)
    return n_dims == 1 or n_dims > 2

def test_point(point, cleaned_data):
    """Find which parsed shapes contain a point.

    Parameters
    ----------
    point : indexable
        `point[0]` and `point[1]` are passed to shapely's Point in that order.
    cleaned_data : indexable
        Parsed shapes, addressed as `cleaned_data[i]` for i in
        range(len(cleaned_data)). Each entry is either a single vertex array
        or a multi-part container, as classified by is_multi.

    Returns
    -------
    list of int
        Index `i` of every entry containing the point. For a multi-part entry
        the index is appended once per containing part, so it can repeat.

    Parts that cannot be built or tested are reported on stdout and skipped.
    """
    test_pt = Point(point[0], point[1])
    positive_gons = []
    for i in range(len(cleaned_data)):
        shape = cleaned_data[i]
        if is_multi(shape):
            # Test every part of the multi-part entry independently.
            for j in range(len(shape)):
                try:
                    if Polygon(shape[j]).contains(test_pt):
                        positive_gons.append(i)
                # Not a bare except: Ctrl-C must still interrupt a long run.
                except Exception:
                    print (i, j)
        else:
            try:
                if Polygon(shape).contains(test_pt):
                    positive_gons.append(i)
            except Exception:
                print ('not multi', i)
    return positive_gons

def which_polygons(zipped_pts, nice_data):
    """Run test_point for every point and collect the per-point results.

    Returns a list with one entry per point, in input order; each entry is
    whatever test_point returned for that point.
    """
    return [
        test_point(zipped_pts[idx], nice_data)
        for idx in range(len(zipped_pts))
    ]

def find_usage(found_polygons, usage_column):
    """Look up the usage value of the first matching polygon for each point.

    Parameters
    ----------
    found_polygons : sequence of sequences of int
        For each point, the indices of the polygons that contain it.
    usage_column : indexable
        Usage values, addressed by those polygon indices.

    Returns
    -------
    list
        One entry per point: `usage_column[first_index]`, or None when the
        point matched no polygon at all.
    """
    usage = []
    for hits in found_polygons:
        if len(hits) == 0:
            # A point outside every polygon has no first match to look up.
            usage.append(None)
        else:
            usage.append(usage_column[hits[0]])
    return usage

def find_open(found_polygons):
    """Flag, per point, whether it matched at least one polygon.

    Returns a list of booleans in input order: False for an empty match list,
    True otherwise.
    """
    return [
        len(found_polygons[idx]) != 0
        for idx in range(len(found_polygons))
    ]

def wrapper_landuse(zipped_points, uncleaned_data_col, cross_checked_col):
    """Return the usage value for each point, timing the three stages.

    Stages, each followed by a print of its elapsed seconds:
      1. parse every raw shape string with parse_shapes (via `.apply`);
      2. locate the polygons containing each point with which_polygons;
      3. map each point's matches to a usage value with find_usage.

    Returns the list produced by find_usage.
    """
    tick = time.time()
    shapes = uncleaned_data_col.apply(parse_shapes)
    print (time.time() - tick)

    tick = time.time()
    hits_per_point = which_polygons(zipped_points, shapes)
    print (time.time() - tick)

    tick = time.time()
    land_use = find_usage(hits_per_point, cross_checked_col)
    print (time.time() - tick)

    return land_use

def wrapper_openspace(zipped_points, uncleaned_data_col):
    """Return, per point, whether it falls inside any parsed shape.

    Parses the raw shape strings with parse_shapes (via `.apply`), locates the
    containing polygons with which_polygons, and reduces each point's matches
    to a boolean with find_open.
    """
    shapes = uncleaned_data_col.apply(parse_shapes)
    hits_per_point = which_polygons(zipped_points, shapes)
    return find_open(hits_per_point)

def proportion_land(list):
    """Compute the share of each distinct value in a sequence.

    Returns a Counter mapping every distinct value to its count divided by
    the total number of items. An empty input yields an empty Counter.
    """
    # The parameter shadows the builtin `list`, so it is not called in here.
    total = len(list)
    return Counter({
        label: count / total
        for label, count in Counter(list).items()
    })

def find_open_prop(list_rands, open_column):
    """Compute inside/outside proportions for every set of points.

    For each point set in `list_rands`, wrapper_openspace yields one boolean
    per point and proportion_land turns those into proportions.

    Returns a list with one proportions mapping per point set, in input order.
    """
    return [
        proportion_land(wrapper_openspace(list_rands[idx], open_column))
        for idx in range(len(list_rands))
    ]

def find_land_prop(list_rands, land_column, usage_column):
    """Compute usage proportions for every set of points.

    Parameters
    ----------
    list_rands : sequence
        Point sets; each is passed whole to wrapper_landuse.
    land_column : pandas-like column of raw shape strings.
    usage_column : indexable of usage values, parallel to `land_column`.

    Returns
    -------
    list
        One proportions mapping (from proportion_land) per point set.

    A progress line 'point <index>' is printed for every 1000th point set.
    """
    dict_of_props = []
    for i in range(len(list_rands)):
        # Progress is driven by the loop index, so it advances every pass.
        if i % 1000 == 0:
            print ('point', i)
        # NOTE(review): wrapper_landuse re-parses the whole shape column on
        # every call; parsing once up front would avoid the repeated work.
        list_of_use = wrapper_landuse(list_rands[i], land_column, usage_column)
        dict_of_props.append(proportion_land(list_of_use))
    return dict_of_props

# Load the land-use and open-space tables.
fdata_land = pd.read_csv('land_use_full.csv')
print ('I have read in full land data')

fdata_open = pd.read_csv('open_space_full.csv')
print ('I have read in full open data')

# Raw shape strings and the usage description that goes with each land shape.
land_col = fdata_land['SHAPE']
land_usage = fdata_land['LU05_DESC']
open_col = fdata_open['SHAPE']

# NOTE(review): pickle.load can execute arbitrary code -- only load
# 'correct_randpts' if it was produced by a trusted source.
with open('correct_randpts', 'rb') as pts_file:
    pic_pts = pickle.load(pts_file)

total_land_props = find_land_prop(pic_pts, land_col, land_usage)
print ('I have computed land proportions')

total_open_props = find_open_prop(pic_pts, open_col)
print ('I have computed open proportions')

# Context managers close the output files even if a dump raises.
with open('all_land_props', 'wb') as file_land:
    pickle.dump(total_land_props, file_land)

with open('all_open_props', 'wb') as file_open:
    pickle.dump(total_open_props, file_open)

print ('success.')








I have read in full land data
I have read in full open data
0.8249390125274658


In [3]:
# Spot check: parse the land shape at label 3 and test a single point
# (the first point of the first point set) against just that shape.
parsed = parse_shapes(land_col[3])
# Debugging aids, left disabled:
# print (parsed.shape)
# print (pic_pts[0].shape)
# print (pic_pts[0][0])
# which_polygons indexes into both arguments, so the single point and the
# single parsed shape are each wrapped in a one-element list. Any index in the
# result therefore refers to a position in that one-element list.
polygons_found = which_polygons([pic_pts[0][0]], [parsed])
print (polygons_found)

cleaned data [ array([[-71.19130559,  42.32732592],
       [-71.19091287,  42.32762503],
       [-71.19122636,  42.32786615],
       ..., 
       [-71.19080443,  42.32695133],
       [-71.19088273,  42.32700986],
       [-71.19130559,  42.32732592]])
 array([[-71.19777524,  42.31868388],
       [-71.19776359,  42.31899004],
       [-71.19766983,  42.31898745],
       [-71.19768208,  42.31868138],
       [-71.19777524,  42.31868388]])
 array([[-71.18850772,  42.31955877],
       [-71.18834548,  42.31986788],
       [-71.18812792,  42.31979083],
       [-71.1882998 ,  42.3194884 ],
       [-71.18850772,  42.31955877]])
 array([[-71.1930434 ,  42.32332238],
       [-71.19304239,  42.32353212],
       [-71.1924838 ,  42.32350136],
       [-71.19265331,  42.32331199],
       [-71.1930434 ,  42.32332238]])
 array([[-71.20116655,  42.32385988],
       [-71.20100307,  42.32397703],
       [-71.20057912,  42.32378236],
       [-71.20063367,  42.32371453],
       [-71.2007213 ,  42.32360556],
  