In [1]:
import KeyFunctions as kf
import pygeos
import numpy as np
import pandas as pd

root_path = "D:/GeoData/"
Main_CRS = "EPSG:27700"

%matplotlib inline
import matplotlib.pyplot as plt

In [2]:
#Generalised function to find the nearest candidate geometry to each source geometry

def dist_to_nearest(source, candidates, return_geom = False):
    source_py = pygeos.from_shapely(source.geometry)
    candidates_py = pygeos.from_shapely(candidates.geometry)
    tree = pygeos.STRtree(candidates_py)
    s_near, c_near = tree.nearest(source_py)

    dist = pygeos.distance(source_py[s_near.tolist()], candidates_py[c_near.tolist()])

    if return_geom:
        out = dist, candidates.loc[c_near.tolist() ,"geometry"]
    else:
        out = dist

    return out

In [3]:
def within_radius(source, candidates, radius):
    source_py = pygeos.from_shapely(source.geometry)
    candidates_py = pygeos.from_shapely(candidates.geometry)

    tree = pygeos.STRtree(candidates_py)
    s_idx, c_idx = tree.query_bulk(source_py, predicate='dwithin', distance=radius)
    return np.bincount(s_idx)

In [19]:
def average_within_radius(source, candidates, radius, Value):

    source_py = pygeos.from_shapely(source.geometry)
    candidates_py = pygeos.from_shapely(candidates.geometry)

    tree = pygeos.STRtree(candidates_py)
    s_idx, c_idx = tree.query_bulk(source_py, predicate='dwithin', distance=radius)

    avg = [candidates.loc[c_idx[s_idx == x],Value].mean() for x in range(0,max(s_idx)+1)]

    return avg


In [5]:
#Import the postcode list from the raw data gdf
name = "StPauls" 
gdf = kf.load_obj(root_path,"raw_gdf_" + name)

#Initial set up os selecting some points and polygons
GreenSpace = gdf.loc[gdf["Type"]=="GreenSpace",:].reset_index(drop=True)
All_GB = gdf.loc[gdf["Type"]=="All_GB",:].reset_index(drop=True)
LocalAuthorities = gdf.loc[gdf["Type"]=="LocalAuthorities",:].reset_index(drop=True)
Postcodes = gdf.loc[gdf["Type"]=="Postcodes",:]
Road = gdf.loc[gdf["Type"]=="Road",:].reset_index(drop=True)
LReg = gdf.loc[gdf["Type"]=="LReg",:].reset_index(drop=True)
Dentists = gdf.loc[gdf["Type"]=="Dentists",:].reset_index(drop=True)

In [6]:
dist_to_nearest(Postcodes, LReg)

array([27.65863337, 35.51056181, 20.        , ...,  7.61577311,
       77.52418977, 29.41088234])

In [7]:
within_radius(Postcodes, LReg, 500)

array([109, 436, 154, ..., 117, 130, 128], dtype=int64)

In [20]:
#Comment out as this isn't very good
#average_within_radius(Postcodes, LReg, 500, "Details_Float")

ValueError: Geometry array should be one dimensional

In [9]:
#time for a race
#Ball Tree Method vs pygeos methods

In [10]:
%%timeit
kf.density_within_radius(Postcodes,LReg, 500)

1.75 s ± 14.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [11]:
%%timeit
within_radius(Postcodes, LReg, 500)

3.53 s ± 91.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [12]:
#New method not as quick (twice as slow infact), but it has the advantage of working on all geometries

In [13]:
%%timeit
kf.nearest_neighbor_point(Postcodes, LReg, "Name", "Name", keep_n_g_d = [False, False, True], merge=False)

1.8 s ± 46.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [14]:
%%timeit
dist_to_nearest(Postcodes, LReg)

1.48 s ± 22.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [15]:
#New process is quicker and more versitile as it works with different geometires

In [16]:
%%timeit
kf.average_within_radius(Postcodes, LReg, "Details_Float", 500)

1.97 s ± 40.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [17]:
%%timeit
average_within_radius(Postcodes, LReg, 500, "Details_Float")

2min 52s ± 13.9 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [18]:
#I failed at making this using pygeos method. I'm sure it's there and possible. But I can't see it

#Add the two nearest functions to keyfunctions