# Faster method

Author: Geoff Boeing

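For each spatial geometry, this notebook sums the `value` of every geometry (itself included) that lies within 500 meters of it, using a spatial index to narrow down the candidates before running the exact intersection test.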

In [1]:
import sys, csv, time
import pandas as pd
import geopandas as gpd
from shapely.wkt import loads

In [2]:
# monitor performance
# perf_counter is a monotonic, high-resolution clock, so the elapsed times
# cannot be skewed (or go negative) if the system clock is adjusted mid-run
start_time = time.perf_counter()
def log(message):
    """
    Print the seconds elapsed since the previous call, then restart the timer.

    The first call reports the time elapsed since this cell was executed.

    Parameters
    ----------
    message : str
        label describing the step that just finished

    Returns
    -------
    None
    """
    global start_time
    elapsed = time.perf_counter() - start_time
    print('{:,.3f}s: {}'.format(elapsed, message))
    # restart after printing so the next message reports only its own step
    start_time = time.perf_counter()

## 1) Load and format the data

In [3]:
# coordinate reference system to assign to the geometries, and the input location
crs = {'init': 'epsg:32154'}
filepath = './example_geometries.csv'

# read the raw table; later cells use its 'id', 'value' and WKT 'geometry' columns
df = pd.read_csv(filepath_or_buffer=filepath)
log('loaded csv')

0.227s: loaded csv


In [4]:
# parse every WKT string into a shapely geometry and wrap the result in a geoseries
geometry = gpd.GeoSeries(
    df['geometry'].map(loads)
)
log('created geometry')

1.141s: created geometry


In [5]:
# attach the geometries and CRS to the id/value columns, then key the rows by id
gdf = gpd.GeoDataFrame(
    data=df[['id', 'value']],
    geometry=geometry,
    crs=crs
).set_index('id')
log('converted df to gdf')

0.021s: converted df to gdf


## 2) Create spatial index and buffer the geometry

In [6]:
# grab the geodataframe's spatial index once, up front, so that summarize()
# can reuse the same index object for every row
sidx = gdf.sindex
log('created spatial index')

2.603s: created spatial index


In [7]:
# search radius around each geometry, expressed in the units of the CRS;
# the projection is in meters (buffer itself is unit agnostic), so this is 500 meters
buffer_distance = 500
gdf['buffer'] = gdf['geometry'].buffer(buffer_distance)
log('buffered geometries')

4.953s: buffered geometries


## 3) Sum values of all rows within each row's buffered distance

In [8]:
def summarize(buffer):
    """
    Sum the 'value' column over every row of gdf that intersects a buffer.

    Works in two passes: the spatial index first narrows gdf down to the rows
    whose entries intersect the buffer's bounding box, then an exact
    intersection test is run on just those candidates.

    Parameters
    ----------
    buffer : geometry
        the buffered shape to search around; must expose .bounds

    Returns
    -------
    the sum of 'value' across the intersecting rows
    """
    # coarse pass: positions of rows the spatial index returns for the bounding box
    candidate_positions = list(sidx.intersection(buffer.bounds))
    candidates = gdf.iloc[candidate_positions]

    # exact pass: keep only the candidates that truly intersect the buffer
    is_hit = candidates.intersects(buffer)
    return candidates.loc[is_hit, 'value'].sum()

In [9]:
# run summarize over every row's buffer and label the resulting series
sums = gdf['buffer'].map(summarize).rename('value sum')
log('calculated value sums')

79.330s: calculated value sums


In [10]:
# preview the first few summed values, indexed by id
sums.head()

id
14856    10319.032630
8716        32.595417
661          3.991619
709         14.131560
717         32.533223
Name: value sum, dtype: float64

The algorithm processed 29,873 rows in 79.3 seconds — that's about 2.7 milliseconds per row.