# Sensor placement

[Simon Dobson](mailto:simon.dobson@st-andrews.ac.uk) <br>
School of Computer Science, University of St Andrews, Scotland UK

In [1]:
import json
from datetime import datetime, timedelta
from itertools import product, combinations
import numpy
import netCDF4 as nc
from pandas import DataFrame
from geopandas import GeoDataFrame, GeoSeries
from networkx import Graph

from sensor_placement import *

from pyproj import CRS, Transformer, Geod
from geovoronoi import voronoi_regions_from_coords, points_to_coords
import folium
import folium.plugins
import shapely
from shapely.geometry import Point, Polygon, shape
from shapely.ops import cascaded_union

### Import the CEH-GEAR dataset

The CEH-GEAR dataset is the "reference" dataset for rainfall, interpolated at 1km squares across Great Britain.

In [2]:
# Epoch from which day offsets are counted by daysToDate()/dateToDays().
days_base = datetime(1800, 1, 1)

In [3]:
def daysToDate(days):
    '''Return the date of the given day offset.

    :param days: offset in days from days_base; a Python number or a NumPy
        scalar (fractional days are preserved)
    :returns: the datetime that lies that many days after days_base'''
    # timedelta only accepts Python int/float (or subclasses); NumPy scalars
    # such as numpy.int64 or numpy.float32 raise TypeError, so coerce first.
    return days_base + timedelta(days=float(days))

def dateToDays(date):
    '''Return the offset of the given date from days_base.

    :param date: the datetime to convert
    :returns: a timedelta (read its .days attribute for whole days)'''
    offset = date - days_base
    return offset

def dateToIndex(ds, date):
    '''Return the index into the given dataset of the given date.

    Computed as the whole-day offset of the date from days_base minus the
    first value of the dataset's 'time' variable.

    :param ds: dataset exposing a 'time' variable indexable as ds['time'][0]
    :param date: the datetime to look up
    :returns: the difference in days from the dataset's first time value'''
    # NOTE(review): this is a positional index only if the time axis advances
    # by exactly one day per step -- confirm before using on monthly data.
    day_offset = dateToDays(date).days
    first_day = ds['time'][0]
    return day_offset - first_day

In [4]:
# Open the CEH-GEAR NetCDF file; the handle stays open so later cells can
# read variables from it on demand.
ceh_filename='datasets/CEH_GEAR_monthly_GB_2017.nc'
ceh = nc.Dataset(ceh_filename)

In [5]:
# Coordinate and time axes of the CEH-GEAR grid.
xs = ceh['x'][:]
ys = ceh['y'][:]
ts = ceh['time'][:]
lats = ceh['lat'][:]
lons = ceh['lon'][:]

# Index 1 on the first (time) axis selects the second time step; the two
# remaining axes are indexed as [y, x] by the cells below.
# NOTE(review): presumably February 2017, given monthly data that starts
# on 2017-01-01 -- confirm.
rainfall = ceh['rainfall_amount'][1, :, :]
distance = ceh['min_dist'][1, :, :]

In [57]:
# Sanity check: the earliest time value in the CEH-GEAR file, as a date.
print(daysToDate(min(ts)))

2017-01-01 00:00:00


The data is referenced by co-ordinates on the UK National Grid. Since we also need to be able to work in standard latitude-longitude co-ordinates, we construct a transformer between the two co-ordinate reference systems.

In [175]:
uk_grid_crs = CRS.from_string('EPSG:27700')   # UK national grid
latlon_crs = CRS.from_string('EPSG:4326')     # WGS 84 geographic latitude/longitude (not a Mercator projection)

# proj: national grid -> lat/lon; used below as `lat, lon = proj.transform(easting, northing)`
proj = Transformer.from_crs(uk_grid_crs, latlon_crs)
# proj_inv: lat/lon -> national grid; called below with its arguments in (lat, lon) order
proj_inv = Transformer.from_crs(latlon_crs, uk_grid_crs)

### Import the boundaries of counties

We construct a dataframe with the names and boundary shapes of counties (and other administrative areas of the UK) by reading the master shape file.

In [7]:
# Read the county boundaries (a GeoJSON feature collection) into memory.
boundaries_filename = 'datasets/UK_BUC.geojson'
# Decode as UTF-8 explicitly: open()'s default encoding is platform-dependent
# and would corrupt non-ASCII place names on some systems.
with open(boundaries_filename, 'r', encoding='utf-8') as fh:
    counties_json = json.load(fh)

In [8]:
# One row per GeoJSON feature: the area's name and its boundary as a shapely
# geometry. The frame is built in a single step rather than grown row by row,
# which re-allocates on every insertion.
counties = GeoDataFrame({'county': [f['properties']['ctyua18nm'] for f in counties_json['features']],
                         'geometry': [shape(f['geometry']) for f in counties_json['features']]},
                        columns=['county', 'geometry'])

We'll focus on Fife for the rest of this notebook.

In [9]:
# First row whose county name is exactly 'Fife' (as a single row, not a frame);
# .iloc[0] raises IndexError if no row matches.
fife = counties[counties['county'] == 'Fife'].iloc[0]

We extract the boundary polygon for Fife.

In [70]:
# Boundary geometry of Fife, in the (longitude, latitude) co-ordinates of the source file.
fife_boundary = fife['geometry']

In [95]:
# Centre of Fife as [latitude, longitude] -- the order folium expects for a
# map location (the geometry itself stores x=longitude, y=latitude).
mid_fife = [fife_boundary.centroid.y, fife_boundary.centroid.x]
mid_fife

[56.22895222627837, -3.1263301904853984]

### Load the SEPA data

In [11]:
# Read the SEPA station metadata (a JSON list of station records).
sepastations_filename = 'datasets/sepa-stations.json'
# Decode as UTF-8 explicitly: open()'s default encoding is platform-dependent.
with open(sepastations_filename, 'r', encoding='utf-8') as fh:
    sepastations_json = json.load(fh)

In [12]:
# Convert the raw station records into a geo-dataframe. Records are collected
# first and the frame is built once, instead of growing it one row at a time.
station_records = []
for s in sepastations_json:
    # the JSON holds co-ordinates as strings; parse each once
    station_lon = float(s['station_longitude'])
    station_lat = float(s['station_latitude'])
    station_records.append({'name': s['station_name'],
                            'id': int(s['station_no']),
                            'longitude': station_lon,
                            'latitude': station_lat,
                            'geometry': Point(station_lon, station_lat)})   # x=longitude, y=latitude
sepastations = GeoDataFrame(station_records,
                            columns=['name', 'id', 'longitude', 'latitude', 'geometry'])

In [13]:
# Keep only the stations whose location lies within the Fife boundary.
fife_stations = sepastations[sepastations['geometry'].within(fife_boundary)]

In [14]:
# Display the selected stations.
fife_stations

Unnamed: 0,name,id,longitude,latitude,geometry
9,Annfield,15198,-3.371633,56.066401,POINT (-3.37163 56.06640)
20,Baintown Rain-gauge,501947,-3.042679,56.221016,POINT (-3.04268 56.22102)
45,Cambo Sands,338380,-2.650067,56.30088,POINT (-2.65007 56.30088)
102,Fife Airport,15151,-3.222449,56.18165,POINT (-3.22245 56.18165)
157,Kinghorn Ecology Centre Rain Gauge,473550,-3.193433,56.074623,POINT (-3.19343 56.07462)
211,Newton of Falkland,15083,-3.186269,56.254315,POINT (-3.18627 56.25431)
235,Rossie Farm,15070,-3.202718,56.299126,POINT (-3.20272 56.29913)
237,Saline,15155,-3.585959,56.12224,POINT (-3.58596 56.12224)
249,St Monance,335620,-2.782714,56.204785,POINT (-2.78271 56.20479)
252,Strathkinness,11368,-2.863919,56.331369,POINT (-2.86392 56.33137)


### Displaying the reference data 

To display the rainfall data we extract from the CEH-GEAR dataset all the points that lie within the boundary of interest. We then normalise the rainfall into the range [0.0, 1.0] for simplicity.

In [15]:
# Collect every CEH-GEAR grid cell that has a rainfall value and lies inside
# Fife. i_east/i_north are the cell's indices into xs/ys, so rainfall[i_north, i_east]
# retrieves its value later.

# getmaskarray always yields a full boolean array, even when the data has no
# masked entries (in which case `.mask` is a scalar that cannot be indexed)
rainfall_missing = numpy.ma.getmaskarray(rainfall)

# bounding box of Fife in (lon, lat): a cheap rejection test, since a point
# outside the box cannot be inside the polygon
fife_lon_min, fife_lat_min, fife_lon_max, fife_lat_max = fife_boundary.bounds

fife_point_records = []
for i, j in product(range(len(xs)), range(len(ys))):
    if rainfall_missing[j, i]:
        continue

    # national grid (easting, northing) -> (lat, lon)
    lat, lon = proj.transform(xs[i], ys[j])
    if not (fife_lon_min <= lon <= fife_lon_max and fife_lat_min <= lat <= fife_lat_max):
        continue

    p = Point(lon, lat)
    if fife_boundary.contains(p):
        fife_point_records.append({'i_east': i,
                                   'i_north': j,
                                   'longitude': lon,
                                   'latitude': lat,
                                   'geometry': p})

# build the frame once rather than appending row by row
fife_points = GeoDataFrame(fife_point_records,
                           columns=['i_east', 'i_north', 'longitude', 'latitude', 'geometry'])

In [16]:
# Look up the CEH-GEAR rainfall for each grid point inside Fife.
fife_points_rainfall = fife_points.copy()
fife_points_rainfall['rainfall'] = fife_points_rainfall.apply(
    lambda row: rainfall[row['i_north'], row['i_east']],
    axis=1)

# Rescale linearly so the smallest value maps to 0.0 and the largest to 1.0.
r_min = min(fife_points_rainfall['rainfall'])
r_max = max(fife_points_rainfall['rainfall'])
r_step = 1 / (r_max - r_min)
fife_points_rainfall['normalised_rainfall'] = r_step * (fife_points_rainfall['rainfall'] - r_min)

In [135]:
# Report the range of CEH-GEAR rainfall values found inside Fife.
print('Rainfall {:.02f}--{:.02f}mm'.format(min(fife_points_rainfall['rainfall']), max(fife_points_rainfall['rainfall'])))

Rainfall 50.90--130.60mm


We can then construct a map with overlays for the region of interest and the interpolated rainfall.

In [164]:
# Map of Fife showing the stations over a heat map of CEH-GEAR rainfall.
uk_ceh = folium.Map(location=mid_fife, tiles="Stamen Terrain", zoom_start=10)

# add the boundary of Fife
folium.GeoJson(fife['geometry']).add_to(uk_ceh)

# add the stations
for _, s in fife_stations.iterrows():
    name, lon, lat = s['name'], s['longitude'], s['latitude']
    # show unsigned degrees with a hemisphere letter; printing the signed
    # value with a fixed suffix produced labels such as "-3.37W"
    position = f"{abs(lat):.2f}{'N' if lat >= 0 else 'S'}, {abs(lon):.2f}{'W' if lon < 0 else 'E'}"
    folium.Marker(location=(lat, lon),
                  tooltip=f'{name} ({position})').add_to(uk_ceh)

# add the heat map, weighted by the normalised rainfall
_ = folium.plugins.HeatMap(data=fife_points_rainfall[['latitude', 'longitude', 'normalised_rainfall']], min_opacity=0.01, radius=20, blur=20).add_to(uk_ceh)

In [165]:
# Render the CEH-GEAR map.
uk_ceh

Alternatively, if the actual observations at the stations themselves are of interest, we can plot them too. We first load the raw data.

In [58]:
# Open the SEPA station observations; the handle stays open so later cells
# can read from it on demand.
sepa_filename='datasets/sepa_monthly_2017.nc'
sepa = nc.Dataset(sepa_filename)

In [119]:
# Sanity check: the earliest time value in the SEPA file, as a date.
print(daysToDate(int(min(sepa['time'][:]))))

2017-01-01 00:00:00


In [144]:
# Range of observed rainfall at the Fife stations, at time index 1.
ss = list(fife_stations['id'])

# read the station id list from the file once, not once per lookup
sepa_station_ids = list(sepa['station'][:])
sis = [sepa_station_ids.index(station_id) for station_id in ss]   # position of each Fife station in the SEPA data

fis = sepa['rainfall_amount'][1, sis]
print('Rainfall {:.02f}--{:.02f}mm'.format(min(fis), max(fis)))

Rainfall 52.20--95.60mm


In [124]:
# Map of Fife with one marker per station, labelled with its observed rainfall.
uk_stations = folium.Map(location=mid_fife, tiles="Stamen Terrain", zoom_start=10)
folium.GeoJson(fife['geometry']).add_to(uk_stations)

# read the station id list from the file once, not once per marker
sepa_station_ids = list(sepa['station'][:])

# add the station observations
for _, s in fife_stations.iterrows():
    # `station_id` rather than `id`, so the builtin id() is not shadowed
    name, station_id, lon, lat = s['name'], s['id'], s['longitude'], s['latitude']
    rain = sepa['rainfall_amount'][1, sepa_station_ids.index(station_id)]
    folium.Marker(location=(lat, lon),
                  tooltip=f'{name} {rain:.2f}mm').add_to(uk_stations)

In [125]:
# Render the station observations map.
uk_stations

### Building a network of stations

We might also want to construct a neighbourhood graph from the stations. For this we assume that stations are the nodes of a network, with edges between nodes that are within a certain distance of each other.

In [22]:
# Neighbourhood graph: stations are nodes, and two stations are joined by an
# edge when they lie within max_edge_distance of each other.
g_fife = Graph()

# construct nodes for each station, keyed by position within fife_stations
for i, (_, s) in enumerate(fife_stations.iterrows()):
    g_fife.add_node(i,
                    name=s['name'],
                    id=s['id'],
                    longitude=s['longitude'],
                    latitude=s['latitude'])

# form edges between nodes within 25km of each other
max_edge_distance = 25000   # metres
# measure on the WGS 84 ellipsoid, matching the EPSG:4326 co-ordinates the
# rest of the notebook assumes for the stations
geodesic = Geod(ellps='WGS84')
for n, m in combinations(list(g_fife.nodes()), 2):
    lon1, lat1 = g_fife.nodes[n]['longitude'], g_fife.nodes[n]['latitude']
    lon2, lat2 = g_fife.nodes[m]['longitude'], g_fife.nodes[m]['latitude']
    # inv() returns (forward azimuth, back azimuth, distance); only the
    # distance, in metres, is needed
    _, _, d = geodesic.inv(lon1, lat1, lon2, lat2)
    if d <= max_edge_distance:
        g_fife.add_edge(n, m, distance=d)

In [102]:
# Map of Fife showing the station neighbourhood graph.
fife_stations_network = folium.Map(location=mid_fife, tiles="Stamen Terrain", zoom_start=10)
folium.GeoJson(fife['geometry']).add_to(fife_stations_network)

# read the station id list from the file once, not once per marker
sepa_station_ids = list(sepa['station'][:])

# add the stations
for n in g_fife.nodes():
    node = g_fife.nodes[n]
    # `station_id` rather than `id`, so the builtin id() is not shadowed
    name, station_id, lat, lon = node['name'], node['id'], node['latitude'], node['longitude']
    rain = sepa['rainfall_amount'][1, sepa_station_ids.index(station_id)]
    # show unsigned degrees with a hemisphere letter; printing the signed
    # value with a fixed suffix produced labels such as "-3.37W"
    position = f"{abs(lat):.2f}{'N' if lat >= 0 else 'S'}, {abs(lon):.2f}{'W' if lon < 0 else 'E'}"
    folium.Marker(location=(lat, lon),
                  tooltip=f'{name} ({position}) {rain:.2f}mm').add_to(fife_stations_network)

# add the edges, labelled with their length in kilometres
for n, m in g_fife.edges():
    ps = [(g_fife.nodes[n]['latitude'], g_fife.nodes[n]['longitude']), (g_fife.nodes[m]['latitude'], g_fife.nodes[m]['longitude'])]
    tip = 'Distance {d:.2f}km'.format(d=g_fife.edges[n, m]['distance'] / 1000)
    folium.PolyLine(ps, color='red', tooltip=tip, weight=2, opacity=1).add_to(fife_stations_network)

In [103]:
# Render the station network map.
fife_stations_network

### Constructing the Voronoi cells

Interpolation works by mapping observed values through the Voronoi cells (also known as Thiessen polygons in geographical systems).

In [25]:
# Prepare the inputs for the Voronoi construction: the station locations as a
# co-ordinate array, and the region boundary as a single geometry.
stations = fife_stations[['name', 'id', 'geometry']]
stations.crs = 'EPSG:4326'

boundary = GeoDataFrame(columns=['geometry'])
boundary.loc[len(boundary.index)] = {'geometry': fife['geometry']}
boundary.crs = 'EPSG:4326'

cell_centres = points_to_coords(stations['geometry'])
# unary_union is the documented replacement for the deprecated cascaded_union
boundary_shape = shapely.ops.unary_union(boundary['geometry'])

In [26]:
# Compute the Voronoi cells of the stations, restricted to the boundary.
# voronoi_cells is consumed below through .values(), i.e. as a mapping whose
# values are the cell geometries.
voronoi_cells, voronoi_centres = voronoi_regions_from_coords(cell_centres, boundary_shape)

In [27]:
# One row per Voronoi cell; built in one step instead of appending row by row.
fife_cells = GeoDataFrame({'geometry': list(voronoi_cells.values())},
                          columns=['geometry'])

In [104]:
# Map of Fife showing the stations and their Voronoi cells.
fife_stations_voronoi = folium.Map(location=mid_fife, tiles="Stamen Terrain", zoom_start=10)
folium.GeoJson(fife['geometry']).add_to(fife_stations_voronoi)

# read the station id list from the file once, not once per marker
sepa_station_ids = list(sepa['station'][:])

# add the stations
for _, s in fife_stations.iterrows():
    # `station_id` rather than `id`, so the builtin id() is not shadowed
    name, station_id, lon, lat = s['name'], s['id'], s['longitude'], s['latitude']
    rain = sepa['rainfall_amount'][1, sepa_station_ids.index(station_id)]
    # show unsigned degrees with a hemisphere letter; printing the signed
    # value with a fixed suffix produced labels such as "-3.37W"
    position = f"{abs(lat):.2f}{'N' if lat >= 0 else 'S'}, {abs(lon):.2f}{'W' if lon < 0 else 'E'}"
    folium.Marker(location=(lat, lon),
                  tooltip=f'{name} ({position}) {rain:.2f}mm').add_to(fife_stations_voronoi)

# add the Voronoi cells
for c in fife_cells['geometry']:
    folium.GeoJson(c).add_to(fife_stations_voronoi)

In [105]:
# Render the Voronoi map.
fife_stations_voronoi

### Constructing the natural neighbour interpolation 

In [235]:
# Attach the observed rainfall (time index 1) to each Fife station.
# The values are gathered without rebinding the name `rainfall`, which holds
# the CEH-GEAR grid that the earlier cells index into.
sepa_station_ids = list(sepa['station'][:])   # read once, not once per station

fife_rainfall = fife_stations.copy()
fife_rainfall['rainfall'] = [sepa['rainfall_amount'][1, sepa_station_ids.index(station_id)]
                             for station_id in fife_stations['id']]

In [236]:
# Display the stations with their observed rainfall.
fife_rainfall

Unnamed: 0,name,id,longitude,latitude,geometry,rainfall
9,Annfield,15198,-3.371633,56.066401,POINT (-3.37163 56.06640),79.0
20,Baintown Rain-gauge,501947,-3.042679,56.221016,POINT (-3.04268 56.22102),82.2
45,Cambo Sands,338380,-2.650067,56.30088,POINT (-2.65007 56.30088),52.2
102,Fife Airport,15151,-3.222449,56.18165,POINT (-3.22245 56.18165),95.6
157,Kinghorn Ecology Centre Rain Gauge,473550,-3.193433,56.074623,POINT (-3.19343 56.07462),71.6
211,Newton of Falkland,15083,-3.186269,56.254315,POINT (-3.18627 56.25431),87.4
235,Rossie Farm,15070,-3.202718,56.299126,POINT (-3.20272 56.29913),77.8
237,Saline,15155,-3.585959,56.12224,POINT (-3.58596 56.12224),88.0
249,St Monance,335620,-2.782714,56.204785,POINT (-2.78271 56.20479),61.6
252,Strathkinness,11368,-2.863919,56.331369,POINT (-2.86392 56.33137),72.6


In [262]:
# find the boundaries of the region of interest
x_min, y_min, x_max, y_max = fife_boundary.bounds

# how many kilometre squares is that?
xg_min, yg_min = proj_inv.transform(y_min, x_min)
xg_max, yg_max = proj_inv.transform(y_max, x_max)
es = int((xg_max - xg_min) / 1000) + 1
ns = int((yg_max - yg_min) / 1000) + 1

xs = numpy.linspace(x_min, x_max, num=es, endpoint=True)
ys = numpy.linspace(y_min, y_max, num=ns, endpoint=True)

In [263]:
# Interpolate the station observations onto the sample grid within Fife.
# natural_nearest_neighbour presumably comes from the sensor_placement star
# import -- its exact contract is not visible in this notebook.
df_grid = natural_nearest_neighbour(fife_rainfall, fife_boundary, xs, ys)

In [264]:
# Display the interpolated grid.
df_grid

Unnamed: 0,x,y,geometry,rainfall
56,1,7,POINT (-3.72388 56.07095),88.000000
57,1,8,POINT (-3.72388 56.08026),88.000000
58,1,9,POINT (-3.72388 56.08956),88.000000
104,2,6,POINT (-3.70784 56.06165),88.000000
105,2,7,POINT (-3.70784 56.07095),88.000000
...,...,...,...,...
1831,37,18,POINT (-3.14665 56.17330),85.744292
1832,37,19,POINT (-3.14665 56.18260),85.944803
1878,38,16,POINT (-3.13061 56.15469),84.609732
1879,38,17,POINT (-3.13061 56.16399),84.889194


In [269]:
# normalise rainfall, using the same values as for the CEH-GEAR data so
# we get comparable colour schemes
r_max = max(fife_points_rainfall['rainfall'])
r_min = min(fife_points_rainfall['rainfall'])
r_step = 1 / (r_max - r_min)
df_grid['normalised_rainfall'] = (df_grid['rainfall'] - r_min) * r_step

# break-out lon and lat from each point (x is longitude, y is latitude).
# Assigned straight to the columns so the names `lats` and `lons`, which hold
# the CEH-GEAR co-ordinate arrays, are not rebound.
df_grid['latitude'] = [p.y for p in df_grid['geometry']]
df_grid['longitude'] = [p.x for p in df_grid['geometry']]

In [270]:
# Map of Fife showing the stations, the Voronoi cell outlines, and a heat map
# of the interpolated rainfall.
fife_stations_interpolation = folium.Map(location=mid_fife, tiles="Stamen Terrain", zoom_start=10)
folium.GeoJson(fife['geometry']).add_to(fife_stations_interpolation)

# read the station id list from the file once, not once per marker
sepa_station_ids = list(sepa['station'][:])

# add the stations
for _, s in fife_stations.iterrows():
    # `station_id` rather than `id`, so the builtin id() is not shadowed
    name, station_id, lon, lat = s['name'], s['id'], s['longitude'], s['latitude']
    rain = sepa['rainfall_amount'][1, sepa_station_ids.index(station_id)]
    # show unsigned degrees with a hemisphere letter; printing the signed
    # value with a fixed suffix produced labels such as "-3.37W"
    position = f"{abs(lat):.2f}{'N' if lat >= 0 else 'S'}, {abs(lon):.2f}{'W' if lon < 0 else 'E'}"
    folium.Marker(location=(lat, lon),
                  tooltip=f'{name} ({position}) {rain:.2f}mm').add_to(fife_stations_interpolation)

# add the Voronoi cell boundaries (outlines only, so the heat map stays visible)
for c in fife_cells['geometry']:
    folium.GeoJson(c, style_function=lambda f: {'fill': False}).add_to(fife_stations_interpolation)

# add the heat map
_ = folium.plugins.HeatMap(data=df_grid[['latitude', 'longitude', 'normalised_rainfall']], min_opacity=0.01, radius=20, blur=20).add_to(fife_stations_interpolation)

In [271]:
# Render the interpolated-rainfall map.
fife_stations_interpolation

In [258]:
# Render the CEH-GEAR map again, for side-by-side comparison with the interpolation.
uk_ceh

In [268]:
# Compare the number of interpolated sample points with the number of
# CEH-GEAR grid points found inside Fife.
len(df_grid), len(fife_points_rainfall)

(1287, 1330)