# The EPA dataset

[Simon Dobson](mailto:simon.dobson@st-andrews.ac.uk) <br>
School of Computer Science, University of St Andrews, Scotland, UK

In [4]:
import json
import pickle
from datetime import datetime, timedelta
import numpy
import netCDF4 as nc
from pandas import DataFrame
from geopandas import GeoDataFrame, GeoSeries, read_file
from networkx import Graph

from sensor_placement import *

from pyproj import CRS, Transformer, Geod
import folium
import folium.plugins
import shapely
from shapely.geometry import Point, Polygon, MultiPolygon, MultiPoint, shape
from shapely.ops import cascaded_union

In [2]:
# Coordinate reference systems used throughout the notebook
uk_grid_crs = CRS.from_epsg(27700)   # UK national grid (eastings/northings)
latlon_crs = CRS.from_epsg(4326)     # WGS 84 geographic latitude/longitude

# proj maps UK grid -> lat/lon; proj_inv maps lat/lon -> UK grid.
# Neither transformer sets always_xy, so each CRS keeps its own declared axis order.
proj = Transformer.from_crs(crs_from=uk_grid_crs, crs_to=latlon_crs)
proj_inv = Transformer.from_crs(crs_from=latlon_crs, crs_to=uk_grid_crs)

In [5]:
# Load the boundary features as parsed GeoJSON.
# The encoding is given explicitly so that decoding does not depend on the
# platform's locale default (GeoJSON files are UTF-8 encoded).
boundaries_filename = 'datasets/UK_BUC.geojson'
with open(boundaries_filename, 'r', encoding='utf-8') as fh:
    counties_json = json.load(fh)

In [7]:
# One row per GeoJSON feature: the 'ctyua18nm' property as the county name,
# and the feature's geometry converted to a shapely object.
counties = GeoDataFrame({
    'county': [feature['properties']['ctyua18nm'] for feature in counties_json['features']],
    'geometry': [shape(feature['geometry']) for feature in counties_json['features']],
})

In [8]:
# Open the EPA netCDF file read-only; later cells read its variables by name.
epa_filename = 'datasets/epa_daily_2022-03-07_2022-03-14.nc'
epa = nc.Dataset(epa_filename, mode='r')

In [10]:
# Station metadata: one column per netCDF variable, cast to the type we need.
epastations = GeoDataFrame({
    'id': numpy.asarray(epa['station']).astype(int),
    'name': numpy.asarray(epa['name']),
    'east': numpy.asarray(epa['x']).astype(int),
    'north': numpy.asarray(epa['y']).astype(int),
    'longitude': numpy.asarray(epa['long']).astype(float),
    'latitude': numpy.asarray(epa['lat']).astype(float),
})

# Each station's geometry is a point at (longitude, latitude).
epastations['geometry'] = [Point(lon, lat)
                           for lon, lat in zip(epastations['longitude'],
                                               epastations['latitude'])]

# Key the table by station id.
epastations = epastations.set_index('id')

  arr = construct_1d_object_array_from_listlike(values)


In [11]:
# Collect every station location into a single multi-point geometry.
points = MultiPoint(epastations['geometry'].tolist())

# Distances are in the points' coordinate units (degrees, since the points
# are longitude/latitude pairs).
epa_boundary = (
    points
    .buffer(1)       # grow each point outwards by 1 unit
    .buffer(-1)      # shrink the grown shape back by the same amount
    .buffer(0.05)    # pad the result with a small margin
)

In [12]:
# Base map centred on latitude 55, longitude -3.
# NOTE(review): recent folium releases may no longer bundle the Stamen tile
# sets -- confirm that "Stamen Terrain" still resolves in the target environment.
epa_map = folium.Map(location=(55, -3), tiles="Stamen Terrain", zoom_start=6)

# add the stations
epa_layer = folium.FeatureGroup(name='EPA stations')
for _, s in epastations.iterrows():
    name, lon, lat = s['name'], s['longitude'], s['latitude']

    # Derive the hemisphere suffix from the sign and print the magnitude, so a
    # western longitude reads "3.19W" rather than "-3.19W" and an eastern one
    # is not mislabelled as west.
    lat_hemisphere = 'N' if lat >= 0 else 'S'
    lon_hemisphere = 'E' if lon >= 0 else 'W'
    folium.Marker(location=(lat, lon),
                  tooltip=f'{name} ({abs(lat):.2f}{lat_hemisphere}, {abs(lon):.2f}{lon_hemisphere})',
                  icon=folium.Icon(color='red', icon='cloud')).add_to(epa_layer)
epa_layer.add_to(epa_map)

# add the boundary
hull_layer = folium.FeatureGroup(name='Boundary')
folium.GeoJson(epa_boundary).add_to(hull_layer)
hull_layer.add_to(epa_map)

# add a layer control (assigned to _ so the cell shows no output)
_ = folium.LayerControl().add_to(epa_map)

In [13]:
# Bare expression: the map object is rendered as this cell's output.
epa_map

In [14]:
# Bounding box of the boundary in its own coordinates (x = longitude, y = latitude).
(x_min, y_min, x_max, y_max) = epa_boundary.bounds

# Project the two opposite corners onto the UK grid. proj_inv takes its input
# as (latitude, longitude), hence the swapped argument order.
(xg_min, yg_min) = proj_inv.transform(y_min, x_min)
(xg_max, yg_max) = proj_inv.transform(y_max, x_max)

# Number of sample points per axis: one per 1000 grid units of extent
# (metres on the UK national grid), plus one for the far edge.
es = 1 + int((xg_max - xg_min) / 1000)
ns = 1 + int((yg_max - yg_min) / 1000)

# Evenly spaced sample coordinates spanning the bounding box, end points included.
xs = numpy.linspace(x_min, x_max, es)
ys = numpy.linspace(y_min, y_max, ns)

In [16]:
%%timeit -n1 -r1

global epa_tensor
epa_tensor = NNNI(epastations, epa_boundary, ys, xs, cores=1)

20min 27s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)


In [17]:
# Serialise the computed tensor to disk with pickle.
with open('datasets/epa_tensor.pickle', 'wb') as tensor_file:
    pickle.dump(epa_tensor, tensor_file)