# Data check

[Simon Dobson](mailto:simon.dobson@st-andrews.ac.uk) <br>
School of Computer Science, University of St Andrews, Scotland UK

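This notebook checks the rain gauge datasets downloaded and created from SEPA and CEDA, together with the live rainfall feed from the UK EPA. It loads the station locations from each source and plots them on a single map, alongside the UK county boundaries, so that their coverage can be inspected visually.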

In [20]:
import json
from datetime import date, datetime, timedelta
import netCDF4 as nc
from geopandas import GeoDataFrame

from sensor_placement.data import uk_epa

from pyproj import CRS, Transformer
from shapely.geometry import Point, shape
import folium

In [2]:
# Coordinate reference systems used throughout the notebook.
uk_grid_crs = CRS.from_string('EPSG:27700')   # UK national grid (OSGB36 / British National Grid)
latlon_crs = CRS.from_string('EPSG:4326')     # WGS 84 geographic latitude/longitude (not a Mercator projection)

# Transformer from national-grid coordinates to WGS 84. No always_xy flag is
# given, so pyproj follows each CRS's declared axis order: transform() takes
# (easting, northing) and returns (latitude, longitude), in that order.
proj = Transformer.from_crs(uk_grid_crs, latlon_crs)

## County boundaries

In [3]:
# GeoJSON file holding the county boundaries, relative to the working directory.
boundaries_filename = 'datasets/UK_BUC.geojson'

# GeoJSON is UTF-8 by specification, so decode it explicitly rather than
# relying on the platform's default encoding (which can garble place names
# containing non-ASCII characters).
with open(boundaries_filename, 'r', encoding='utf-8') as fh:
    counties_json = json.load(fh)

In [4]:
# Build the county table in a single construction from one record per GeoJSON
# feature. Growing a frame row-by-row with .loc re-allocates on every insert
# and leaves dtype inference to chance.
county_records = [{'county': feature['properties']['ctyua18nm'],
                   'geometry': shape(feature['geometry'])}
                  for feature in counties_json['features']]
counties = GeoDataFrame(county_records, columns=['county', 'geometry'])

In [5]:
# Show the counties whose boundary geometry fails the validity check.
counties[~counties.geometry.is_valid]

Unnamed: 0,county,geometry
134,Hampshire,"MULTIPOLYGON (((-0.76891 51.10327, -0.75944 51..."


## SEPA rain gauges and a sample month

In [6]:
# NetCDF file with the SEPA gauge data (presumably monthly values for 2017,
# judging by the filename -- confirm against the dataset's own metadata).
sepa_filename = 'datasets/sepa_monthly_2017.nc'

# Opened read-only; the handle stays open because later cells index into it.
sepa = nc.Dataset(sepa_filename, mode='r')

In [7]:
# Collect one record per SEPA station, then build the table in one call.
# (Appending rows to a frame with .loc inside a loop is quadratic and loses
# dtype information; in-place set_index is replaced by a chained call.)
sepa_records = []
for i in range(len(sepa['station'])):
    # proj maps a national-grid (x, y) pair to (latitude, longitude)
    lat, lon = proj.transform(float(sepa['x'][i]), float(sepa['y'][i]))
    sepa_records.append({'name': sepa['name'][i],
                         'id': int(sepa['station'][i]),
                         'longitude': lon,
                         'latitude': lat,
                         'geometry': Point(lon, lat)})   # shapely points are (x, y) = (lon, lat)

sepastations = GeoDataFrame(
    sepa_records,
    columns=['name', 'id', 'longitude', 'latitude', 'geometry'],
).set_index('id')

In [8]:
# Display the SEPA station table, indexed by station id.
sepastations

Unnamed: 0_level_0,name,longitude,latitude,geometry
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
15018,Abbey St Bathans,-2.400869,55.850549,POINT (-2.40087 55.85055)
234150,Aberlour,-3.218772,57.471363,POINT (-3.21877 57.47136)
115301,Affric Lodge,-5.019301,57.261328,POINT (-5.01930 57.26133)
234170,Alford,-2.730662,57.241578,POINT (-2.73066 57.24158)
115302,Allanfearn,-4.153978,57.494793,POINT (-4.15398 57.49479)
...,...,...,...,...
115660,Waulkmill Glen,-4.362137,55.792568,POINT (-4.36214 55.79257)
116008,Weisdale Mill,-1.297096,60.259372,POINT (-1.29710 60.25937)
115250,Westhill,-2.299088,57.144534,POINT (-2.29909 57.14453)
14881,Whitburn,-3.695373,55.866478,POINT (-3.69537 55.86648)


## CEDA rain gauges and a sample month 

In [11]:
# NetCDF file with the CEDA MIDAS gauge data (presumably monthly values for
# 2017, judging by the filename -- confirm against the dataset's own metadata).
ceda_filename = 'datasets/ceda_midas_monthly_2017.nc'

# Opened read-only; the handle stays open because later cells index into it.
ceda = nc.Dataset(ceda_filename, mode='r')

In [12]:
# Collect one record per CEDA station, then build the table in one call.
# (Appending rows to a frame with .loc inside a loop is quadratic and loses
# dtype information; in-place set_index is replaced by a chained call.)
ceda_records = []
for i in range(len(ceda['station'])):
    # proj maps a national-grid (x, y) pair to (latitude, longitude)
    lat, lon = proj.transform(float(ceda['x'][i]), float(ceda['y'][i]))
    ceda_records.append({'name': ceda['name'][i],
                         'id': int(ceda['station'][i]),
                         'longitude': lon,
                         'latitude': lat,
                         'geometry': Point(lon, lat)})   # shapely points are (x, y) = (lon, lat)

cedastations = GeoDataFrame(
    ceda_records,
    columns=['name', 'id', 'longitude', 'latitude', 'geometry'],
).set_index('id')

In [13]:
# Display the CEDA station table, indexed by station id.
cedastations

Unnamed: 0_level_0,name,longitude,latitude,geometry
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
38,achfary,-4.920076,58.306182,POINT (-4.92008 58.30618)
8231,huntsham,-3.439265,50.952553,POINT (-3.43927 50.95255)
50,strathy-east,-3.995386,58.557531,POINT (-3.99539 58.55753)
60,poolewe,-5.599708,57.767447,POINT (-5.59971 57.76745)
64,plockton,-5.656926,57.334277,POINT (-5.65693 57.33428)
...,...,...,...,...
24219,mannington-hall,1.176164,52.842971,POINT (1.17616 52.84297)
57118,bute-rothesay-no2,-5.066457,55.821956,POINT (-5.06646 55.82196)
57233,margam-no-2,-3.732128,51.551088,POINT (-3.73213 51.55109)
57266,saltfleetby-st-clements,0.179242,53.395922,POINT (0.17924 53.39592)


## UK EPA live rainfall

In [23]:
# Query window: the two days leading up to the moment this cell is run.
# Because it is anchored on the current time, the result differs between runs.
today = datetime.now()
start = today + timedelta(days=-2)

# NOTE(review): the meaning of the third argument (None) is defined by
# sensor_placement.data.uk_epa and is not visible here -- confirm.
epa = uk_epa(start, today, None)

No location information for Rainfall station
No location information for Rainfall station
No location information for Rainfall station
No location information for Rainfall station
.........................................................................................................................................................................................................................................................................................................................................................................................................................No value for reading (ignored)
No value for reading (ignored)
No value for reading (ignored)
No value for reading (ignored)
No value for reading (ignored)
No value for reading (ignored)
No value for reading (ignored)
No value for reading (ignored)
No value for reading (ignored)
No value for reading (ignored)
No value for reading (ignored)
......................................................................

In [24]:
# Collect one record per EPA station, then build the table in one call.
# (Appending rows to a frame with .loc inside a loop is quadratic and loses
# dtype information; in-place set_index is replaced by a chained call.)
epa_records = []
for i in range(len(epa['station'])):
    # proj maps a national-grid (x, y) pair to (latitude, longitude)
    lat, lon = proj.transform(float(epa['x'][i]), float(epa['y'][i]))
    epa_records.append({'name': epa['name'][i],
                        'id': int(epa['station'][i]),
                        'longitude': lon,
                        'latitude': lat,
                        'geometry': Point(lon, lat)})   # shapely points are (x, y) = (lon, lat)

epastations = GeoDataFrame(
    epa_records,
    columns=['name', 'id', 'longitude', 'latitude', 'geometry'],
).set_index('id')

## The datasets

In [25]:
def _format_position(lat, lon):
    """Format signed decimal degrees as e.g. '55.85N, 2.40W'.

    The hemisphere letter is derived from the sign and the magnitude is shown
    unsigned, so western longitudes no longer render as '-2.40W' and eastern
    ones are no longer mislabelled 'W'.
    """
    ns = 'N' if lat >= 0 else 'S'
    ew = 'E' if lon >= 0 else 'W'
    return f'{abs(lat):.2f}{ns}, {abs(lon):.2f}{ew}'


def _station_layer(stations, layer_name, colour):
    """Return a folium FeatureGroup holding one cloud marker per station.

    :param stations: frame indexed by station id with 'name', 'longitude'
        and 'latitude' columns
    :param layer_name: name shown for the layer in the layer control
    :param colour: folium icon colour for the markers
    :returns: the populated (not yet attached) FeatureGroup
    """
    layer = folium.FeatureGroup(name=layer_name)
    # iterrows() yields each row directly, so a repeated station id cannot
    # turn the lookup into a multi-row frame as stations.loc[id] would.
    for station_id, s in stations.iterrows():
        name, lon, lat = s['name'], s['longitude'], s['latitude']
        folium.Marker(location=(lat, lon),   # folium expects (latitude, longitude)
                      tooltip=f'{station_id}: {name} ({_format_position(lat, lon)})',
                      icon=folium.Icon(color=colour, icon='cloud')).add_to(layer)
    return layer


# NOTE(review): "Stamen Terrain" may no longer be available as a built-in
# tile set in recent folium releases -- confirm against the installed version
# and switch to another tile set (with attribution) if the map fails to build.
uk_gauges = folium.Map(location=(55, -3), tiles="Stamen Terrain", zoom_start=6)

# add the boundaries
county_boundaries_layer = folium.FeatureGroup(name='County boundaries')
for _, r in counties.iterrows():
    folium.GeoJson(r['geometry']).add_to(county_boundaries_layer)
county_boundaries_layer.add_to(uk_gauges)

# add the SEPA stations
sepa_layer = _station_layer(sepastations, 'SEPA stations', 'blue')
sepa_layer.add_to(uk_gauges)

# add the CEDA stations
ceda_layer = _station_layer(cedastations, 'CEDA MIDAS stations', 'green')
ceda_layer.add_to(uk_gauges)

# add the EPA live stations
epa_layer = _station_layer(epastations, 'EPA stations', 'red')
epa_layer.add_to(uk_gauges)

# add a layer control
_ = folium.LayerControl().add_to(uk_gauges)

In [26]:
# Render the map as this cell's output.
uk_gauges