From 384f3f45fd77f256e8146e5ef2017b3fd4ec8f94 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Mon, 27 Sep 2021 11:53:27 +0100 Subject: [PATCH 1/5] calculate centroid before using to_crs() --- notebooks/2021-09/2021-09-14/gsp_centroid.py | 17 +++++---- nowcasting_dataset/data_sources/gsp/eso.py | 37 +++++++++++++++---- .../data_sources/gsp/gsp_data_source.py | 5 +-- .../data_sources/gsp/test_gsp_data_source.py | 2 + 4 files changed, 43 insertions(+), 18 deletions(-) diff --git a/notebooks/2021-09/2021-09-14/gsp_centroid.py b/notebooks/2021-09/2021-09-14/gsp_centroid.py index 472af293..f1e4d90f 100644 --- a/notebooks/2021-09/2021-09-14/gsp_centroid.py +++ b/notebooks/2021-09/2021-09-14/gsp_centroid.py @@ -7,8 +7,8 @@ # load data shape_data_raw = get_gsp_metadata_from_eso() -shape_data_raw = shape_data_raw.sort_values(by=['RegionName']) -shape_data_raw['Amount'] = 0 +shape_data_raw = shape_data_raw.sort_values(by=["RegionName"]) +shape_data_raw["Amount"] = 0 # for index in range(0, len(shape_data_raw)): for index in range(140, 150): @@ -16,8 +16,8 @@ # just select the first one shape_data = shape_data_raw.iloc[index : index + 1] shapes_dict = json.loads(shape_data["geometry"].to_json()) - lon = shape_data["centroid_x"].iloc[0] - lat = shape_data["centroid_y"].iloc[0] + lon = shape_data["centroid_lon"].iloc[0] + lat = shape_data["centroid_lat"].iloc[0] gsp_lon = shape_data["gsp_lon"].iloc[0] gsp_lat = shape_data["gsp_lat"].iloc[0] @@ -47,7 +47,11 @@ ) fig.add_trace( go.Scattermapbox( - lon=[gsp_lon], lat=[gsp_lat], mode="markers", name="GSP Location", marker=dict(size=[10]) + lon=[gsp_lon], + lat=[gsp_lat], + mode="markers", + name="GSP Location", + marker=dict(size=[10]), ) ) fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0}) @@ -56,6 +60,3 @@ # fig.show(renderer="browser") fig.write_html(f"images/{region_name}.html") fig.write_image(f"images/{region_name}_{index}.png") - - - diff --git a/nowcasting_dataset/data_sources/gsp/eso.py 
b/nowcasting_dataset/data_sources/gsp/eso.py index 70824e69..120e7655 100644 --- a/nowcasting_dataset/data_sources/gsp/eso.py +++ b/nowcasting_dataset/data_sources/gsp/eso.py @@ -21,10 +21,19 @@ import geopandas as gpd import pandas as pd -from nowcasting_dataset.geospatial import WGS84_CRS +from nowcasting_dataset.geospatial import osgb_to_lat_lon, WGS84_CRS + logger = logging.getLogger(__name__) +rename_save_columns = { + "centroid_x": "cen_x", + "centroid_y": "cen_y", + "centroid_lat": "cen_lat", + "centroid_lon": "cen_lon", +} +rename_load_columns = {v: k for k, v in rename_save_columns.items()} + def get_gsp_metadata_from_eso(calculate_centroid: bool = True) -> pd.DataFrame: """ @@ -63,10 +72,6 @@ def get_gsp_metadata_from_eso(calculate_centroid: bool = True) -> pd.DataFrame: metadata.merge(shape_data, right_on="RegionID", left_on="region_id", how="left") ) - # make centroid - metadata["centroid_x"] = metadata["geometry"].centroid.x - metadata["centroid_y"] = metadata["geometry"].centroid.y - return metadata @@ -95,6 +100,8 @@ def get_gsp_shape_from_eso( if load_local_file: logger.debug("loading local file for GSP shape data") shape_gpd = gpd.read_file(local_file) + # rename the columns to full name + shape_gpd.rename(columns=rename_load_columns, inplace=True) logger.debug("loading local file for GSP shape data:done") else: # call ESO website. 
There is a possibility that this API will be replaced and its unclear if this original API will @@ -105,10 +112,26 @@ def get_gsp_shape_from_eso( ) with urlopen(url) as response: - shape_gpd = gpd.read_file(response).to_crs(WGS84_CRS) + shape_gpd = gpd.read_file(response) + + # calculate the centroid before using - to_crs + shape_gpd["centroid_x"] = shape_gpd["geometry"].centroid.x + shape_gpd["centroid_y"] = shape_gpd["geometry"].centroid.y + shape_gpd["centroid_lat"], shape_gpd["centroid_lon"] = osgb_to_lat_lon( + x=shape_gpd["centroid_x"], y=shape_gpd["centroid_y"] + ) + + # project to WGS84 i.e + shape_gpd = shape_gpd.to_crs(WGS84_CRS) if save_local_file: - shape_gpd.to_file(local_file) + + # rename the columns to less than 10 characters + shape_gpd_to_save = shape_gpd.copy() + shape_gpd_to_save.rename(columns=rename_save_columns, inplace=True) + + # save file + shape_gpd_to_save.to_file(local_file) # sort shape_gpd = shape_gpd.sort_values(by=["RegionID"]) diff --git a/nowcasting_dataset/data_sources/gsp/gsp_data_source.py b/nowcasting_dataset/data_sources/gsp/gsp_data_source.py index 6577110d..76b3f975 100644 --- a/nowcasting_dataset/data_sources/gsp/gsp_data_source.py +++ b/nowcasting_dataset/data_sources/gsp/gsp_data_source.py @@ -77,9 +77,8 @@ def load(self): self.metadata = get_gsp_metadata_from_eso() # make location x,y in osgb - self.metadata["location_x"], self.metadata["location_y"] = lat_lon_to_osgb( - lat=self.metadata["centroid_y"], lon=self.metadata["centroid_x"] - ) + self.metadata["location_x"] = self.metadata["centroid_x"] + self.metadata["location_y"] = self.metadata["centroid_y"] # load gsp data from file / gcp self.gsp_power = load_solar_gsp_data( diff --git a/tests/data_sources/gsp/test_gsp_data_source.py b/tests/data_sources/gsp/test_gsp_data_source.py index ef47da18..7154f292 100644 --- a/tests/data_sources/gsp/test_gsp_data_source.py +++ b/tests/data_sources/gsp/test_gsp_data_source.py @@ -46,6 +46,8 @@ def 
test_gsp_pv_data_source_get_locations_for_batch(): locations_x, locations_y = gsp.get_locations_for_batch(t0_datetimes=gsp.gsp_power.index[0:10]) assert len(locations_x) == len(locations_y) + assert locations_x[0] > 180 # this makes sure it is not in lat/lon + assert locations_y[0] > 90 # this makes sure it is not in lat/lon def test_gsp_pv_data_source_get_example(): From 621a735e3518b3ff57fe089848e1025c9ac997f8 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Mon, 27 Sep 2021 12:04:25 +0100 Subject: [PATCH 2/5] PR self review --- nowcasting_dataset/data_sources/gsp/eso.py | 2 +- nowcasting_dataset/data_sources/gsp/gsp_data_source.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/nowcasting_dataset/data_sources/gsp/eso.py b/nowcasting_dataset/data_sources/gsp/eso.py index 120e7655..8f662e38 100644 --- a/nowcasting_dataset/data_sources/gsp/eso.py +++ b/nowcasting_dataset/data_sources/gsp/eso.py @@ -121,7 +121,7 @@ def get_gsp_shape_from_eso( x=shape_gpd["centroid_x"], y=shape_gpd["centroid_y"] ) - # project to WGS84 i.e + # project to WGS84 shape_gpd = shape_gpd.to_crs(WGS84_CRS) if save_local_file: diff --git a/nowcasting_dataset/data_sources/gsp/gsp_data_source.py b/nowcasting_dataset/data_sources/gsp/gsp_data_source.py index 76b3f975..10325943 100644 --- a/nowcasting_dataset/data_sources/gsp/gsp_data_source.py +++ b/nowcasting_dataset/data_sources/gsp/gsp_data_source.py @@ -77,8 +77,9 @@ def load(self): self.metadata = get_gsp_metadata_from_eso() # make location x,y in osgb - self.metadata["location_x"] = self.metadata["centroid_x"] - self.metadata["location_y"] = self.metadata["centroid_y"] + self.metadata["location_x"], self.metadata["location_y"] = lat_lon_to_osgb( + lat=self.metadata["centroid_lat"], lon=self.metadata["centroid_lon"] + ) # load gsp data from file / gcp self.gsp_power = load_solar_gsp_data( From 4d23f20eae6dbd4b1eb298e0c51f9102a565b355 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Mon, 27 Sep 2021 12:06:53 +0100 
Subject: [PATCH 3/5] add extra check --- tests/data_sources/gsp/test_gsp_data_source.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/data_sources/gsp/test_gsp_data_source.py b/tests/data_sources/gsp/test_gsp_data_source.py index 7154f292..9447680c 100644 --- a/tests/data_sources/gsp/test_gsp_data_source.py +++ b/tests/data_sources/gsp/test_gsp_data_source.py @@ -1,17 +1,12 @@ import os -import pytz -from nowcasting_dataset.data_sources.gsp.pvlive import load_pv_gsp_raw_data_from_pvlive -from nowcasting_dataset.data_sources.gsp.eso import ( - get_gsp_metadata_from_eso, - get_gsp_shape_from_eso, -) -import pandas as pd -import geopandas as gpd from datetime import datetime +import pandas as pd + import nowcasting_dataset from nowcasting_dataset.consts import T0_DT from nowcasting_dataset.data_sources.gsp.gsp_data_source import GSPDataSource +from nowcasting_dataset.geospatial import osgb_to_lat_lon def test_gsp_pv_data_source_init(): @@ -46,9 +41,14 @@ def test_gsp_pv_data_source_get_locations_for_batch(): locations_x, locations_y = gsp.get_locations_for_batch(t0_datetimes=gsp.gsp_power.index[0:10]) assert len(locations_x) == len(locations_y) - assert locations_x[0] > 180 # this makes sure it is not in lat/lon + assert locations_x[0] > 90 # this makes sure it is not in lat/lon assert locations_y[0] > 90 # this makes sure it is not in lat/lon + lat, lon = osgb_to_lat_lon(locations_x, locations_y) + + assert lat[0] < 90 # this makes sure it is in lat/lon + assert -90 < lon[0] < 90 # this makes sure it is in lat/lon + def test_gsp_pv_data_source_get_example(): local_path = os.path.dirname(nowcasting_dataset.__file__) + "/.." 
From 472c04e324af039133303a16aabfa24afd3a04f8 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Mon, 27 Sep 2021 12:07:39 +0100 Subject: [PATCH 4/5] small extra test --- tests/data_sources/gsp/test_gsp_data_source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/data_sources/gsp/test_gsp_data_source.py b/tests/data_sources/gsp/test_gsp_data_source.py index 9447680c..4709ede4 100644 --- a/tests/data_sources/gsp/test_gsp_data_source.py +++ b/tests/data_sources/gsp/test_gsp_data_source.py @@ -46,7 +46,7 @@ def test_gsp_pv_data_source_get_locations_for_batch(): lat, lon = osgb_to_lat_lon(locations_x, locations_y) - assert lat[0] < 90 # this makes sure it is in lat/lon + assert 0 < lat[0] < 90 # this makes sure it is in lat/lon assert -90 < lon[0] < 90 # this makes sure it is in lat/lon From 7eb5bdc62cf6cfa762544db1024c02a44ffde591 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Wed, 29 Sep 2021 09:52:21 +0100 Subject: [PATCH 5/5] PR comments --- notebooks/2021-09/2021-09-14/gsp_centroid.py | 3 +++ nowcasting_dataset/data_sources/gsp/eso.py | 11 ++++++++--- tests/data_sources/gsp/test_gsp_data_source.py | 7 +++++-- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/notebooks/2021-09/2021-09-14/gsp_centroid.py b/notebooks/2021-09/2021-09-14/gsp_centroid.py index f1e4d90f..08396aa6 100644 --- a/notebooks/2021-09/2021-09-14/gsp_centroid.py +++ b/notebooks/2021-09/2021-09-14/gsp_centroid.py @@ -4,9 +4,12 @@ ) import plotly.graph_objects as go import json +from nowcasting_dataset.geospatial import WGS84_CRS + # load data shape_data_raw = get_gsp_metadata_from_eso() +shape_data_raw = shape_data_raw.to_crs(WGS84_CRS) shape_data_raw = shape_data_raw.sort_values(by=["RegionName"]) shape_data_raw["Amount"] = 0 diff --git a/nowcasting_dataset/data_sources/gsp/eso.py b/nowcasting_dataset/data_sources/gsp/eso.py index 8f662e38..0fd28f27 100644 --- a/nowcasting_dataset/data_sources/gsp/eso.py +++ b/nowcasting_dataset/data_sources/gsp/eso.py 
@@ -21,11 +21,14 @@ import geopandas as gpd import pandas as pd -from nowcasting_dataset.geospatial import osgb_to_lat_lon, WGS84_CRS +from nowcasting_dataset.geospatial import osgb_to_lat_lon logger = logging.getLogger(__name__) +# When saving a file, the columns need to be less than 10 characters - +# - https://github.com/geopandas/geopandas/issues/1417 +# - https://en.wikipedia.org/wiki/Shapefile#Limitations rename_save_columns = { "centroid_x": "cen_x", "centroid_y": "cen_y", @@ -121,8 +124,10 @@ def get_gsp_shape_from_eso( x=shape_gpd["centroid_x"], y=shape_gpd["centroid_y"] ) - # project to WGS84 - shape_gpd = shape_gpd.to_crs(WGS84_CRS) + # Decided not to project the shape data to WGS84, as we want to keep + # all 'batch' data in the same projection. + # However when plotting it may be useful to project to WGS84 + # i.e. shape_gpd = shape_gpd.to_crs(WGS84_CRS) if save_local_file: diff --git a/tests/data_sources/gsp/test_gsp_data_source.py b/tests/data_sources/gsp/test_gsp_data_source.py index 4709ede4..2070ccf8 100644 --- a/tests/data_sources/gsp/test_gsp_data_source.py +++ b/tests/data_sources/gsp/test_gsp_data_source.py @@ -41,8 +41,11 @@ def test_gsp_pv_data_source_get_locations_for_batch(): locations_x, locations_y = gsp.get_locations_for_batch(t0_datetimes=gsp.gsp_power.index[0:10]) assert len(locations_x) == len(locations_y) - assert locations_x[0] > 90 # this makes sure it is not in lat/lon - assert locations_y[0] > 90 # this makes sure it is not in lat/lon + # This makes sure it is not in lat/lon. + # Note that OSGB could be <= 90, but that would mean a location in the middle of the sea, + # which is impossible for GSP data + assert locations_x[0] > 90 + assert locations_y[0] > 90 lat, lon = osgb_to_lat_lon(locations_x, locations_y)