diff --git a/notebooks/2021-09/2021-09-14/gsp_centroid.py b/notebooks/2021-09/2021-09-14/gsp_centroid.py index 472af293..08396aa6 100644 --- a/notebooks/2021-09/2021-09-14/gsp_centroid.py +++ b/notebooks/2021-09/2021-09-14/gsp_centroid.py @@ -4,11 +4,14 @@ ) import plotly.graph_objects as go import json +from nowcasting_dataset.geospatial import WGS84_CRS + # load data shape_data_raw = get_gsp_metadata_from_eso() -shape_data_raw = shape_data_raw.sort_values(by=['RegionName']) -shape_data_raw['Amount'] = 0 +shape_data_raw = shape_data_raw.to_crs(WGS84_CRS) +shape_data_raw = shape_data_raw.sort_values(by=["RegionName"]) +shape_data_raw["Amount"] = 0 # for index in range(0, len(shape_data_raw)): for index in range(140, 150): @@ -16,8 +19,8 @@ # just select the first one shape_data = shape_data_raw.iloc[index : index + 1] shapes_dict = json.loads(shape_data["geometry"].to_json()) - lon = shape_data["centroid_x"].iloc[0] - lat = shape_data["centroid_y"].iloc[0] + lon = shape_data["centroid_lon"].iloc[0] + lat = shape_data["centroid_lat"].iloc[0] gsp_lon = shape_data["gsp_lon"].iloc[0] gsp_lat = shape_data["gsp_lat"].iloc[0] @@ -47,7 +50,11 @@ ) fig.add_trace( go.Scattermapbox( - lon=[gsp_lon], lat=[gsp_lat], mode="markers", name="GSP Location", marker=dict(size=[10]) + lon=[gsp_lon], + lat=[gsp_lat], + mode="markers", + name="GSP Location", + marker=dict(size=[10]), ) ) fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0}) @@ -56,6 +63,3 @@ # fig.show(renderer="browser") fig.write_html(f"images/{region_name}.html") fig.write_image(f"images/{region_name}_{index}.png") - - - diff --git a/nowcasting_dataset/data_sources/gsp/eso.py b/nowcasting_dataset/data_sources/gsp/eso.py index 70824e69..0fd28f27 100644 --- a/nowcasting_dataset/data_sources/gsp/eso.py +++ b/nowcasting_dataset/data_sources/gsp/eso.py @@ -21,10 +21,22 @@ import geopandas as gpd import pandas as pd -from nowcasting_dataset.geospatial import WGS84_CRS +from nowcasting_dataset.geospatial import 
osgb_to_lat_lon + logger = logging.getLogger(__name__) +# When saving to a shapefile, column names can be at most 10 characters - +# - https://github.com/geopandas/geopandas/issues/1417 +# - https://en.wikipedia.org/wiki/Shapefile#Limitations +rename_save_columns = { + "centroid_x": "cen_x", + "centroid_y": "cen_y", + "centroid_lat": "cen_lat", + "centroid_lon": "cen_lon", +} +rename_load_columns = {v: k for k, v in rename_save_columns.items()} + def get_gsp_metadata_from_eso(calculate_centroid: bool = True) -> pd.DataFrame: """ @@ -63,10 +75,6 @@ def get_gsp_metadata_from_eso(calculate_centroid: bool = True) -> pd.DataFrame: metadata.merge(shape_data, right_on="RegionID", left_on="region_id", how="left") ) - # make centroid - metadata["centroid_x"] = metadata["geometry"].centroid.x - metadata["centroid_y"] = metadata["geometry"].centroid.y - return metadata @@ -95,6 +103,8 @@ def get_gsp_shape_from_eso( if load_local_file: logger.debug("loading local file for GSP shape data") shape_gpd = gpd.read_file(local_file) + # rename the columns to full name + shape_gpd.rename(columns=rename_load_columns, inplace=True) logger.debug("loading local file for GSP shape data:done") else: # call ESO website. There is a possibility that this API will be replaced and its unclear if this original API will @@ -105,10 +115,28 @@ def get_gsp_shape_from_eso( ) with urlopen(url) as response: - shape_gpd = gpd.read_file(response).to_crs(WGS84_CRS) + shape_gpd = gpd.read_file(response) + + # calculate the centroid before using - to_crs + shape_gpd["centroid_x"] = shape_gpd["geometry"].centroid.x + shape_gpd["centroid_y"] = shape_gpd["geometry"].centroid.y + shape_gpd["centroid_lat"], shape_gpd["centroid_lon"] = osgb_to_lat_lon( + x=shape_gpd["centroid_x"], y=shape_gpd["centroid_y"] + ) + + # Decided not to project the shape data to WGS84, as we want to keep + # all 'batch' data in the same projection. 
+ # However, when plotting it may be useful to project to WGS84 + # i.e. shape_gpd = shape_gpd.to_crs(WGS84_CRS) if save_local_file: - shape_gpd.to_file(local_file) + + # shorten the column names to at most 10 characters + shape_gpd_to_save = shape_gpd.copy() + shape_gpd_to_save.rename(columns=rename_save_columns, inplace=True) + + # save file + shape_gpd_to_save.to_file(local_file) # sort shape_gpd = shape_gpd.sort_values(by=["RegionID"]) diff --git a/nowcasting_dataset/data_sources/gsp/gsp_data_source.py b/nowcasting_dataset/data_sources/gsp/gsp_data_source.py index 6577110d..10325943 100644 --- a/nowcasting_dataset/data_sources/gsp/gsp_data_source.py +++ b/nowcasting_dataset/data_sources/gsp/gsp_data_source.py @@ -78,7 +78,7 @@ def load(self): # make location x,y in osgb self.metadata["location_x"], self.metadata["location_y"] = lat_lon_to_osgb( - lat=self.metadata["centroid_y"], lon=self.metadata["centroid_x"] + lat=self.metadata["centroid_lat"], lon=self.metadata["centroid_lon"] ) # load gsp data from file / gcp diff --git a/tests/data_sources/gsp/test_gsp_data_source.py b/tests/data_sources/gsp/test_gsp_data_source.py index ef47da18..2070ccf8 100644 --- a/tests/data_sources/gsp/test_gsp_data_source.py +++ b/tests/data_sources/gsp/test_gsp_data_source.py @@ -1,17 +1,12 @@ import os -import pytz -from nowcasting_dataset.data_sources.gsp.pvlive import load_pv_gsp_raw_data_from_pvlive -from nowcasting_dataset.data_sources.gsp.eso import ( - get_gsp_metadata_from_eso, - get_gsp_shape_from_eso, -) -import pandas as pd -import geopandas as gpd from datetime import datetime +import pandas as pd + import nowcasting_dataset from nowcasting_dataset.consts import T0_DT from nowcasting_dataset.data_sources.gsp.gsp_data_source import GSPDataSource +from nowcasting_dataset.geospatial import osgb_to_lat_lon def test_gsp_pv_data_source_init(): @@ -46,6 +41,16 @@ def test_gsp_pv_data_source_get_locations_for_batch(): locations_x, locations_y = 
gsp.get_locations_for_batch(t0_datetimes=gsp.gsp_power.index[0:10]) assert len(locations_x) == len(locations_y) + # This makes sure it is not in lat/lon. + # Note that OSGB coordinates could be <= 90, but that would mean a location in the middle of the sea, + # which is impossible for GSP data + assert locations_x[0] > 90 + assert locations_y[0] > 90 + + lat, lon = osgb_to_lat_lon(locations_x, locations_y) + + assert 0 < lat[0] < 90 # this makes sure it is in lat/lon + assert -90 < lon[0] < 90 # this makes sure it is in lat/lon def test_gsp_pv_data_source_get_example():