This repository was archived by the owner on Sep 11, 2023. It is now read-only.
Merged
20 changes: 12 additions & 8 deletions notebooks/2021-09/2021-09-14/gsp_centroid.py
@@ -4,20 +4,23 @@
)
import plotly.graph_objects as go
import json
from nowcasting_dataset.geospatial import WGS84_CRS


# load data
shape_data_raw = get_gsp_metadata_from_eso()
shape_data_raw = shape_data_raw.sort_values(by=['RegionName'])
shape_data_raw['Amount'] = 0
shape_data_raw = shape_data_raw.to_crs(WGS84_CRS)
shape_data_raw = shape_data_raw.sort_values(by=["RegionName"])
shape_data_raw["Amount"] = 0

# for index in range(0, len(shape_data_raw)):
for index in range(140, 150):

# just select the first one
shape_data = shape_data_raw.iloc[index : index + 1]
shapes_dict = json.loads(shape_data["geometry"].to_json())
lon = shape_data["centroid_x"].iloc[0]
lat = shape_data["centroid_y"].iloc[0]
lon = shape_data["centroid_lon"].iloc[0]
lat = shape_data["centroid_lat"].iloc[0]

gsp_lon = shape_data["gsp_lon"].iloc[0]
gsp_lat = shape_data["gsp_lat"].iloc[0]
@@ -47,7 +50,11 @@
)
fig.add_trace(
go.Scattermapbox(
lon=[gsp_lon], lat=[gsp_lat], mode="markers", name="GSP Location", marker=dict(size=[10])
lon=[gsp_lon],
lat=[gsp_lat],
mode="markers",
name="GSP Location",
marker=dict(size=[10]),
)
)
fig.update_layout(margin={"r": 0, "t": 0, "l": 0, "b": 0})
@@ -56,6 +63,3 @@
# fig.show(renderer="browser")
fig.write_html(f"images/{region_name}.html")
fig.write_image(f"images/{region_name}_{index}.png")
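
For reference, a minimal, self-contained sketch of the plotting pattern this notebook relies on: go.Scattermapbox expects WGS84 longitude/latitude, which is why the notebook now projects the GeoDataFrame with to_crs(WGS84_CRS) and reads centroid_lon / centroid_lat rather than the OSGB centroid_x / centroid_y columns. The coordinates, map style, and output path below are hypothetical, and the sketch is simplified to a single marker (the notebook also plots the GSP shape itself).

```python
import plotly.graph_objects as go

# Hypothetical WGS84 coordinates (roughly central London); Scattermapbox needs
# lon/lat in degrees, not OSGB eastings/northings in metres.
lon, lat = -0.1, 51.5

fig = go.Figure(
    go.Scattermapbox(
        lon=[lon],
        lat=[lat],
        mode="markers",
        name="GSP Location",
        marker=dict(size=[10]),
    )
)
fig.update_layout(
    mapbox_style="carto-positron",  # token-free base map style
    mapbox_zoom=6,
    mapbox_center={"lat": lat, "lon": lon},
    margin={"r": 0, "t": 0, "l": 0, "b": 0},
)
fig.write_html("images/example_gsp.html")  # hypothetical output path
```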



42 changes: 35 additions & 7 deletions nowcasting_dataset/data_sources/gsp/eso.py
@@ -21,10 +21,22 @@
import geopandas as gpd
import pandas as pd

from nowcasting_dataset.geospatial import WGS84_CRS
from nowcasting_dataset.geospatial import osgb_to_lat_lon


logger = logging.getLogger(__name__)

# When saving a file, the columns need to be less than 10 characters -
# - https://github.com/geopandas/geopandas/issues/1417
# - https://en.wikipedia.org/wiki/Shapefile#Limitations
rename_save_columns = {
Contributor: Maybe in another PR, we could make it so we don't have to rename the columns, but instead use the same column names throughout the code?

Contributor Author: Yeah, I did this because the column names seemed to be limited to 10 characters, but let me check this again.

Contributor Author: And then it's a balance between a bit of extra code here to keep the fully verbose names, and a shorter column name that is less verbose. Personally I like the fully verbose version.

Contributor: Ah, sorry, my mistake... I hadn't realised shapefiles have these limitations! Thanks for explaining. Cool, let's leave the code as-is!
"centroid_x": "cen_x",
"centroid_y": "cen_y",
"centroid_lat": "cen_lat",
"centroid_lon": "cen_lon",
}
rename_load_columns = {v: k for k, v in rename_save_columns.items()}
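
As an aside, a minimal sketch (not part of the diff) of the save/load round-trip these two dictionaries support: the ESRI shapefile driver truncates field names to 10 characters, so the verbose centroid columns are shortened on save and restored on load. The file path and data values below are hypothetical.

```python
import geopandas as gpd
from shapely.geometry import Point

# Shortened (<10 character) names used only in the saved shapefile.
rename_save_columns = {"centroid_lat": "cen_lat", "centroid_lon": "cen_lon"}
rename_load_columns = {v: k for k, v in rename_save_columns.items()}

gdf = gpd.GeoDataFrame(
    {"centroid_lat": [51.5], "centroid_lon": [-0.1]},
    geometry=[Point(-0.1, 51.5)],
    crs="EPSG:4326",
)

# Save with short names so the shapefile driver does not truncate them itself...
gdf.rename(columns=rename_save_columns).to_file("gsp_shapes.shp")

# ...and restore the verbose names when reading the file back.
loaded = gpd.read_file("gsp_shapes.shp").rename(columns=rename_load_columns)
assert "centroid_lat" in loaded.columns
```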


def get_gsp_metadata_from_eso(calculate_centroid: bool = True) -> pd.DataFrame:
"""
@@ -63,10 +75,6 @@ def get_gsp_metadata_from_eso(calculate_centroid: bool = True) -> pd.DataFrame:
metadata.merge(shape_data, right_on="RegionID", left_on="region_id", how="left")
)

# make centroid
metadata["centroid_x"] = metadata["geometry"].centroid.x
metadata["centroid_y"] = metadata["geometry"].centroid.y

return metadata


@@ -95,6 +103,8 @@ def get_gsp_shape_from_eso(
if load_local_file:
logger.debug("loading local file for GSP shape data")
shape_gpd = gpd.read_file(local_file)
# rename the columns to full name
shape_gpd.rename(columns=rename_load_columns, inplace=True)
logger.debug("loading local file for GSP shape data:done")
else:
# call ESO website. There is a possibility that this API will be replaced and its unclear if this original API will
@@ -105,10 +115,28 @@
)

with urlopen(url) as response:
shape_gpd = gpd.read_file(response).to_crs(WGS84_CRS)
shape_gpd = gpd.read_file(response)

# calculate the centroid before any re-projection with to_crs
shape_gpd["centroid_x"] = shape_gpd["geometry"].centroid.x
shape_gpd["centroid_y"] = shape_gpd["geometry"].centroid.y
shape_gpd["centroid_lat"], shape_gpd["centroid_lon"] = osgb_to_lat_lon(
x=shape_gpd["centroid_x"], y=shape_gpd["centroid_y"]
)

# Decided not to project the shape data to WGS84, as we want to keep
# all 'batch' data in the same projection.
# However, when plotting it may be useful to project to WGS84,
# i.e. shape_gpd = shape_gpd.to_crs(WGS84_CRS)

if save_local_file:
shape_gpd.to_file(local_file)

# rename the columns to less than 10 characters
shape_gpd_to_save = shape_gpd.copy()
shape_gpd_to_save.rename(columns=rename_save_columns, inplace=True)

# save file
shape_gpd_to_save.to_file(local_file)

# sort
shape_gpd = shape_gpd.sort_values(by=["RegionID"])
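
The osgb_to_lat_lon helper itself is not part of this diff; as a rough sketch of the conversion it performs, assuming OSGB here means the British National Grid (EPSG:27700) and the target is WGS84 (EPSG:4326), a pyproj-based equivalent could look like the following (the real implementation in nowcasting_dataset.geospatial may differ):

```python
from pyproj import Transformer

# OSGB36 / British National Grid (EPSG:27700) -> WGS84 (EPSG:4326).
# always_xy=True keeps arguments in (x, y) = (easting, northing) order.
_osgb_to_wgs84 = Transformer.from_crs("EPSG:27700", "EPSG:4326", always_xy=True)


def osgb_to_lat_lon(x, y):
    """Convert OSGB eastings/northings (metres) to (latitude, longitude) in degrees."""
    lon, lat = _osgb_to_wgs84.transform(x, y)
    return lat, lon


# Hypothetical point roughly in central London.
lat, lon = osgb_to_lat_lon(530_000, 180_000)
```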
2 changes: 1 addition & 1 deletion nowcasting_dataset/data_sources/gsp/gsp_data_source.py
@@ -78,7 +78,7 @@ def load(self):

# make location x,y in osgb
self.metadata["location_x"], self.metadata["location_y"] = lat_lon_to_osgb(
lat=self.metadata["centroid_y"], lon=self.metadata["centroid_x"]
lat=self.metadata["centroid_lat"], lon=self.metadata["centroid_lon"]
)

# load gsp data from file / gcp
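
lat_lon_to_osgb goes the other way; again a rough, assumed pyproj equivalent (the actual helper in nowcasting_dataset.geospatial is not shown in this diff):

```python
from pyproj import Transformer

# WGS84 (EPSG:4326) -> OSGB36 / British National Grid (EPSG:27700).
_wgs84_to_osgb = Transformer.from_crs("EPSG:4326", "EPSG:27700", always_xy=True)


def lat_lon_to_osgb(lat, lon):
    """Convert latitude/longitude in degrees to OSGB (x, y) eastings/northings in metres."""
    return _wgs84_to_osgb.transform(lon, lat)
```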
21 changes: 13 additions & 8 deletions tests/data_sources/gsp/test_gsp_data_source.py
@@ -1,17 +1,12 @@
import os
import pytz
from nowcasting_dataset.data_sources.gsp.pvlive import load_pv_gsp_raw_data_from_pvlive
from nowcasting_dataset.data_sources.gsp.eso import (
get_gsp_metadata_from_eso,
get_gsp_shape_from_eso,
)
import pandas as pd
import geopandas as gpd
from datetime import datetime

import pandas as pd

import nowcasting_dataset
from nowcasting_dataset.consts import T0_DT
from nowcasting_dataset.data_sources.gsp.gsp_data_source import GSPDataSource
from nowcasting_dataset.geospatial import osgb_to_lat_lon


def test_gsp_pv_data_source_init():
@@ -46,6 +41,16 @@ def test_gsp_pv_data_source_get_locations_for_batch():
locations_x, locations_y = gsp.get_locations_for_batch(t0_datetimes=gsp.gsp_power.index[0:10])

assert len(locations_x) == len(locations_y)
# This makes sure the locations are not in lat/lon.
# Note that an OSGB coordinate could be <= 90, but that would put the location in the
# middle of the sea, which is impossible for GSP data.
assert locations_x[0] > 90
assert locations_y[0] > 90

lat, lon = osgb_to_lat_lon(locations_x, locations_y)

assert 0 < lat[0] < 90 # this makes sure it is in lat/lon
assert -90 < lon[0] < 90 # this makes sure it is in lat/lon
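
To make the 90 threshold in these assertions concrete, a small illustrative check with hypothetical values for a point roughly in central London: OSGB eastings/northings are metre-scale and far exceed 90, while WGS84 latitude/longitude for anywhere in the UK sit well inside the ±90 window.

```python
# Hypothetical OSGB coordinates (metres) for a point roughly in central London.
osgb_x, osgb_y = 530_000, 180_000
assert osgb_x > 90 and osgb_y > 90  # clearly not lat/lon

# The same point in WGS84 degrees.
lat, lon = 51.5, -0.1
assert 0 < lat < 90
assert -90 < lon < 90
```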


def test_gsp_pv_data_source_get_example():