diff --git a/nowcasting_dataset/config/model.py b/nowcasting_dataset/config/model.py
index f6535758..db49a44a 100644
--- a/nowcasting_dataset/config/model.py
+++ b/nowcasting_dataset/config/model.py
@@ -483,6 +483,15 @@ class Sun(DataSourceMixin):
         "gs://solar-pv-nowcasting-data/Sun/v1/sun.zarr/",
         description="Path to the Sun data source i.e Azimuth and Elevation",
     )
+    load_live: bool = Field(
+        False, description="Option to load sun data on the fly, rather than from file"
+    )
+
+    elevation_limit: int = Field(
+        10,
+        description="The limit to the elevations for examples. "
+        "Datetimes with elevation below this limit will be ignored",
+    )
 
 
 class InputData(Base):
diff --git a/nowcasting_dataset/data_sources/sun/sun_data_source.py b/nowcasting_dataset/data_sources/sun/sun_data_source.py
index 6a045793..d9459366 100644
--- a/nowcasting_dataset/data_sources/sun/sun_data_source.py
+++ b/nowcasting_dataset/data_sources/sun/sun_data_source.py
@@ -1,6 +1,7 @@
 """ Loading Raw data """
 import logging
 from dataclasses import dataclass
+from datetime import datetime
 from numbers import Number
 from pathlib import Path
 from typing import List, Tuple, Union
@@ -14,7 +15,7 @@
 from nowcasting_dataset.data_sources.metadata.metadata_model import SpaceTimeLocation
 from nowcasting_dataset.data_sources.sun.raw_data_load_save import load_from_zarr, x_y_to_name
 from nowcasting_dataset.data_sources.sun.sun_model import Sun
-from nowcasting_dataset.geospatial import calculate_azimuth_and_elevation_angle
+from nowcasting_dataset.geospatial import calculate_azimuth_and_elevation_angle, osgb_to_lat_lon
 
 logger = logging.getLogger(__name__)
 
@@ -24,6 +25,8 @@ class SunDataSource(DataSource):
     """Add azimuth and elevation angles of the sun."""
 
     zarr_path: Union[str, Path]
+    load_live: bool = False
+    elevation_limit: int = 10
 
     def __post_init__(self):
         """Post Init"""
@@ -37,7 +40,8 @@ def get_data_model_for_batch():
 
     def check_input_paths_exist(self) -> None:
         """Check input paths exist. If not, raise a FileNotFoundError."""
-        nd_fs_utils.check_path_exists(self.zarr_path)
+        if not self.load_live:
+            nd_fs_utils.check_path_exists(self.zarr_path)
 
     def get_example(self, location: SpaceTimeLocation) -> xr.Dataset:
         """
@@ -64,26 +68,40 @@ def get_example(self, location: SpaceTimeLocation) -> xr.Dataset:
         start_dt = self._get_start_dt(t0_datetime_utc)
         end_dt = self._get_end_dt(t0_datetime_utc)
 
-        # The names of the columns get truncated when saving, therefore we need to look for the
-        # name of the columns near the location we are looking for
-        locations = np.array(
-            [[float(z.split(",")[0]), float(z.split(",")[1])] for z in self.azimuth.columns]
-        )
-        location = locations[
-            np.isclose(locations[:, 0], x_center_osgb) & np.isclose(locations[:, 1], y_center_osgb)
-        ]
-        # lets make sure there is atleast one
-        assert len(location) > 0
-        # Take the first location, and x and y coordinates are the first and center entries in
-        # this array.
-        location = location[0]
-        # make name of column to pull data from. The columns name will be about
-        # something like '22222.555,3333.6666'
-        name = x_y_to_name(x=location[0], y=location[1])
-
-        del x_center_osgb, y_center_osgb
-        azimuth = self.azimuth.loc[start_dt:end_dt][name]
-        elevation = self.elevation.loc[start_dt:end_dt][name]
+        if not self.load_live:
+
+            # The names of the columns get truncated when saving, therefore we need to look for the
+            # name of the columns near the location we are looking for
+            locations = np.array(
+                [[float(z.split(",")[0]), float(z.split(",")[1])] for z in self.azimuth.columns]
+            )
+            location = locations[
+                np.isclose(locations[:, 0], x_center_osgb)
+                & np.isclose(locations[:, 1], y_center_osgb)
+            ]
+            # lets make sure there is atleast one
+            assert len(location) > 0
+            # Take the first location, and x and y coordinates are the first and center entries in
+            # this array.
+            location = location[0]
+            # make name of column to pull data from. The columns name will be about
+            # something like '22222.555,3333.6666'
+            name = x_y_to_name(x=location[0], y=location[1])
+
+            del x_center_osgb, y_center_osgb
+            azimuth = self.azimuth.loc[start_dt:end_dt][name]
+            elevation = self.elevation.loc[start_dt:end_dt][name]
+
+        else:
+
+            latitude, longitude = osgb_to_lat_lon(x=x_center_osgb, y=y_center_osgb)
+
+            datestamps = pd.date_range(start=start_dt, end=end_dt, freq="5T").tolist()
+            azimuth_elevation = calculate_azimuth_and_elevation_angle(
+                latitude=latitude, longitude=longitude, datestamps=datestamps
+            )
+            azimuth = azimuth_elevation["azimuth"]
+            elevation = azimuth_elevation["elevation"]
 
         azimuth = azimuth.to_xarray().rename({"index": "time"})
         elevation = elevation.to_xarray().rename({"index": "time"})
@@ -97,7 +115,8 @@ def _load(self):
 
         logger.info(f"Loading Sun data from {self.zarr_path}")
 
-        self.azimuth, self.elevation = load_from_zarr(zarr_path=self.zarr_path)
+        if not self.load_live:
+            self.azimuth, self.elevation = load_from_zarr(zarr_path=self.zarr_path)
 
     def get_locations(
         self, t0_datetimes_utc: pd.DatetimeIndex
@@ -112,13 +131,20 @@ def datetime_index(self) -> pd.DatetimeIndex:
         latitude = 51
         longitude = 0
 
+        if not self.load_live:
+            datestamps = self.elevation.index
+        else:
+            datestamps = pd.date_range(
+                datetime(2019, 1, 1), datetime(2019, 12, 31, 23, 55), freq="5T"
+            )
+
         # get elevation for all datetimes
         azimuth_elevation = calculate_azimuth_and_elevation_angle(
-            latitude=latitude, longitude=longitude, datestamps=self.elevation.index
+            latitude=latitude, longitude=longitude, datestamps=datestamps
         )
 
-        # only select elevations > 10
-        mask = azimuth_elevation["elevation"] >= 10
+        # only select elevations >= self.elevation_limit
+        mask = azimuth_elevation["elevation"] >= self.elevation_limit
 
         # create warnings, so we know how many datetimes will be dropped.
         # Should be slightly more than half as its night time 50% of the time
@@ -128,7 +154,7 @@ def datetime_index(self) -> pd.DatetimeIndex:
-            f"out of {len(azimuth_elevation)} as elevation is < 10"
+            f"out of {len(azimuth_elevation)} as elevation is < {self.elevation_limit}"
         )
 
-        datetimes = self.elevation[mask].index
+        datetimes = datestamps[mask]
 
         # Sun data is only for 2019, so to expand on these by
         # repeating data from 2014 to 2023
diff --git a/tests/data_sources/sun/test_sun_data_source.py b/tests/data_sources/sun/test_sun_data_source.py
index ecadaec6..241abb09 100644
--- a/tests/data_sources/sun/test_sun_data_source.py
+++ b/tests/data_sources/sun/test_sun_data_source.py
@@ -8,7 +8,8 @@
 def test_init(test_data_folder):  # noqa 103
     zarr_path = test_data_folder + "/sun/test.zarr"
 
-    _ = SunDataSource(zarr_path=zarr_path, history_minutes=30, forecast_minutes=60)
+    sun = SunDataSource(zarr_path=zarr_path, history_minutes=30, forecast_minutes=60)
+    _ = sun.datetime_index()
 
 
 def test_get_example(test_data_folder):  # noqa 103
@@ -43,3 +44,22 @@ def test_get_example_different_year(test_data_folder):  # noqa 103
 
     assert len(example.elevation) == 19
     assert len(example.azimuth) == 19
+
+
+def test_get_load_live():  # noqa 103
+
+    sun_data_source = SunDataSource(
+        zarr_path="", history_minutes=30, forecast_minutes=60, load_live=True
+    )
+    _ = sun_data_source.datetime_index()
+
+    x = 256895.63164759654
+    y = 666180.3018829626
+    start_dt = pd.Timestamp("2021-04-01 12:00:00.000")
+
+    example = sun_data_source.get_example(
+        location=SpaceTimeLocation(t0_datetime_utc=start_dt, x_center_osgb=x, y_center_osgb=y)
+    )
+
+    assert len(example.elevation) == 19
+    assert len(example.azimuth) == 19