Skip to content
This repository was archived by the owner on Sep 11, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 46 additions & 4 deletions nowcasting_dataset/config/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
are used to validate the values of the data itself.

"""
import logging
from datetime import datetime
from pathlib import Path
from typing import Optional, Union
Expand All @@ -32,6 +33,8 @@
IMAGE_SIZE_PIXELS_FIELD = Field(64, description="The number of pixels of the region of interest.")
METERS_PER_PIXEL_FIELD = Field(2000, description="The number of meters per pixel.")

logger = logging.getLogger(__name__)


class General(BaseModel):
"""General pydantic model"""
Expand Down Expand Up @@ -86,7 +89,42 @@ def seq_length_60_minutes(self):
return int((self.history_minutes + self.forecast_minutes) / 60 + 1)


class PV(DataSourceMixin):
class StartEndDatetimeMixin(BaseModel):
    """Mixin class to add start and end date

    Adds the ``start_datetime`` and ``end_datetime`` fields to a configuration model
    and validates that ``start_datetime`` is strictly earlier than ``end_datetime``.
    """

    start_datetime: datetime = Field(
        datetime(2020, 1, 1),
        description="Load date from data sources from this date. "
        "If None, this will get overwritten by InputData.start_date. ",
    )
    # NOTE(review): the description below refers to InputData.start_date for the *end*
    # datetime; this looks like a copy-paste from the field above - confirm the intended name.
    end_datetime: datetime = Field(
        datetime(2021, 9, 1),
        description="Load date from data sources up to this date. "
        "If None, this will get overwritten by InputData.start_date. ",
    )

    @root_validator
    def check_start_and_end_datetime(cls, values):
        """
        Make sure start datetime is before end datetime

        Args:
            values: the field values collected by pydantic for this model.

        Returns:
            The unchanged ``values`` dictionary.

        Raises:
            ValueError: if ``start_datetime`` is not strictly before ``end_datetime``.
                Pydantic reports this to the caller as a validation error.
        """

        # Use .get(): a field that failed its own validation is left out of `values`,
        # and indexing would mask the real validation error with a KeyError.
        start_datetime = values.get("start_datetime")
        end_datetime = values.get("end_datetime")

        if start_datetime is None or end_datetime is None:
            # Nothing to compare; any per-field error is reported separately.
            return values

        # check start datetime is less than end datetime
        if start_datetime >= end_datetime:
            message = (
                f"Start datetime ({start_datetime}) "
                f"should be less than end datetime ({end_datetime})"
            )
            logger.error(message)
            # `assert Exception(message)` never fails because an exception instance is
            # always truthy; raise so that invalid configurations are actually rejected.
            raise ValueError(message)

        return values


class PV(DataSourceMixin, StartEndDatetimeMixin):
"""PV configuration model"""

pv_filename: str = Field(
Expand Down Expand Up @@ -163,7 +201,7 @@ class NWP(DataSourceMixin):
nwp_meters_per_pixel: int = METERS_PER_PIXEL_FIELD


class GSP(DataSourceMixin):
class GSP(DataSourceMixin, StartEndDatetimeMixin):
"""GSP configuration model"""

gsp_zarr_path: str = Field("gs://solar-pv-nowcasting-data/PV/GSP/v2/pv_gsp.zarr")
Expand Down Expand Up @@ -372,8 +410,12 @@ class Process(BaseModel):

@validator("local_temp_path")
def local_temp_path_to_path_object_expanduser(cls, v):
    """
    Turn the temporary path into an expanded `pathlib.Path`.

    The incoming value is wrapped in a `pathlib.Path` object and `expanduser`
    is then called on that object; the expanded path is returned.
    """
    temp_path = Path(v)
    return temp_path.expanduser()


Expand Down
10 changes: 3 additions & 7 deletions nowcasting_dataset/data_sources/gsp/gsp_data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,9 @@ class GSPDataSource(ImageDataSource):
# zarr_path of where the gsp data is stored
zarr_path: Union[str, Path]
# start datetime, this can be None
# TODO: Issue #425: Use config to set start_dt and end_dt.
start_dt: Optional[datetime] = pd.Timestamp("2020-01-01")
start_datetime: Optional[datetime] = None
# end datetime, this can be None
# TODO: Issue #425: Use config to set start_dt and end_dt.
end_dt: Optional[datetime] = pd.Timestamp("2022-01-01")
end_datetime: Optional[datetime] = None
# the threshold where we only take GSPs with a maximum power above this value.
threshold_mw: int = 0
# get the data for the gsp at the center too.
Expand All @@ -68,8 +66,6 @@ def __post_init__(self, image_size_pixels: int, meters_per_pixel: int):
Set random seed and load data
"""
super().__post_init__(image_size_pixels, meters_per_pixel)
# TODO: Issue #425: Remove this logger warning.
logger.warning("GSPDataSource is using hard-coded start_dt and end_dt!")
self.rng = np.random.default_rng()
self.load()

Expand Down Expand Up @@ -102,7 +98,7 @@ def load(self):

# load gsp data from file / gcp
self.gsp_power, self.gsp_capacity = load_solar_gsp_data(
self.zarr_path, start_dt=self.start_dt, end_dt=self.end_dt
self.zarr_path, start_dt=self.start_datetime, end_dt=self.end_datetime
)

# drop any gsp below a threshold mw. This is to get rid of any small GSP where
Expand Down
11 changes: 6 additions & 5 deletions nowcasting_dataset/data_sources/pv/pv_data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ class PVDataSource(ImageDataSource):
filename: Union[str, Path]
metadata_filename: Union[str, Path]
# TODO: Issue #425: Use config to set start_dt and end_dt.
start_dt: Optional[datetime.datetime] = pd.Timestamp("2020-01-01")
end_dt: Optional[datetime.datetime] = pd.Timestamp("2022-01-01")
start_datetime: Optional[datetime.datetime] = None
end_datetime: Optional[datetime.datetime] = None
random_pv_system_for_given_location: Optional[bool] = True
#: Each example will always have this many PV systems.
#: If less than this number exist in the data then pad with NaNs.
Expand All @@ -48,8 +48,7 @@ class PVDataSource(ImageDataSource):
def __post_init__(self, image_size_pixels: int, meters_per_pixel: int):
"""Post Init"""
super().__post_init__(image_size_pixels, meters_per_pixel)
# TODO: Issue #425: Remove this logger warning.
logger.warning("PVDataSource is using hard-coded start_dt and end_dt!")

self.rng = np.random.default_rng()
self.load()

Expand Down Expand Up @@ -101,7 +100,9 @@ def _load_pv_power(self):

logger.debug(f"Loading PV Power data from {self.filename}")

pv_power = load_solar_pv_data(self.filename, start_dt=self.start_dt, end_dt=self.end_dt)
pv_power = load_solar_pv_data(
self.filename, start_dt=self.start_datetime, end_dt=self.end_datetime
)

# A bit of hand-crafted cleaning
if 30248 in pv_power.columns:
Expand Down
20 changes: 10 additions & 10 deletions tests/data_sources/gsp/test_gsp_data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ def test_gsp_pv_data_source_init():

_ = GSPDataSource(
zarr_path=f"{local_path}/tests/data/gsp/test.zarr",
start_dt=datetime(2020, 4, 1),
end_dt=datetime(2020, 4, 2),
start_datetime=datetime(2020, 4, 1),
end_datetime=datetime(2020, 4, 2),
history_minutes=30,
forecast_minutes=60,
image_size_pixels=64,
Expand All @@ -33,8 +33,8 @@ def test_gsp_pv_data_source_get_locations():

gsp = GSPDataSource(
zarr_path=f"{local_path}/tests/data/gsp/test.zarr",
start_dt=datetime(2020, 4, 1),
end_dt=datetime(2020, 4, 2),
start_datetime=datetime(2020, 4, 1),
end_datetime=datetime(2020, 4, 2),
history_minutes=30,
forecast_minutes=60,
image_size_pixels=64,
Expand Down Expand Up @@ -65,8 +65,8 @@ def test_gsp_pv_data_source_get_example():

gsp = GSPDataSource(
zarr_path=f"{local_path}/tests/data/gsp/test.zarr",
start_dt=datetime(2020, 4, 1),
end_dt=datetime(2020, 4, 2),
start_datetime=datetime(2020, 4, 1),
end_datetime=datetime(2020, 4, 2),
history_minutes=30,
forecast_minutes=60,
image_size_pixels=64,
Expand All @@ -91,8 +91,8 @@ def test_gsp_pv_data_source_get_batch():

gsp = GSPDataSource(
zarr_path=f"{local_path}/tests/data/gsp/test.zarr",
start_dt=datetime(2020, 4, 1),
end_dt=datetime(2020, 4, 2),
start_datetime=datetime(2020, 4, 1),
end_datetime=datetime(2020, 4, 2),
history_minutes=30,
forecast_minutes=60,
image_size_pixels=64,
Expand Down Expand Up @@ -121,8 +121,8 @@ def test_drop_gsp_north_of_boundary(test_data_folder):

gsp = GSPDataSource(
zarr_path=f"{test_data_folder}/gsp/test.zarr",
start_dt=datetime(2020, 4, 1),
end_dt=datetime(2020, 4, 2),
start_datetime=datetime(2020, 4, 1),
end_datetime=datetime(2020, 4, 2),
history_minutes=30,
forecast_minutes=60,
image_size_pixels=64,
Expand Down
8 changes: 4 additions & 4 deletions tests/data_sources/test_pv_data_source.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ def test_get_example_and_batch(): # noqa: D103
meters_per_pixel=2000,
filename=PV_DATA_FILENAME,
metadata_filename=PV_METADATA_FILENAME,
start_dt=datetime.fromisoformat("2020-04-01 00:00:00.000"),
end_dt=datetime.fromisoformat("2020-04-02 00:00:00.000"),
start_datetime=datetime.fromisoformat("2020-04-01 00:00:00.000"),
end_datetime=datetime.fromisoformat("2020-04-02 00:00:00.000"),
load_azimuth_and_elevation=False,
load_from_gcs=False,
)
Expand Down Expand Up @@ -75,8 +75,8 @@ def test_passive():
pv = PVDataSource(
filename=filename,
metadata_filename=filename_metadata,
start_dt=datetime(2020, 3, 28),
end_dt=datetime(2020, 4, 1),
start_datetime=datetime(2020, 3, 28),
end_datetime=datetime(2020, 4, 1),
history_minutes=60,
forecast_minutes=30,
image_size_pixels=64,
Expand Down
8 changes: 4 additions & 4 deletions tests/test_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ def test_sample_spatial_and_temporal_locations_for_examples(): # noqa: D103

gsp = GSPDataSource(
zarr_path=f"{local_path}/tests/data/gsp/test.zarr",
start_dt=datetime(2020, 4, 1),
end_dt=datetime(2020, 4, 2),
start_datetime=datetime(2020, 4, 1),
end_datetime=datetime(2020, 4, 2),
history_minutes=30,
forecast_minutes=60,
image_size_pixels=64,
Expand Down Expand Up @@ -139,8 +139,8 @@ def test_batches():

gsp = GSPDataSource(
zarr_path=filename,
start_dt=datetime(2020, 4, 1),
end_dt=datetime(2020, 4, 2),
start_datetime=datetime(2020, 4, 1),
end_datetime=datetime(2020, 4, 2),
history_minutes=30,
forecast_minutes=60,
image_size_pixels=64,
Expand Down