diff --git a/gcp.yaml b/gcp.yaml new file mode 100644 index 00000000..c0d7c811 --- /dev/null +++ b/gcp.yaml @@ -0,0 +1,66 @@ +general: + cloud: gcp + description: example configuration + name: example +git: null +input_data: + default_forecast_minutes: 60 + default_history_minutes: 30 + gsp: + forecast_minutes: 60 + gsp_zarr_path: gs://solar-pv-nowcasting-data/PV/GSP/v0/pv_gsp.zarr + history_minutes: 30 + nwp: + forecast_minutes: 60 + history_minutes: 30 + nwp_channels: + - t + - dswrf + - prate + - r + - sde + - si10 + - vis + - lcc + - mcc + - hcc + nwp_image_size_pixels: 64 + nwp_zarr_path: gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV__2018-01_to_2019-12__chunks__variable10__init_time1__step1__x548__y704__.zarr + pv: + forecast_minutes: 60 + history_minutes: 30 + solar_pv_data_filename: gs://solar-pv-nowcasting-data/PV/PVOutput.org/UK_PV_timeseries_batch.nc + solar_pv_metadata_filename: gs://solar-pv-nowcasting-data/PV/PVOutput.org/UK_PV_metadata.csv + satellite: + forecast_minutes: 60 + history_minutes: 30 + sat_channels: + - HRV + - IR_016 + - IR_039 + - IR_087 + - IR_097 + - IR_108 + - IR_120 + - IR_134 + - VIS006 + - VIS008 + - WV_062 + - WV_073 + satellite_image_size_pixels: 64 + satellite_zarr_path: gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/OSGB36/all_zarr_int16_single_timestep.zarr + sun: + forecast_minutes: 60 + history_minutes: 30 + sun_zarr_path: gs://solar-pv-nowcasting-data/Sun/v0/sun.zarr/ + topographic: + forecast_minutes: 60 + history_minutes: 30 + topographic_filename: gs://solar-pv-nowcasting-data/Topographic/europe_dem_1km_osgb.tif +output_data: + filepath: gs://solar-pv-nowcasting-data/prepared_ML_training_data/v5/ +process: + batch_size: 32 + local_temp_path: ~/temp/ + seed: 1234 + upload_every_n_batches: 16 diff --git a/nowcasting_dataset/config/gcp.yaml b/nowcasting_dataset/config/gcp.yaml index ebddd2ff..59d748ee 100644 --- a/nowcasting_dataset/config/gcp.yaml +++ b/nowcasting_dataset/config/gcp.yaml @@ -1,46 +1,65 @@ general: - name: gcp - description: Configuration for Google Cloud + cloud: gcp + description: example configuration + name: example input_data: - nwp_zarr_path: gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV__2018-01_to_2019-12__chunks__variable10__init_time1__step1__x548__y704__.zarr - satellite_zarr_path: gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/OSGB36/all_zarr_int16_single_timestep.zarr - solar_pv_data_filename: gs://solar-pv-nowcasting-data/PV/PVOutput.org/UK_PV_timeseries_batch.nc - solar_pv_metadata_filename: gs://solar-pv-nowcasting-data/PV/PVOutput.org/UK_PV_metadata.csv - gsp_zarr_path: gs://solar-pv-nowcasting-data/PV/GSP/v2/pv_gsp.zarr - topographic_filename: gs://solar-pv-nowcasting-data/Topographic/europe_dem_1km_osgb.tif - sun_zarr_path: gs://solar-pv-nowcasting-data/Sun/v0/sun.zarr + default_forecast_minutes: 60 + default_history_minutes: 30 + gsp: + forecast_minutes: 60 + gsp_zarr_path: gs://solar-pv-nowcasting-data/PV/GSP/v2/pv_gsp.zarr + history_minutes: 30 + nwp: + forecast_minutes: 60 + history_minutes: 30 + nwp_channels: + - t + - dswrf + - prate + - r + - sde + - si10 + - vis + - lcc + - mcc + - hcc + nwp_image_size_pixels: 64 + nwp_zarr_path: gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV__2018-01_to_2019-12__chunks__variable10__init_time1__step1__x548__y704__.zarr + pv: + forecast_minutes: 60 + history_minutes: 30 + solar_pv_data_filename: gs://solar-pv-nowcasting-data/PV/PVOutput.org/UK_PV_timeseries_batch.nc + solar_pv_metadata_filename: gs://solar-pv-nowcasting-data/PV/PVOutput.org/UK_PV_metadata.csv + satellite: + forecast_minutes: 60 + history_minutes: 30 + sat_channels: + - HRV + - IR_016 + - IR_039 + - IR_087 + - IR_097 + - IR_108 + - IR_120 + - IR_134 + - VIS006 + - VIS008 + - WV_062 + - WV_073 + satellite_image_size_pixels: 64 + satellite_zarr_path: gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/OSGB36/all_zarr_int16_single_timestep.zarr + sun: + forecast_minutes: 60 + history_minutes: 30 + sun_zarr_path: gs://solar-pv-nowcasting-data/Sun/v0/sun.zarr/ + topographic: + forecast_minutes: 60 + history_minutes: 30 + topographic_filename: gs://solar-pv-nowcasting-data/Topographic/europe_dem_1km_osgb.tif output_data: filepath: gs://solar-pv-nowcasting-data/prepared_ML_training_data/v7/ process: + batch_size: 32 local_temp_path: ~/temp/ seed: 1234 - batch_size: 32 upload_every_n_batches: 16 - forecast_minutes: 60 - history_minutes: 30 - satellite_image_size_pixels: 64 - nwp_image_size_pixels: 64 - nwp_channels: - - t - - dswrf - - prate - - r - - sde - - si10 - - vis - - lcc - - mcc - - hcc - sat_channels: - - HRV - - IR_016 - - IR_039 - - IR_087 - - IR_097 - - IR_108 - - IR_120 - - IR_134 - - VIS006 - - VIS008 - - WV_062 - - WV_073 diff --git a/nowcasting_dataset/config/model.py b/nowcasting_dataset/config/model.py index a16f2e91..00a5658d 100644 --- a/nowcasting_dataset/config/model.py +++ b/nowcasting_dataset/config/model.py @@ -1,18 +1,23 @@ """ Configuration model for the dataset """ from datetime import datetime from typing import Optional +import logging import git from pathy import Pathy from pydantic import BaseModel, Field -from pydantic import validator +from pydantic import validator, root_validator from nowcasting_dataset.consts import NWP_VARIABLE_NAMES -from nowcasting_dataset.consts import SAT_VARIABLE_NAMES +from nowcasting_dataset.consts import ( + SAT_VARIABLE_NAMES, + DEFAULT_N_GSP_PER_EXAMPLE, + DEFAULT_N_PV_SYSTEMS_PER_EXAMPLE, +) class General(BaseModel): - """ General pydantic model """ + """General pydantic model""" name: str = Field("example", description="The name of this configuration file.") description: str = Field( @@ -29,7 +34,7 @@ class General(BaseModel): class Git(BaseModel): - """ Git model """ + """Git model""" hash: str = Field(..., description="The git hash has for when a dataset is created.") message: str = Field(..., description="The git message has for when a dataset is created.") @@ -38,14 +43,35 @@ class Git(BaseModel): ) -class InputData(BaseModel): - """ - Input data model +class DataSourceMixin(BaseModel): + """Mixin class, to add forecast and history minutes""" - All paths must include the protocol prefix. For local files, - it's sufficient to just start with a '/'. For aws, start with 's3://', - for gcp start with 'gs://'. - """ + forecast_minutes: int = Field( + None, + ge=0, + description="how many minutes to forecast in the future. " + "If set to None, the value is defaulted to InputData.default_forecast_minutes", + ) + history_minutes: int = Field( + None, + ge=0, + description="how many historic minutes are used. " + "If set to None, the value is defaulted to InputData.default_history_minutes", + ) + + @property + def seq_length_30_minutes(self): + """How many steps are there in 30 minute datasets""" + return int((self.history_minutes + self.forecast_minutes) / 30 + 1) + + @property + def seq_length_5_minutes(self): + """How many steps are there in 5 minute datasets""" + return int((self.history_minutes + self.forecast_minutes) / 5 + 1) + + +class PV(DataSourceMixin): + """PV configuration model""" solar_pv_data_filename: str = Field( "gs://solar-pv-nowcasting-data/PV/PVOutput.org/UK_PV_timeseries_batch.nc", @@ -55,34 +81,141 @@ class InputData(BaseModel): "gs://solar-pv-nowcasting-data/PV/PVOutput.org/UK_PV_metadata.csv", description="The CSV file describing each PV system.", ) + n_gsp_per_example: int = Field( + DEFAULT_N_PV_SYSTEMS_PER_EXAMPLE, + description="The number of PV systems samples per example. " + "If there are less in the ROI then the data is padded with zeros. ", + ) + + +class Satellite(DataSourceMixin): + """Satellite configuration model""" satellite_zarr_path: str = Field( "gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/OSGB36/all_zarr_int16_single_timestep.zarr", description="The path which holds the satellite zarr.", ) + sat_channels: tuple = Field( + SAT_VARIABLE_NAMES, description="the satellite channels that are used" + ) + + satellite_image_size_pixels: int = Field(64, description="the size of the satellite images") + + +class NWP(DataSourceMixin): + """NWP configuration model""" + nwp_zarr_path: str = Field( "gs://solar-pv-nowcasting-data/NWP/UK_Met_Office/UKV__2018-01_to_2019-12__chunks__variable10__init_time1__step1__x548__y704__.zarr", description="The path which holds the NWP zarr.", ) - gsp_zarr_path: str = Field("gs://solar-pv-nowcasting-data/PV/GSP/v0/pv_gsp.zarr") + nwp_channels: tuple = Field(NWP_VARIABLE_NAMES, description="the channels used in the nwp data") + + nwp_image_size_pixels: int = Field(64, description="the size of the nwp images") + + +class GSP(DataSourceMixin): + """GSP configuration model""" + + gsp_zarr_path: str = Field("gs://solar-pv-nowcasting-data/PV/GSP/v2/pv_gsp.zarr") + n_gsp_per_example: int = Field( + DEFAULT_N_GSP_PER_EXAMPLE, + description="The number of GSP samples per example. " + "If there are less in the ROI then the data is padded with zeros. ", + ) + + @validator("history_minutes") + def history_minutes_divide_by_30(cls, v): + """Validate 'history_minutes'""" + assert v % 30 == 0 # this means it also divides by 5 + return v + + @validator("forecast_minutes") + def forecast_minutes_divide_by_30(cls, v): + """Validate 'forecast_minutes'""" + assert v % 30 == 0 # this means it also divides by 5 + return v + + +class Topographic(DataSourceMixin): + """Topographic configuration model""" + topographic_filename: str = Field( "gs://solar-pv-nowcasting-data/Topographic/europe_dem_1km_osgb.tif", description="Path to the GeoTIFF Topographic data source", ) + +class Sun(DataSourceMixin): + """Sun configuration model""" + sun_zarr_path: str = Field( "gs://solar-pv-nowcasting-data/Sun/v0/sun.zarr/", description="Path to the Sun data source i.e Azimuth and Elevation", ) +class InputData(BaseModel): + """ + Input data model + + All paths must include the protocol prefix. For local files, + it's sufficient to just start with a '/'. For aws, start with 's3://', + for gcp start with 'gs://'. + """ + + pv: PV = PV() + satellite: Satellite = Satellite() + nwp: NWP = NWP() + gsp: GSP = GSP() + topographic: Topographic = Topographic() + sun: Sun = Sun() + + default_forecast_minutes: int = Field( + 60, + ge=0, + description="how many minutes to forecast in the future. " + "This sets the default for all the data sources if they are not set.", + ) + default_history_minutes: int = Field( + 30, + ge=0, + description="how many historic minutes are used. " + "This sets the default for all the data sources if they are not set.", + ) + + @property + def default_seq_length_5_minutes(self): + """How many steps are there in 5 minute datasets""" + return int((self.default_history_minutes + self.default_forecast_minutes) / 5 + 1) + + @root_validator + def set_forecast_and_history_minutes(cls, values): + """ + Set default history and forecast values, if needed. + + Run through the different data sources and if the forecast or history minutes are not set, + then set them to the default values + """ + + for data_source_name in ["pv", "nwp", "satellite", "gsp", "topographic", "sun"]: + + if values[data_source_name].forecast_minutes is None: + values[data_source_name].forecast_minutes = values["default_forecast_minutes"] + + if values[data_source_name].history_minutes is None: + values[data_source_name].history_minutes = values["default_history_minutes"] + + return values + + class OutputData(BaseModel): - """ Output data model """ + """Output data model""" filepath: str = Field( - "gs://solar-pv-nowcasting-data/prepared_ML_training_data/v5/", + "gs://solar-pv-nowcasting-data/prepared_ML_training_data/v7/", description=( "Where the data is saved to. If this is running on the cloud then should include" " 'gs://' or 's3://'" @@ -91,7 +224,7 @@ class OutputData(BaseModel): class Process(BaseModel): - """ Pydantic model of how the data is processed """ + """Pydantic model of how the data is processed""" seed: int = Field(1234, description="Random seed, so experiments can be repeatable") batch_size: int = Field(32, description="the number of examples per batch") @@ -102,46 +235,12 @@ class Process(BaseModel): " If 0 then write batches directly to output_data.filepath, not to a temp directory." ), ) - forecast_minutes: int = Field( - 60, ge=0, description="how many minutes to forecast in the future" - ) - history_minutes: int = Field(30, ge=0, description="how many historic minutes are used") - satellite_image_size_pixels: int = Field(64, description="the size of the satellite images") - nwp_image_size_pixels: int = Field(64, description="the size of the nwp images") - sat_channels: tuple = Field( - SAT_VARIABLE_NAMES, description="the satellite channels that are used" - ) - nwp_channels: tuple = Field(NWP_VARIABLE_NAMES, description="the channels used in the nwp data") local_temp_path: str = Field("~/temp/") - # TODO: Remove! - @property - def seq_length_30_minutes(self): - """ How many steps are there in 30 minute datasets """ - return int((self.history_minutes + self.forecast_minutes) / 30 + 1) - - # TODO: Remove! - @property - def seq_length_5_minutes(self): - """ How many steps are there in 5 minute datasets """ - return int((self.history_minutes + self.forecast_minutes) / 5 + 1) - - @validator("history_minutes") - def history_minutes_divide_by_30(cls, v): - """ Validate 'history_minutes' """ - assert v % 30 == 0 # this means it also divides by 5 - return v - - @validator("forecast_minutes") - def forecast_minutes_divide_by_30(cls, v): - """ Validate 'forecast_minutes' """ - assert v % 30 == 0 # this means it also divides by 5 - return v - class Configuration(BaseModel): - """ Configuration model for the dataset """ + """Configuration model for the dataset""" general: General = General() input_data: InputData = InputData() @@ -153,17 +252,19 @@ def set_base_path(self, base_path: str): """Append base_path to all paths. Mostly used for testing.""" base_path = Pathy(base_path) path_attrs = [ - "solar_pv_data_filename", - "solar_pv_metadata_filename", - "satellite_zarr_path", - "nwp_zarr_path", - "gsp_zarr_path", + "pv.solar_pv_data_filename", + "pv.solar_pv_metadata_filename", + "satellite.satellite_zarr_path", + "nwp.nwp_zarr_path", + "gsp.gsp_zarr_path", ] - for attr_name in path_attrs: - path = getattr(self.input_data, attr_name) + for cls_and_attr_name in path_attrs: + cls_name, attribute = cls_and_attr_name.split(".") + cls = getattr(self.input_data, cls_name) + path = getattr(getattr(self.input_data, cls_name), attribute) path = base_path / path - setattr(self.input_data, attr_name, path) - print(path) + setattr(cls, attribute, path) + setattr(self.input_data, cls_name, cls) def set_git_commit(configuration: Configuration): diff --git a/nowcasting_dataset/config/on_premises.yaml b/nowcasting_dataset/config/on_premises.yaml index 9b3583d7..bae87205 100644 --- a/nowcasting_dataset/config/on_premises.yaml +++ b/nowcasting_dataset/config/on_premises.yaml @@ -1,45 +1,51 @@ general: - name: on_premises description: Config for producing batches on OCF's on-premises hardware. + name: on_premises input_data: - nwp_zarr_path: /mnt/storage_b/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/NWP/UK_Met_Office/UKV/zarr/UKV__2018-01_to_2019-12__chunks__variable10__init_time1__step1__x548__y704__.zarr - satellite_zarr_path: /mnt/storage_a/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/satellite/EUMETSAT/SEVIRI_RSS/zarr/all_zarr_int16_single_timestep.zarr - solar_pv_data_filename: /mnt/storage_b/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/PV/PVOutput.org/UK_PV_timeseries_batch.nc - solar_pv_metadata_filename: /mnt/storage_b/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/PV/PVOutput.org/UK_PV_metadata.csv - gsp_zarr_path: /mnt/storage_b/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/PV/GSP/v2/pv_gsp.zarr - topographic_filename: /mnt/storage_b/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/Topographic/europe_dem_1km_osgb.tif - sun_zarr_path: /mnt/storage_b/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/Sun/v0/sun.zarr + default_forecast_minutes: 120 + default_history_minutes: 30 + gsp: + gsp_zarr_path: /mnt/storage_b/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/PV/GSP/v2/pv_gsp.zarr + nwp: + nwp_channels: + - t + - dswrf + - prate + - r + - sde + - si10 + - vis + - lcc + - mcc + - hcc + nwp_image_size_pixels: 64 + nwp_zarr_path: /mnt/storage_b/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/NWP/UK_Met_Office/UKV/zarr/UKV__2018-01_to_2019-12__chunks__variable10__init_time1__step1__x548__y704__.zarr + pv: + solar_pv_data_filename: /mnt/storage_b/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/PV/PVOutput.org/UK_PV_timeseries_batch.nc + solar_pv_metadata_filename: /mnt/storage_b/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/PV/PVOutput.org/UK_PV_metadata.csv + satellite: + sat_channels: + - HRV + - IR_016 + - IR_039 + - IR_087 + - IR_097 + - IR_108 + - IR_120 + - IR_134 + - VIS006 + - VIS008 + - WV_062 + - WV_073 + satellite_image_size_pixels: 64 + satellite_zarr_path: /mnt/storage_a/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/satellite/EUMETSAT/SEVIRI_RSS/zarr/all_zarr_int16_single_timestep.zarr + sun: + sun_zarr_path: /mnt/storage_b/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/Sun/v0/sun.zarr + topographic: + topographic_filename: /mnt/storage_b/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/Topographic/europe_dem_1km_osgb.tif output_data: filepath: /mnt/storage_b/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/prepared_ML_training_data/v8/ process: - seed: 1234 batch_size: 32 + seed: 1234 upload_every_n_batches: 0 # Write directly to output_data.filepath, not to a temp directory. - forecast_minutes: 120 - history_minutes: 30 - satellite_image_size_pixels: 64 - nwp_image_size_pixels: 64 - nwp_channels: - - t - - dswrf - - prate - - r - - sde - - si10 - - vis - - lcc - - mcc - - hcc - sat_channels: - - HRV - - IR_016 - - IR_039 - - IR_087 - - IR_097 - - IR_108 - - IR_120 - - IR_134 - - VIS006 - - VIS008 - - WV_062 - - WV_073 diff --git a/nowcasting_dataset/dataset/batch.py b/nowcasting_dataset/dataset/batch.py index 8b4f7b79..0b18fad7 100644 --- a/nowcasting_dataset/dataset/batch.py +++ b/nowcasting_dataset/dataset/batch.py @@ -90,34 +90,45 @@ def data_sources(self): def fake(configuration: Configuration = Configuration()): """ Make fake batch object """ batch_size = configuration.process.batch_size - seq_length_5 = configuration.process.seq_length_5_minutes - seq_length_30 = configuration.process.seq_length_30_minutes - image_size_pixels = configuration.process.satellite_image_size_pixels + satellite_image_size_pixels = configuration.input_data.satellite.satellite_image_size_pixels + nwp_image_size_pixels = configuration.input_data.nwp.nwp_image_size_pixels return Batch( batch_size=batch_size, satellite=satellite_fake( batch_size=batch_size, - seq_length_5=seq_length_5, - satellite_image_size_pixels=image_size_pixels, - number_sat_channels=len(configuration.process.sat_channels), + seq_length_5=configuration.input_data.satellite.seq_length_5_minutes, + satellite_image_size_pixels=satellite_image_size_pixels, + number_sat_channels=len(configuration.input_data.satellite.sat_channels), ), nwp=nwp_fake( batch_size=batch_size, - seq_length_5=seq_length_5, - image_size_pixels=image_size_pixels, - number_nwp_channels=len(configuration.process.nwp_channels), + seq_length_5=configuration.input_data.nwp.seq_length_5_minutes, + image_size_pixels=nwp_image_size_pixels, + number_nwp_channels=len(configuration.input_data.nwp.nwp_channels), ), metadata=metadata_fake(batch_size=batch_size), pv=pv_fake( - batch_size=batch_size, seq_length_5=seq_length_5, n_pv_systems_per_batch=128 + batch_size=batch_size, + seq_length_5=configuration.input_data.pv.seq_length_5_minutes, + n_pv_systems_per_batch=128, + ), + gsp=gsp_fake( + batch_size=batch_size, + seq_length_30=configuration.input_data.gsp.seq_length_30_minutes, + n_gsp_per_batch=32, + ), + sun=sun_fake( + batch_size=batch_size, + seq_length_5=configuration.input_data.sun.seq_length_5_minutes, ), - gsp=gsp_fake(batch_size=batch_size, seq_length_30=seq_length_30, n_gsp_per_batch=32), - sun=sun_fake(batch_size=batch_size, seq_length_5=seq_length_5), topographic=topographic_fake( - batch_size=batch_size, image_size_pixels=image_size_pixels + batch_size=batch_size, image_size_pixels=satellite_image_size_pixels + ), + datetime=datetime_fake( + batch_size=batch_size, + seq_length_5=configuration.input_data.default_seq_length_5_minutes, ), - datetime=datetime_fake(batch_size=batch_size, seq_length_5=seq_length_5), ) def save_netcdf(self, batch_i: int, path: Path): diff --git a/scripts/prepare_ml_data.py b/scripts/prepare_ml_data.py index 06883b03..583ee140 100755 --- a/scripts/prepare_ml_data.py +++ b/scripts/prepare_ml_data.py @@ -22,13 +22,10 @@ from nowcasting_dataset.dataset.datamodule import NowcastingDataModule -# from nowcasting_dataset.dataset.batch import write_batch_locally from nowcasting_dataset.data_sources.satellite.satellite_data_source import SAT_VARIABLE_NAMES from nowcasting_dataset.data_sources.nwp.nwp_data_source import NWP_VARIABLE_NAMES -from nowcasting_dataset.dataset.batch import Batch from pathy import Pathy from pathlib import Path -import fsspec import torch import os import numpy as np @@ -55,20 +52,20 @@ config = set_git_commit(config) # Solar PV data -PV_DATA_FILENAME = config.input_data.solar_pv_data_filename -PV_METADATA_FILENAME = config.input_data.solar_pv_metadata_filename +PV_DATA_FILENAME = config.input_data.pv.solar_pv_data_filename +PV_METADATA_FILENAME = config.input_data.pv.solar_pv_metadata_filename # Satellite data -SAT_ZARR_PATH = config.input_data.satellite_zarr_path +SAT_ZARR_PATH = config.input_data.satellite.satellite_zarr_path # Numerical weather predictions -NWP_ZARR_PATH = config.input_data.nwp_zarr_path +NWP_ZARR_PATH = config.input_data.nwp.nwp_zarr_path # GSP data -GSP_ZARR_PATH = config.input_data.gsp_zarr_path +GSP_ZARR_PATH = config.input_data.gsp.gsp_zarr_path # Topographic data -TOPO_TIFF_PATH = config.input_data.topographic_filename +TOPO_TIFF_PATH = config.input_data.topographic.topographic_filename # Paths for output data. DST_NETCDF4_PATH = Pathy(config.output_data.filepath) @@ -128,10 +125,10 @@ def get_data_module(): data_module = NowcastingDataModule( batch_size=config.process.batch_size, - history_minutes=config.process.history_minutes, #: Number of minutes of history, not including t0. - forecast_minutes=config.process.forecast_minutes, #: Number of minutes of forecast. - satellite_image_size_pixels=config.process.satellite_image_size_pixels, - nwp_image_size_pixels=config.process.nwp_image_size_pixels, + history_minutes=config.input_data.default_history_minutes, #: Number of minutes of history, not including t0. + forecast_minutes=config.input_data.default_forecast_minutes, #: Number of minutes of forecast. + satellite_image_size_pixels=config.input_data.satellite.satellite_image_size_pixels, + nwp_image_size_pixels=config.input_data.nwp.nwp_image_size_pixels, nwp_channels=NWP_VARIABLE_NAMES, sat_channels=SAT_VARIABLE_NAMES, pv_power_filename=PV_DATA_FILENAME, @@ -140,7 +137,7 @@ def get_data_module(): nwp_base_path=NWP_ZARR_PATH, gsp_filename=GSP_ZARR_PATH, topographic_filename=TOPO_TIFF_PATH, - sun_filename=config.input_data.sun_zarr_path, + sun_filename=config.input_data.sun.sun_zarr_path, pin_memory=False, #: Passed to DataLoader. num_workers=num_workers, #: Passed to DataLoader. prefetch_factor=8, #: Passed to DataLoader. @@ -150,7 +147,6 @@ def get_data_module(): n_test_batches_per_epoch=1_008, collate_fn=lambda x: x, convert_to_numpy=False, #: Leave data as Pandas / Xarray for pre-preparing. - normalise_sat=False, skip_n_train_batches=maximum_batch_id_train // num_workers, skip_n_validation_batches=maximum_batch_id_validation // num_workers, skip_n_test_batches=maximum_batch_id_test // num_workers, diff --git a/tests/config/nwp_size_test.yaml b/tests/config/nwp_size_test.yaml index 2cc24549..1ab6bba8 100644 --- a/tests/config/nwp_size_test.yaml +++ b/tests/config/nwp_size_test.yaml @@ -1,23 +1,32 @@ general: + cloud: gcp description: example configuration name: example +git: null input_data: - nwp_zarr_path: tests/data/nwp_data/test.zarr - satellite_zarr_path: tests/data/sat_data.zarr - solar_pv_data_filename: tests/data/pv_data/test.nc - solar_pv_metadata_filename: tests/data/pv_metadata/UK_PV_metadata.csv - gsp_zarr_path: tests/data/gsp/test.zarr - topographic_filename: tests/data/europe_dem_2km_osgb.tif - sun_zarr_path: tests/data/sun/test.zarr + gsp: + gsp_zarr_path: tests/data/gsp/test.zarr + nwp: + nwp_channels: + - t + nwp_image_size_pixels: 64 + nwp_zarr_path: tests/data/nwp_data/test.zarr + pv: + solar_pv_data_filename: tests/data/pv_data/test.nc + solar_pv_metadata_filename: tests/data/pv_metadata/UK_PV_metadata.csv + satellite: + sat_channels: + - HRV + satellite_image_size_pixels: 64 + satellite_zarr_path: tests/data/sat_data.zarr + sun: + sun_zarr_path: tests/data/sun/test.zarr + topographic: + topographic_filename: tests/data/europe_dem_2km_osgb.tif output_data: filepath: not used by unittests! process: batch_size: 32 - forecast_minutes: 60 - history_minutes: 30 - satellite_image_size_pixels: 64 - nwp_image_size_pixels: 64 - nwp_channels: - - t - sat_channels: - - HRV + local_temp_path: ~/temp/ + seed: 1234 + upload_every_n_batches: 16 diff --git a/tests/config/test.yaml b/tests/config/test.yaml index d86ad42b..9a081e45 100644 --- a/tests/config/test.yaml +++ b/tests/config/test.yaml @@ -1,23 +1,32 @@ general: + cloud: gcp description: example configuration name: example +git: null input_data: - nwp_zarr_path: tests/data/nwp_data/test.zarr - satellite_zarr_path: tests/data/sat_data.zarr - solar_pv_data_filename: tests/data/pv_data/test.nc - solar_pv_metadata_filename: tests/data/pv_metadata/UK_PV_metadata.csv - gsp_zarr_path: tests/data/gsp/test.zarr - topographic_filename: tests/data/europe_dem_2km_osgb.tif - sun_zarr_path: tests/data/sun/test.zarr + gsp: + gsp_zarr_path: tests/data/gsp/test.zarr + nwp: + nwp_channels: + - t + nwp_image_size_pixels: 2 + nwp_zarr_path: tests/data/nwp_data/test.zarr + pv: + solar_pv_data_filename: tests/data/pv_data/test.nc + solar_pv_metadata_filename: tests/data/pv_metadata/UK_PV_metadata.csv + satellite: + sat_channels: + - HRV + satellite_image_size_pixels: 64 + satellite_zarr_path: tests/data/sat_data.zarr + sun: + sun_zarr_path: tests/data/sun/test.zarr + topographic: + topographic_filename: tests/data/europe_dem_2km_osgb.tif output_data: filepath: not used by unittests! process: batch_size: 32 - forecast_minutes: 60 - history_minutes: 30 - satellite_image_size_pixels: 64 - nwp_image_size_pixels: 2 - nwp_channels: - - t - sat_channels: - - HRV + local_temp_path: ~/temp/ + seed: 1234 + upload_every_n_batches: 16 diff --git a/tests/test_datamodule.py b/tests/test_datamodule.py index f642566a..b8fad33c 100644 --- a/tests/test_datamodule.py +++ b/tests/test_datamodule.py @@ -81,17 +81,17 @@ def test_data_module(config_filename): batch_size=config.process.batch_size, history_minutes=30, #: Number of timesteps of history, not including t0. forecast_minutes=60, #: Number of timesteps of forecast. - satellite_image_size_pixels=config.process.satellite_image_size_pixels, - nwp_image_size_pixels=config.process.nwp_image_size_pixels, - nwp_channels=config.process.nwp_channels, - sat_channels=config.process.sat_channels, # reduced for test data - pv_power_filename=config.input_data.solar_pv_data_filename, - pv_metadata_filename=config.input_data.solar_pv_metadata_filename, - sat_filename=config.input_data.satellite_zarr_path, - nwp_base_path=config.input_data.nwp_zarr_path, - gsp_filename=config.input_data.gsp_zarr_path, - topographic_filename=config.input_data.topographic_filename, - sun_filename=config.input_data.sun_zarr_path, + satellite_image_size_pixels=config.input_data.satellite.satellite_image_size_pixels, + nwp_image_size_pixels=config.input_data.nwp.nwp_image_size_pixels, + nwp_channels=config.input_data.nwp.nwp_channels[0:1], + sat_channels=config.input_data.satellite.sat_channels, # reduced for test data + pv_power_filename=config.input_data.pv.solar_pv_data_filename, + pv_metadata_filename=config.input_data.pv.solar_pv_metadata_filename, + sat_filename=config.input_data.satellite.satellite_zarr_path, + nwp_base_path=config.input_data.nwp.nwp_zarr_path, + gsp_filename=config.input_data.gsp.gsp_zarr_path, + topographic_filename=config.input_data.topographic.topographic_filename, + sun_filename=config.input_data.sun.sun_zarr_path, pin_memory=True, #: Passed to DataLoader. num_workers=0, #: Passed to DataLoader. prefetch_factor=8, #: Passed to DataLoader. @@ -135,17 +135,17 @@ def test_batch_to_batch_to_dataset(): batch_size=config.process.batch_size, history_minutes=30, #: Number of timesteps of history, not including t0. forecast_minutes=60, #: Number of timesteps of forecast. - satellite_image_size_pixels=config.process.satellite_image_size_pixels, - nwp_image_size_pixels=config.process.nwp_image_size_pixels, - nwp_channels=config.process.nwp_channels[0:1], - sat_channels=config.process.sat_channels, # reduced for test data - pv_power_filename=config.input_data.solar_pv_data_filename, - pv_metadata_filename=config.input_data.solar_pv_metadata_filename, - sat_filename=config.input_data.satellite_zarr_path, - nwp_base_path=config.input_data.nwp_zarr_path, - gsp_filename=config.input_data.gsp_zarr_path, - topographic_filename=config.input_data.topographic_filename, - sun_filename=config.input_data.sun_zarr_path, + satellite_image_size_pixels=config.input_data.satellite.satellite_image_size_pixels, + nwp_image_size_pixels=config.input_data.nwp.nwp_image_size_pixels, + nwp_channels=config.input_data.nwp.nwp_channels[0:1], + sat_channels=config.input_data.satellite.sat_channels, # reduced for test data + pv_power_filename=config.input_data.pv.solar_pv_data_filename, + pv_metadata_filename=config.input_data.pv.solar_pv_metadata_filename, + sat_filename=config.input_data.satellite.satellite_zarr_path, + nwp_base_path=config.input_data.nwp.nwp_zarr_path, + gsp_filename=config.input_data.gsp.gsp_zarr_path, + topographic_filename=config.input_data.topographic.topographic_filename, + sun_filename=config.input_data.sun.sun_zarr_path, pin_memory=True, #: Passed to DataLoader. num_workers=0, #: Passed to DataLoader. prefetch_factor=8, #: Passed to DataLoader.