From 5bf9302870b231f19ce9e692793b038eb813c678 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Thu, 30 Sep 2021 10:54:19 +0100 Subject: [PATCH 1/8] add test add frame work --- .../data_sources/gsp/gsp_data_source.py | 13 +++++++++++++ tests/data_sources/gsp/test_gsp_metadata.py | 15 ++++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/nowcasting_dataset/data_sources/gsp/gsp_data_source.py b/nowcasting_dataset/data_sources/gsp/gsp_data_source.py index 10325943..87e0884f 100644 --- a/nowcasting_dataset/data_sources/gsp/gsp_data_source.py +++ b/nowcasting_dataset/data_sources/gsp/gsp_data_source.py @@ -393,3 +393,16 @@ def load_solar_gsp_data( gsp_power_df.columns = [int(col) for col in gsp_power_df.columns] return gsp_power_df + + +def normalize_gsp_data(gsp_power: pd.DataFrame): + + # load installed capacity + + # merged with gsp power + + # check that no gsp dont have installed capacity + + # normalize by installed capacity + + return gsp_power diff --git a/tests/data_sources/gsp/test_gsp_metadata.py b/tests/data_sources/gsp/test_gsp_metadata.py index 9b4ad2f7..ed8e27bc 100644 --- a/tests/data_sources/gsp/test_gsp_metadata.py +++ b/tests/data_sources/gsp/test_gsp_metadata.py @@ -8,7 +8,10 @@ get_gsp_metadata_from_eso, get_gsp_shape_from_eso, ) -from nowcasting_dataset.data_sources.gsp.pvlive import load_pv_gsp_raw_data_from_pvlive +from nowcasting_dataset.data_sources.gsp.pvlive import ( + load_pv_gsp_raw_data_from_pvlive, + get_installed_capacity, +) def test_get_gsp_metadata_from_eso(): @@ -122,3 +125,13 @@ def test_load_gsp_raw_data_from_pvlive_many_gsp(): assert len(gsp_pv_df) == (48 + 1) * 10 assert "datetime_gmt" in gsp_pv_df.columns assert "generation_mw" in gsp_pv_df.columns + + +def test_get_installed_capacity(): + + installed_capacity = get_installed_capacity(maximum_number_of_gsp=10) + + assert len(installed_capacity) == 10 + assert "installedcapacity_mwp" == installed_capacity.name + assert installed_capacity.iloc[0] == 342.02623 + assert installed_capacity.iloc[9] == 308.00432 From dcdfe415493f5d471c4827ebcc66dfa7b7f8929e Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Thu, 30 Sep 2021 10:59:06 +0100 Subject: [PATCH 2/8] add method to get installed capacity for each gsp --- nowcasting_dataset/data_sources/gsp/pvlive.py | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/nowcasting_dataset/data_sources/gsp/pvlive.py b/nowcasting_dataset/data_sources/gsp/pvlive.py index 7648f0af..6f4f14bd 100644 --- a/nowcasting_dataset/data_sources/gsp/pvlive.py +++ b/nowcasting_dataset/data_sources/gsp/pvlive.py @@ -2,6 +2,8 @@ import logging import pandas as pd from pvlive_api import PVLive +from typing import Optional +import pytz from nowcasting_dataset.data_sources.gsp.eso import get_list_of_gsp_ids @@ -87,3 +89,50 @@ def load_pv_gsp_raw_data_from_pvlive( gsp_data_df["datetime_gmt"] = gsp_data_df["datetime_gmt"].dt.tz_localize(None) return gsp_data_df + + +def get_installed_capacity( + start: Optional[datetime] = datetime(2021, 1, 1, tzinfo=pytz.utc), + maximum_number_of_gsp: Optional[int] = None, +) -> pd.Series: + """ + Get the installed capacity of each gsp + + This can take ~30 seconds for getting the full list + + Args: + start: optional datetime when the installed cpapcity is collected + maximum_number_of_gsp: Truncate list of GSPs to be no larger than this number of GSPs. + Set to None to disable truncation. + + Returns: pd.Series of installed capacity indexed by gsp_id + + """ + + logger.debug(f"Getting all installed capacity at {start}") + + # get a lit of gsp ids + gsp_ids = get_list_of_gsp_ids(maximum_number_of_gsp=maximum_number_of_gsp) + + # setup pv Live class, although here we are getting historic data + pvl = PVLive() + + # loop over gsp_id to get installed capacity + data = [] + for gsp_id in gsp_ids: + d = pvl.at_time( + start, + entity_type="gsp", + extra_fields="installedcapacity_mwp", + dataframe=True, + entity_id=gsp_id, + ) + data.append(d) + + # join data together + data_df = pd.concat(data) + + # set gsp_id as index + data_df.set_index("gsp_id", inplace=True) + + return data_df["installedcapacity_mwp"] From d149f315789ecf27c622bf0a59dbfc3e904707d6 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Thu, 30 Sep 2021 11:26:24 +0100 Subject: [PATCH 3/8] normalise generation data on-the-fly --- .../data_sources/gsp/gsp_data_source.py | 5 +- nowcasting_dataset/data_sources/gsp/pvlive.py | 11 ++- tests/data_sources/gsp/test_gsp_metadata.py | 66 -------------- tests/data_sources/gsp/test_gsp_pvlive.py | 88 +++++++++++++++++++ 4 files changed, 101 insertions(+), 69 deletions(-) create mode 100644 tests/data_sources/gsp/test_gsp_pvlive.py diff --git a/nowcasting_dataset/data_sources/gsp/gsp_data_source.py b/nowcasting_dataset/data_sources/gsp/gsp_data_source.py index 87e0884f..b6b35f5f 100644 --- a/nowcasting_dataset/data_sources/gsp/gsp_data_source.py +++ b/nowcasting_dataset/data_sources/gsp/gsp_data_source.py @@ -54,6 +54,8 @@ class GSPDataSource(ImageDataSource): get_center: bool = True # the maximum number of gsp's to be loaded for data sample n_gsp_per_example: int = DEFAULT_N_GSP_PER_EXAMPLE + # scale from zero to one + do_scale_0_to_1: bool = False def __post_init__(self, image_size_pixels: int, meters_per_pixel: int): """ @@ -93,7 +95,8 @@ def load(self): ) # scale from 0 to 1 - self.gsp_power = scale_to_0_to_1(self.gsp_power) + if self.do_scale_0_to_1: + self.gsp_power = scale_to_0_to_1(self.gsp_power) logger.debug(f"There are {len(self.gsp_power.columns)} GSP") diff --git a/nowcasting_dataset/data_sources/gsp/pvlive.py b/nowcasting_dataset/data_sources/gsp/pvlive.py index 6f4f14bd..a7ffd6c8 100644 --- a/nowcasting_dataset/data_sources/gsp/pvlive.py +++ b/nowcasting_dataset/data_sources/gsp/pvlive.py @@ -13,7 +13,7 @@ def load_pv_gsp_raw_data_from_pvlive( - start: datetime, end: datetime, number_of_gsp: int = None + start: datetime, end: datetime, number_of_gsp: int = None, normalize_data: bool = True ) -> pd.DataFrame: """ Load raw pv gsp data from pvlive. Note that each gsp is loaded separately. Also the data is loaded in 30 day chunks. @@ -21,6 +21,7 @@ def load_pv_gsp_raw_data_from_pvlive( start: the start date for gsp data to load end: the end date for gsp data to load number_of_gsp: The number of gsp to load. Note that on 2021-09-01 there were 338 to load. + normalize_data: Option to normalize the generation according to installed capacity Returns: Data frame of time series of gsp data. Shows PV data for each GSP from {start} to {end} @@ -58,7 +59,7 @@ def load_pv_gsp_raw_data_from_pvlive( end=end_chunk, entity_type="gsp", entity_id=gsp_id, - extra_fields="", + extra_fields="installedcapacity_mwp", dataframe=True, ) ) @@ -74,6 +75,12 @@ def load_pv_gsp_raw_data_from_pvlive( one_gsp_data_df = pd.concat(one_gsp_data_df) one_gsp_data_df = one_gsp_data_df.sort_values(by=["gsp_id", "datetime_gmt"]) + # normalize + if normalize_data: + one_gsp_data_df["generation_mw"] = ( + one_gsp_data_df["generation_mw"] / one_gsp_data_df["installedcapacity_mwp"] + ) + # append to longer list gsp_data_df.append(one_gsp_data_df) diff --git a/tests/data_sources/gsp/test_gsp_metadata.py b/tests/data_sources/gsp/test_gsp_metadata.py index ed8e27bc..9b4e5cae 100644 --- a/tests/data_sources/gsp/test_gsp_metadata.py +++ b/tests/data_sources/gsp/test_gsp_metadata.py @@ -1,17 +1,10 @@ -from datetime import datetime - import geopandas as gpd import pandas as pd -import pytz from nowcasting_dataset.data_sources.gsp.eso import ( get_gsp_metadata_from_eso, get_gsp_shape_from_eso, ) -from nowcasting_dataset.data_sources.gsp.pvlive import ( - load_pv_gsp_raw_data_from_pvlive, - get_installed_capacity, -) def test_get_gsp_metadata_from_eso(): @@ -76,62 +69,3 @@ def test_get_pv_gsp_shape_from_eso(): assert "RegionID" in gsp_shapes.columns assert "RegionName" in gsp_shapes.columns assert "geometry" in gsp_shapes.columns - - -def test_load_gsp_raw_data_from_pvlive_one_gsp_one_day(): - """ - Test that one gsp system data can be loaded, just for one day - """ - - start = datetime(2019, 1, 1, tzinfo=pytz.utc) - end = datetime(2019, 1, 2, tzinfo=pytz.utc) - - gsp_pv_df = load_pv_gsp_raw_data_from_pvlive(start=start, end=end, number_of_gsp=1) - - assert isinstance(gsp_pv_df, pd.DataFrame) - assert len(gsp_pv_df) == (48 + 1) - assert "datetime_gmt" in gsp_pv_df.columns - assert "generation_mw" in gsp_pv_df.columns - - -def test_load_gsp_raw_data_from_pvlive_one_gsp(): - """ - Test that one gsp system data can be loaded - """ - - start = datetime(2019, 1, 1, tzinfo=pytz.utc) - end = datetime(2019, 3, 1, tzinfo=pytz.utc) - - gsp_pv_df = load_pv_gsp_raw_data_from_pvlive(start=start, end=end, number_of_gsp=1) - - assert isinstance(gsp_pv_df, pd.DataFrame) - assert len(gsp_pv_df) == (48 * 59 + 1) - # 30 days in january, 29 days in february, plus one for the first timestamp in march - assert "datetime_gmt" in gsp_pv_df.columns - assert "generation_mw" in gsp_pv_df.columns - - -def test_load_gsp_raw_data_from_pvlive_many_gsp(): - """ - Test that one gsp system data can be loaded - """ - - start = datetime(2019, 1, 1, tzinfo=pytz.utc) - end = datetime(2019, 1, 2, tzinfo=pytz.utc) - - gsp_pv_df = load_pv_gsp_raw_data_from_pvlive(start=start, end=end, number_of_gsp=10) - - assert isinstance(gsp_pv_df, pd.DataFrame) - assert len(gsp_pv_df) == (48 + 1) * 10 - assert "datetime_gmt" in gsp_pv_df.columns - assert "generation_mw" in gsp_pv_df.columns - - -def test_get_installed_capacity(): - - installed_capacity = get_installed_capacity(maximum_number_of_gsp=10) - - assert len(installed_capacity) == 10 - assert "installedcapacity_mwp" == installed_capacity.name - assert installed_capacity.iloc[0] == 342.02623 - assert installed_capacity.iloc[9] == 308.00432 diff --git a/tests/data_sources/gsp/test_gsp_pvlive.py b/tests/data_sources/gsp/test_gsp_pvlive.py new file mode 100644 index 00000000..30e37d52 --- /dev/null +++ b/tests/data_sources/gsp/test_gsp_pvlive.py @@ -0,0 +1,88 @@ +from datetime import datetime + +import pandas as pd +import pytz + +from nowcasting_dataset.data_sources.gsp.pvlive import ( + load_pv_gsp_raw_data_from_pvlive, + get_installed_capacity, +) + + +def test_load_gsp_raw_data_from_pvlive_one_gsp_one_day(): + """ + Test that one gsp system data can be loaded, just for one day + """ + + start = datetime(2019, 1, 1, tzinfo=pytz.utc) + end = datetime(2019, 1, 2, tzinfo=pytz.utc) + + gsp_pv_df = load_pv_gsp_raw_data_from_pvlive(start=start, end=end, number_of_gsp=1) + + assert isinstance(gsp_pv_df, pd.DataFrame) + assert len(gsp_pv_df) == (48 + 1) + assert "datetime_gmt" in gsp_pv_df.columns + assert "generation_mw" in gsp_pv_df.columns + + +def test_load_gsp_raw_data_from_pvlive_one_gsp_one_day_not_normalised(): + """ + Test that one gsp system data can be loaded, just for one day, and is normalized correctly + """ + + # pick a summer day + start = datetime(2019, 6, 21, tzinfo=pytz.utc) + end = datetime(2019, 6, 22, tzinfo=pytz.utc) + + gsp_pv_df = load_pv_gsp_raw_data_from_pvlive( + start=start, end=end, number_of_gsp=1, normalize_data=False + ) + assert gsp_pv_df["generation_mw"].max() > 1 + + gsp_pv_df = load_pv_gsp_raw_data_from_pvlive( + start=start, end=end, number_of_gsp=1, normalize_data=True + ) + assert gsp_pv_df["generation_mw"].max() <= 1 + + +def test_load_gsp_raw_data_from_pvlive_one_gsp(): + """a + Test that one gsp system data can be loaded + """ + + start = datetime(2019, 1, 1, tzinfo=pytz.utc) + end = datetime(2019, 3, 1, tzinfo=pytz.utc) + + gsp_pv_df = load_pv_gsp_raw_data_from_pvlive(start=start, end=end, number_of_gsp=1) + + assert isinstance(gsp_pv_df, pd.DataFrame) + assert len(gsp_pv_df) == (48 * 59 + 1) + # 30 days in january, 29 days in february, plus one for the first timestamp in march + assert "datetime_gmt" in gsp_pv_df.columns + assert "generation_mw" in gsp_pv_df.columns + + +def test_load_gsp_raw_data_from_pvlive_many_gsp(): + """ + Test that one gsp system data can be loaded + """ + + start = datetime(2019, 1, 1, tzinfo=pytz.utc) + end = datetime(2019, 1, 2, tzinfo=pytz.utc) + + gsp_pv_df = load_pv_gsp_raw_data_from_pvlive(start=start, end=end, number_of_gsp=10) + + assert isinstance(gsp_pv_df, pd.DataFrame) + assert len(gsp_pv_df) == (48 + 1) * 10 + assert "datetime_gmt" in gsp_pv_df.columns + assert "generation_mw" in gsp_pv_df.columns + + +def test_get_installed_capacity(): + + installed_capacity = get_installed_capacity(maximum_number_of_gsp=10) + + assert len(installed_capacity) == 10 + assert "installedcapacity_mwp" == installed_capacity.name + assert installed_capacity.iloc[0] == 342.02623 + assert installed_capacity.iloc[9] == 308.00432 From 3201a35eda0f96fe8dee4117d96e4e0e41be168f Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Thu, 30 Sep 2021 11:47:33 +0100 Subject: [PATCH 4/8] update v of gsp data --- nowcasting_dataset/config/gcp.yaml | 2 +- nowcasting_dataset/config/on_premises.yaml | 2 +- .../data_sources/gsp/gsp_data_source.py | 13 ------------- tests/data_sources/get_test_data.py | 2 +- 4 files changed, 3 insertions(+), 16 deletions(-) diff --git a/nowcasting_dataset/config/gcp.yaml b/nowcasting_dataset/config/gcp.yaml index 92574e07..4f5ded46 100644 --- a/nowcasting_dataset/config/gcp.yaml +++ b/nowcasting_dataset/config/gcp.yaml @@ -6,7 +6,7 @@ input_data: satellite_zarr_path: gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/OSGB36/all_zarr_int16_single_timestep.zarr solar_pv_data_filename: gs://solar-pv-nowcasting-data/PV/PVOutput.org/UK_PV_timeseries_batch.nc solar_pv_metadata_filename: gs://solar-pv-nowcasting-data/PV/PVOutput.org/UK_PV_metadata.csv - gsp_zarr_path: gs://solar-pv-nowcasting-data/PV/PVOutput.org/PV/GSP/v0/pv_gsp.zarr + gsp_zarr_path: gs://solar-pv-nowcasting-data/PV/PVOutput.org/PV/GSP/v1/pv_gsp.zarr topographic_filename: gs://solar-pv-nowcasting-data/Topographic/europe_dem_1km_osgb.tif output_data: filepath: gs://solar-pv-nowcasting-data/prepared_ML_training_data/v6/ diff --git a/nowcasting_dataset/config/on_premises.yaml b/nowcasting_dataset/config/on_premises.yaml index 984cc132..20cf9931 100644 --- a/nowcasting_dataset/config/on_premises.yaml +++ b/nowcasting_dataset/config/on_premises.yaml @@ -7,7 +7,7 @@ input_data: solar_pv_path: solar_pv_data_filename: /storage/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/raw/PV/PVOutput.org/UK_PV_timeseries_batch.nc solar_pv_metadata_filename: /storage/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/raw/PV/PVOutput.org/UK_PV_metadata.csv - gsp_zarr_path: /storage/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/raw/PV/GSP/v0/pv_gsp.zarr + gsp_zarr_path: /storage/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/raw/PV/GSP/v1/pv_gsp.zarr output_data: filepath: /storage/data/ocf/solar_pv_nowcasting/nowcasting_dataset_pipeline/prepared_ML_training_data/v7/ process: diff --git a/nowcasting_dataset/data_sources/gsp/gsp_data_source.py b/nowcasting_dataset/data_sources/gsp/gsp_data_source.py index b6b35f5f..00708821 100644 --- a/nowcasting_dataset/data_sources/gsp/gsp_data_source.py +++ b/nowcasting_dataset/data_sources/gsp/gsp_data_source.py @@ -396,16 +396,3 @@ def load_solar_gsp_data( gsp_power_df.columns = [int(col) for col in gsp_power_df.columns] return gsp_power_df - - -def normalize_gsp_data(gsp_power: pd.DataFrame): - - # load installed capacity - - # merged with gsp power - - # check that no gsp dont have installed capacity - - # normalize by installed capacity - - return gsp_power diff --git a/tests/data_sources/get_test_data.py b/tests/data_sources/get_test_data.py index 2369b786..c038bfe4 100644 --- a/tests/data_sources/get_test_data.py +++ b/tests/data_sources/get_test_data.py @@ -81,7 +81,7 @@ # ### GSP data gsp = GSPDataSource( - filename="gs://solar-pv-nowcasting-data/PV/GSP/v0/pv_gsp.zarr", + filename="gs://solar-pv-nowcasting-data/PV/GSP/v1/pv_gsp.zarr", start_dt=start_dt, end_dt=end_dt, history_minutes=30, From e8e3b34276f3d5d850875ea7eedde04954afc064 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Thu, 30 Sep 2021 12:12:12 +0100 Subject: [PATCH 5/8] update scripts and load method for making data to save installed capacity too, --- .../data_sources/gsp/gsp_data_source.py | 9 +++++++++ scripts/get_raw_pv_gsp_data.py | 19 +++++++++++++++---- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/nowcasting_dataset/data_sources/gsp/gsp_data_source.py b/nowcasting_dataset/data_sources/gsp/gsp_data_source.py index 00708821..413554db 100644 --- a/nowcasting_dataset/data_sources/gsp/gsp_data_source.py +++ b/nowcasting_dataset/data_sources/gsp/gsp_data_source.py @@ -383,7 +383,16 @@ def load_solar_gsp_data( # Open data - it may be quicker to open byte file first, but decided just to keep it like this at the moment gsp_power = xr.open_dataset(filename, engine="zarr") gsp_power = gsp_power.sel(datetime_gmt=slice(start_dt, end_dt)) + + # only take generation data + gsp_power = gsp_power.generation_mw + + # make dataframe with index datetime_gmt and columns og gsp_id gsp_power_df = gsp_power.to_dataframe() + gsp_power_df.reset_index(inplace=True) + gsp_power_df = gsp_power_df.pivot( + index="datetime_gmt", columns="gsp_id", values="generation_mw" + ) # Save memory del gsp_power diff --git a/scripts/get_raw_pv_gsp_data.py b/scripts/get_raw_pv_gsp_data.py index 5406d053..32b56c28 100755 --- a/scripts/get_raw_pv_gsp_data.py +++ b/scripts/get_raw_pv_gsp_data.py @@ -11,6 +11,7 @@ import yaml import os import numcodecs +import xarray as xr from nowcasting_dataset.data_sources.gsp.pvlive import load_pv_gsp_raw_data_from_pvlive from pathlib import Path @@ -36,11 +37,21 @@ data_df = load_pv_gsp_raw_data_from_pvlive(start=start, end=end) # pivot to index as datetime_gmt, and columns as gsp_id -data_df = data_df.pivot(index="datetime_gmt", columns="gsp_id", values="generation_mw") -data_df.columns = [str(col) for col in data_df.columns] +data_generation = data_df.pivot(index="datetime_gmt", columns="gsp_id", values="generation_mw") +data_generation.columns = [str(col) for col in data_generation.columns] +data_generation_xarray = xr.DataArray( + data_generation, name="generation_mw", dims=["datetime_gmt", "gsp_id"] +) -# change to xarray -data_xarray = data_df.to_xarray() +data_capacity = data_df.pivot( + index="datetime_gmt", columns="gsp_id", values="installedcapacity_mwp" +) +data_capacity.columns = [str(col) for col in data_capacity.columns] +data_capacity_xarray = xr.DataArray( + data_capacity, name="installedcapacity_mwp", dims=["datetime_gmt", "gsp_id"] +) + +data_xarray = xr.merge([data_generation_xarray, data_capacity_xarray]) # save config to file with open(os.path.join(LOCAL_TEMP_PATH, "configuration.yaml"), "w+") as f: From 1c07a78ecfcbbf6dc6d986365204c8d498a85726 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Thu, 30 Sep 2021 12:26:22 +0100 Subject: [PATCH 6/8] update test data --- nowcasting_dataset/dataset/split/__init__.py | 0 tests/data/gsp/test.zarr/.zmetadata | 487 ++---------------- tests/data/gsp/test.zarr/1/.zattrs | 5 - tests/data/gsp/test.zarr/1/0 | Bin 688 -> 0 bytes tests/data/gsp/test.zarr/10/.zarray | 20 - tests/data/gsp/test.zarr/10/0 | Bin 699 -> 0 bytes tests/data/gsp/test.zarr/100/.zattrs | 5 - tests/data/gsp/test.zarr/100/0 | Bin 685 -> 0 bytes tests/data/gsp/test.zarr/101/.zattrs | 5 - tests/data/gsp/test.zarr/101/0 | Bin 685 -> 0 bytes tests/data/gsp/test.zarr/102/.zarray | 20 - tests/data/gsp/test.zarr/102/.zattrs | 5 - tests/data/gsp/test.zarr/102/0 | Bin 686 -> 0 bytes tests/data/gsp/test.zarr/103/.zarray | 20 - tests/data/gsp/test.zarr/103/.zattrs | 5 - tests/data/gsp/test.zarr/103/0 | Bin 685 -> 0 bytes tests/data/gsp/test.zarr/104/.zarray | 20 - tests/data/gsp/test.zarr/104/.zattrs | 5 - tests/data/gsp/test.zarr/104/0 | Bin 691 -> 0 bytes tests/data/gsp/test.zarr/105/.zarray | 20 - tests/data/gsp/test.zarr/105/.zattrs | 5 - tests/data/gsp/test.zarr/105/0 | Bin 688 -> 0 bytes tests/data/gsp/test.zarr/106/.zarray | 20 - tests/data/gsp/test.zarr/106/.zattrs | 5 - tests/data/gsp/test.zarr/106/0 | Bin 671 -> 0 bytes tests/data/gsp/test.zarr/107/.zarray | 20 - tests/data/gsp/test.zarr/107/.zattrs | 5 - tests/data/gsp/test.zarr/107/0 | Bin 685 -> 0 bytes tests/data/gsp/test.zarr/108/.zarray | 20 - tests/data/gsp/test.zarr/108/.zattrs | 5 - tests/data/gsp/test.zarr/108/0 | Bin 683 -> 0 bytes tests/data/gsp/test.zarr/109/.zarray | 20 - tests/data/gsp/test.zarr/109/.zattrs | 5 - tests/data/gsp/test.zarr/109/0 | Bin 674 -> 0 bytes tests/data/gsp/test.zarr/11/.zarray | 20 - tests/data/gsp/test.zarr/11/.zattrs | 5 - tests/data/gsp/test.zarr/11/0 | Bin 694 -> 0 bytes tests/data/gsp/test.zarr/110/.zarray | 20 - tests/data/gsp/test.zarr/110/.zattrs | 5 - tests/data/gsp/test.zarr/110/0 | Bin 683 -> 0 bytes tests/data/gsp/test.zarr/111/.zarray | 20 - tests/data/gsp/test.zarr/111/.zattrs | 5 - tests/data/gsp/test.zarr/111/0 | Bin 683 -> 0 bytes tests/data/gsp/test.zarr/112/.zarray | 20 - tests/data/gsp/test.zarr/112/.zattrs | 5 - tests/data/gsp/test.zarr/112/0 | Bin 140 -> 0 bytes tests/data/gsp/test.zarr/113/.zarray | 20 - tests/data/gsp/test.zarr/113/.zattrs | 5 - tests/data/gsp/test.zarr/113/0 | Bin 681 -> 0 bytes tests/data/gsp/test.zarr/114/.zarray | 20 - tests/data/gsp/test.zarr/114/.zattrs | 5 - tests/data/gsp/test.zarr/114/0 | Bin 140 -> 0 bytes tests/data/gsp/test.zarr/115/.zarray | 20 - tests/data/gsp/test.zarr/115/.zattrs | 5 - tests/data/gsp/test.zarr/115/0 | Bin 677 -> 0 bytes tests/data/gsp/test.zarr/116/.zarray | 20 - tests/data/gsp/test.zarr/116/.zattrs | 5 - tests/data/gsp/test.zarr/116/0 | Bin 684 -> 0 bytes tests/data/gsp/test.zarr/datetime_gmt/.zarray | 6 +- tests/data/gsp/test.zarr/datetime_gmt/.zattrs | 4 +- tests/data/gsp/test.zarr/datetime_gmt/0 | Bin 336 -> 138 bytes .../test.zarr/{1 => generation_mw}/.zarray | 10 +- .../data/gsp/test.zarr/generation_mw/.zattrs | 6 + tests/data/gsp/test.zarr/generation_mw/0.0 | Bin 0 -> 2630 bytes .../gsp/test.zarr/{101 => gsp_id}/.zarray | 10 +- .../data/gsp/test.zarr/{10 => gsp_id}/.zattrs | 4 +- tests/data/gsp/test.zarr/gsp_id/0 | Bin 0 -> 71 bytes .../{100 => installedcapacity_mwp}/.zarray | 10 +- .../test.zarr/installedcapacity_mwp/.zattrs | 6 + .../gsp/test.zarr/installedcapacity_mwp/0.0 | Bin 0 -> 218 bytes tests/data_sources/get_test_data.py | 27 +- 71 files changed, 80 insertions(+), 925 deletions(-) create mode 100644 nowcasting_dataset/dataset/split/__init__.py delete mode 100644 tests/data/gsp/test.zarr/1/.zattrs delete mode 100644 tests/data/gsp/test.zarr/1/0 delete mode 100644 tests/data/gsp/test.zarr/10/.zarray delete mode 100644 tests/data/gsp/test.zarr/10/0 delete mode 100644 tests/data/gsp/test.zarr/100/.zattrs delete mode 100644 tests/data/gsp/test.zarr/100/0 delete mode 100644 tests/data/gsp/test.zarr/101/.zattrs delete mode 100644 tests/data/gsp/test.zarr/101/0 delete mode 100644 tests/data/gsp/test.zarr/102/.zarray delete mode 100644 tests/data/gsp/test.zarr/102/.zattrs delete mode 100644 tests/data/gsp/test.zarr/102/0 delete mode 100644 tests/data/gsp/test.zarr/103/.zarray delete mode 100644 tests/data/gsp/test.zarr/103/.zattrs delete mode 100644 tests/data/gsp/test.zarr/103/0 delete mode 100644 tests/data/gsp/test.zarr/104/.zarray delete mode 100644 tests/data/gsp/test.zarr/104/.zattrs delete mode 100644 tests/data/gsp/test.zarr/104/0 delete mode 100644 tests/data/gsp/test.zarr/105/.zarray delete mode 100644 tests/data/gsp/test.zarr/105/.zattrs delete mode 100644 tests/data/gsp/test.zarr/105/0 delete mode 100644 tests/data/gsp/test.zarr/106/.zarray delete mode 100644 tests/data/gsp/test.zarr/106/.zattrs delete mode 100644 tests/data/gsp/test.zarr/106/0 delete mode 100644 tests/data/gsp/test.zarr/107/.zarray delete mode 100644 tests/data/gsp/test.zarr/107/.zattrs delete mode 100644 tests/data/gsp/test.zarr/107/0 delete mode 100644 tests/data/gsp/test.zarr/108/.zarray delete mode 100644 tests/data/gsp/test.zarr/108/.zattrs delete mode 100644 tests/data/gsp/test.zarr/108/0 delete mode 100644 tests/data/gsp/test.zarr/109/.zarray delete mode 100644 tests/data/gsp/test.zarr/109/.zattrs delete mode 100644 tests/data/gsp/test.zarr/109/0 delete mode 100644 tests/data/gsp/test.zarr/11/.zarray delete mode 100644 tests/data/gsp/test.zarr/11/.zattrs delete mode 100644 tests/data/gsp/test.zarr/11/0 delete mode 100644 tests/data/gsp/test.zarr/110/.zarray delete mode 100644 tests/data/gsp/test.zarr/110/.zattrs delete mode 100644 tests/data/gsp/test.zarr/110/0 delete mode 100644 tests/data/gsp/test.zarr/111/.zarray delete mode 100644 tests/data/gsp/test.zarr/111/.zattrs delete mode 100644 tests/data/gsp/test.zarr/111/0 delete mode 100644 tests/data/gsp/test.zarr/112/.zarray delete mode 100644 tests/data/gsp/test.zarr/112/.zattrs delete mode 100644 tests/data/gsp/test.zarr/112/0 delete mode 100644 tests/data/gsp/test.zarr/113/.zarray delete mode 100644 tests/data/gsp/test.zarr/113/.zattrs delete mode 100644 tests/data/gsp/test.zarr/113/0 delete mode 100644 tests/data/gsp/test.zarr/114/.zarray delete mode 100644 tests/data/gsp/test.zarr/114/.zattrs delete mode 100644 tests/data/gsp/test.zarr/114/0 delete mode 100644 tests/data/gsp/test.zarr/115/.zarray delete mode 100644 tests/data/gsp/test.zarr/115/.zattrs delete mode 100644 tests/data/gsp/test.zarr/115/0 delete mode 100644 tests/data/gsp/test.zarr/116/.zarray delete mode 100644 tests/data/gsp/test.zarr/116/.zattrs delete mode 100644 tests/data/gsp/test.zarr/116/0 rename tests/data/gsp/test.zarr/{1 => generation_mw}/.zarray (78%) create mode 100644 tests/data/gsp/test.zarr/generation_mw/.zattrs create mode 100644 tests/data/gsp/test.zarr/generation_mw/0.0 rename tests/data/gsp/test.zarr/{101 => gsp_id}/.zarray (77%) rename tests/data/gsp/test.zarr/{10 => gsp_id}/.zattrs (59%) create mode 100644 tests/data/gsp/test.zarr/gsp_id/0 rename tests/data/gsp/test.zarr/{100 => installedcapacity_mwp}/.zarray (78%) create mode 100644 tests/data/gsp/test.zarr/installedcapacity_mwp/.zattrs create mode 100644 tests/data/gsp/test.zarr/installedcapacity_mwp/0.0 diff --git a/nowcasting_dataset/dataset/split/__init__.py b/nowcasting_dataset/dataset/split/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/data/gsp/test.zarr/.zmetadata b/tests/data/gsp/test.zarr/.zmetadata index a6cf4ed1..ab697b80 100644 --- a/tests/data/gsp/test.zarr/.zmetadata +++ b/tests/data/gsp/test.zarr/.zmetadata @@ -4,384 +4,9 @@ ".zgroup": { "zarr_format": 2 }, - "1/.zarray": { - "chunks": [ - 145 - ], - "compressor": { - "blocksize": 0, - "clevel": 5, - "cname": "lz4", - "id": "blosc", - "shuffle": 1 - }, - "dtype": "Ibf<_XXu}4)y9W#np`yD^ZMTmyUHtaH)d=* zJ7f7KsL7j`pWN_o!yN&~8^?a%>9_c-3^jSKoXu62ziJ`^is=?IFS(Ag<1|@e(eX#G zKfHeP^x>^5$M25CF6Zhiv>jsUQF0PVNsN&o-= diff --git a/tests/data/gsp/test.zarr/10/.zarray b/tests/data/gsp/test.zarr/10/.zarray deleted file mode 100644 index 8fe50e27..00000000 --- a/tests/data/gsp/test.zarr/10/.zarray +++ /dev/null @@ -1,20 +0,0 @@ -{ - "chunks": [ - 145 - ], - "compressor": { - "blocksize": 0, - "clevel": 5, - "cname": "lz4", - "id": "blosc", - "shuffle": 1 - }, - "dtype": "j<6*E#@7%XkH;`cq)MS^Js`EC!-RaZx#XITO)D-RvsL6#+c8i}k$mw24I(&gm z^#Fqxvavum!sNKMd>6KPm@*izXvjY=K8HaLYO;l8gSm}Ku0TlQulDV8Bqj8rCi5~Z zJM~88Mp@Ky@h|D=S`1fknyjuDly}7FCBv8C-;3CNt}vKGO;-PxxF$JNxOlxm)mHvT zU9TQPO}^5_-2W-=c=^f^jX|h?~PM!K?7Z~hkW|sRW?O}+4ntWyMtOdS* z=WOaaP#j|GtC83ZHM!w&>ci%RDgp~bB=35yb76Re(`22{Ovn09ObjjS1SlZB@rxc%z=n-9+( z+_`pg{bB}bsL25<4_~|c@WJhSH?Lngxoeu@SE$MI4)b;#J$vEeZI%$8FU!z!KQKPGuYeP+o>=xZ~)V$3IjV6gE)f$LjYJS0JNgY@Bjb+ diff --git a/tests/data/gsp/test.zarr/100/.zattrs b/tests/data/gsp/test.zarr/100/.zattrs deleted file mode 100644 index e4c12287..00000000 --- a/tests/data/gsp/test.zarr/100/.zattrs +++ /dev/null @@ -1,5 +0,0 @@ -{ - "_ARRAY_DIMENSIONS": [ - "datetime_gmt" - ] -} \ No newline at end of file diff --git a/tests/data/gsp/test.zarr/100/0 b/tests/data/gsp/test.zarr/100/0 deleted file mode 100644 index ede126e6581586a5b14562e904e225ec452c0a1b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 685 zcmZQ#ROIMjVPF8^wM+~QA`A=+p+GFdz{v22X`1TUbX&G*3}OY$tPHQt*aj#va5FLd zVZ6v@5n*?-?6;P$(3j^=SQ-=Oflg8~o+tK}Y* znf1$yVbeAVp)UXMvpb;%n|<3@?kC1R?W0*_<@|fR$xwsO+?JYe?7sH6>M@1Hv-YgP zD5e5A2!oT3cD*?+ufgzQYH_B>Y9;F=sKG9t`LW$P%U&yAjY_SME4c$T_Dq zp`2B_LzyNppT}*mkg3sKH?9zd_T3XFxjok2x(I48pJCm+y>mYt@-qpzxx@U1amX!<<<}V}UZZDVK0X10m?b(3ehZ7fnxqknq zsIP(?)Zn6+BPv$*x;q1=vn<@ITkMb9VEr3O?weeHF^F=mllEEX@S_WA@bcXkZTr+* zcI3Ilyh^{RYzj4)$0^`})&uTp{YeoUJAAkn;x;&X+OESlpS}6;=JEB5r*|(1egiew zvU}N%LuW2szjpP)p{+~W)Sw36s$90?@X2#$&zwAVXxFA0Vdw^{0R=$GP1>H3LB@D(%k;l_f zOa*cf1{cQ8j&%4II*Vb=wcevnPA!|E20!0fd1gcRnj@>q+ut;tGWh^CILm$APdh2o z{k5(E2NaS#&2Ss6%ztjSZDQO3hTk5_d&FBaE1?E^pD{kCJ(1-?;Ry+Q@j^u6iLEU^pa#pd zE!@28(6O@@&K}#hYuyAzsKJ*C=dIng_u$D>NA_)9ySU8(-C#AK04TXh+cPrA*n!RB W)(7(B8F-l(r0onD*cBN9z?uNx5y&$D diff --git a/tests/data/gsp/test.zarr/102/.zarray b/tests/data/gsp/test.zarr/102/.zarray deleted file mode 100644 index 8fe50e27..00000000 --- a/tests/data/gsp/test.zarr/102/.zarray +++ /dev/null @@ -1,20 +0,0 @@ -{ - "chunks": [ - 145 - ], - "compressor": { - "blocksize": 0, - "clevel": 5, - "cname": "lz4", - "id": "blosc", - "shuffle": 1 - }, - "dtype": "p$68&2wFmft!ip zHSoGAWY^8 zl(stL{n~1Z%f_jGQ`CcwK~3&o_*C}D7vEbB3}<4^?5{HiLruQ$(Z+837NZYQ^XjH7 z-gk~+1G2F|Hp1ksOSr=}2A<{=$ey;*q#$<@7u4jKSJ`4-3lB0sVwmUYH%le=GSp;| z<695cMxUyFsl7*)RkxGj7EY6&X!nHO*tY!em21x~@YOBlSPnJ$Q$S)#T*xBhe1=1I z&tES+_z7zAaW<)Av)j#L&Trg(SaMwz10PP4Ew0-yYzX^R{dd9f%Be@#%;!Q)KJt0u zKJE3Jzt=I$-N2F`-hKpX@&TUAQz288%tGoD9@v#WW|)H0WaWD{U2fOU{7@2EES8yU zsdp1<@}t}8lHxB4zNRuPxvRsYa9I~>vb=`ambTyLt#ysp{fUZu%pic%&^#0ka4>!-AI=C$3A=KoI$x9F1c=YJT;X6mJT%A+J k@D|Nv1t1@k>ZI)%8D#9hhH>jN@Eb75iZd861b|fo01ZIc$N&HU diff --git a/tests/data/gsp/test.zarr/103/.zarray b/tests/data/gsp/test.zarr/103/.zarray deleted file mode 100644 index 8fe50e27..00000000 --- a/tests/data/gsp/test.zarr/103/.zarray +++ /dev/null @@ -1,20 +0,0 @@ -{ - "chunks": [ - 145 - ], - "compressor": { - "blocksize": 0, - "clevel": 5, - "cname": "lz4", - "id": "blosc", - "shuffle": 1 - }, - "dtype": "XF2j#omA_XBthI+4T)oPvda@bw>a@=`#qV~V&w(2JclYOsf%%E)V(n}G8rlp0 zM==%1K^VMN?Ad0BZ0Di>$=+lA>yhLU>r4)=Hm+8oVNNeb+ll zq10{Pckq5ozEp_Y;F>3ymaj8Dv@@*Q@b~g3Vdfc7gO{%q*DWa9{`tuNv(BX^HEK|U zpYuFB`_8|fYt4M#s9sIgaNGuW)@iL)3(|0BNSwuZaly)k`=AD2j1cUqjS67i@Zh9u z%lx8+P=h6AJgI$PygTWP`)+BWV(VYH4Nmp+X0)%rH=99wU8{*s@TW+q!IF+zdm~rB z46vG6v+~>Ds?|_~pBYM-JUk<|ImP1Tu@IqCR=5q$Te@cF`71YHe|U56+>u>N@;^fj zHtk-snxJ83MqX0N1b2ZU6uP diff --git a/tests/data/gsp/test.zarr/104/.zarray b/tests/data/gsp/test.zarr/104/.zarray deleted file mode 100644 index 8fe50e27..00000000 --- a/tests/data/gsp/test.zarr/104/.zarray +++ /dev/null @@ -1,20 +0,0 @@ -{ - "chunks": [ - 145 - ], - "compressor": { - "blocksize": 0, - "clevel": 5, - "cname": "lz4", - "id": "blosc", - "shuffle": 1 - }, - "dtype": "xK75nwW&Z2=8Qe5(RuQ=u+u-E1qbFX`fmp{c<)5! zc}*$-cYOOfWKWl}%i%Qm$NH=KH{NP6EWCVC?-1uKIap9c$(Zx?g$lH|s?8{Vb}{E{ zIMm>IGbX;e9MSS|ee{Yn&4)iia2lMOV^BDmPoClL^xE$=YdR%ap)O|rZSW;3rrojl z$)ml`zwc`*fEwH)KYRPP`;Xo(sI>XVb$I)3oCY_qxtP+jAf4gqG1fJ4?XFe{P=jOd zo{pST@U?A4h26I9`%+AiP=ojD{BCArbF0|3bjMby=Qmn#8vK9h;d@V>KmG9N*5%XN z)^wHUKn;#xeDv6*`*&Zwe(~z%(;J7ECqNCZSi0xvxr>*tJ$&=*;{FXCc4!9c0QsQg lre$Z($RJ|}HjGP;!5#=y7#KL27#LL<*qIpEWEld$IspyE+^hfq diff --git a/tests/data/gsp/test.zarr/105/.zarray b/tests/data/gsp/test.zarr/105/.zarray deleted file mode 100644 index 8fe50e27..00000000 --- a/tests/data/gsp/test.zarr/105/.zarray +++ /dev/null @@ -1,20 +0,0 @@ -{ - "chunks": [ - 145 - ], - "compressor": { - "blocksize": 0, - "clevel": 5, - "cname": "lz4", - "id": "blosc", - "shuffle": 1 - }, - "dtype": "(`&;rrgOCBj;Ot9< z^S(30uhNM4I6YumpiDB<;I{v(4lO(3^-8+_fo;Jwm;ZmE21_0PDO3Au>;DQ4pYmzn zj_D$s3S=V;Ui!E}Re-^)==8ag6|3XSySG9OKK9YjNg-|v2=S3x-WtnY{z0W zo2jF7&AvK*m(S}}F5ooSe`^Nc6$bHpQNI4^_h$S!b^>a!uH{>qtl5Tji&tsBe)J~r zdLz`}!^y_^SLPcTvX*Vzxw^q+DNche+(m>OAKY{)s6+9kv~Y8XS46FMNu%zsj6G<0*b0QcQ6gY&84St!E!zJ%9A@^11yR z7Bwz}8l1H9(B&J?pS^u?`^t?Q7xt{MgBq;sFmv g+Mba?#tv*2w?2cmy**evg9-x&69bzfLjYJC0O&2zp#T5? diff --git a/tests/data/gsp/test.zarr/106/.zarray b/tests/data/gsp/test.zarr/106/.zarray deleted file mode 100644 index 8fe50e27..00000000 --- a/tests/data/gsp/test.zarr/106/.zarray +++ /dev/null @@ -1,20 +0,0 @@ -{ - "chunks": [ - 145 - ], - "compressor": { - "blocksize": 0, - "clevel": 5, - "cname": "lz4", - "id": "blosc", - "shuffle": 1 - }, - "dtype": "A8qQnI?n5GdZ_a7gL^mWbsj8;ALX?AYkR7rMU9v=}QiflP$C0U=Ef4m|Kbey8-d{KIxNJ*c@Fx{~GZ7`~JnpLSDH?_8vQ4r*?3egKR4 zMu}_Fm6jJ;nf{zxfXm#*`yaLiO#5M*6sTLqru@MMYHq!wQCSbe%@n@s10kDSSY|dr z&AlkT&ZO#KU;d`n`zDc7^-XJWnQK=w^W;Cnh2ObLWgDBnO?m@0w|YsjLj=Q_;G2g; zf0#dJHSLC)yS#hVCLP9gmoD`A`gHP5-mx8*xzQQ9XDIk>mko2wzc{G5!AH6Ujcj`R4dgQP=lb$I79O$F{EAvhC92vj;XV zYl51awdTUhJ0EV`Ir!w|nf=@6nM*;^aGq0=3E&QO!z|F+) zn@Oa!UsiFG%j7nOy$dFmUJce$VBlq9_#iNKUL!|+BCq{x>8%}i{C9Z(4MGM8gA@IJ zyIc!k1k9@W4c2^S|4( z*MCAb705;y%=_7B+J76>O%J+WvfD(ms-K1$ta@gj#gop}PkI?%X-J<-{r?i*OqJNB-|C_lLi=7iI~*WPH)+rvx>)A-??6jU7*1 zycv|^{dR2mH4kcVq4=wm&HBZ&XXDIttcI)(MMX15SH9ve5BLZw#?rqVz%YXAeP8Z)UTCw}+@l%&CoI1L1 z=gwtaVo-x!mTWuu;Kl8SPd_}o_2}%DS&>kK%jRs@b@1r9vlq{uJ8@vsvz`?}87Qhey)&l_VK;0$) diff --git a/tests/data/gsp/test.zarr/108/.zarray b/tests/data/gsp/test.zarr/108/.zarray deleted file mode 100644 index 8fe50e27..00000000 --- a/tests/data/gsp/test.zarr/108/.zarray +++ /dev/null @@ -1,20 +0,0 @@ -{ - "chunks": [ - 145 - ], - "compressor": { - "blocksize": 0, - "clevel": 5, - "cname": "lz4", - "id": "blosc", - "shuffle": 1 - }, - "dtype": "!vCD92nN^o0gj*vRj!+fq|Ea;e&vrrGQUl->s}8dghxRYF(ZUGzb|W4BnE^ zXZ4{)?~bW*S`yRzI)}$lgC!JJWF&l;#ZYwUjpvM6>PkPM2CL}W$SsNe5Wnh7`k{A0 zt=!0_0@(o9n)K-dzMW_-KTdWlZgIhWV-~d>!`7m35#7Z#ven z|4BCAb)AAQ#v=FL9>!_#mYrNYngvQ*HvfK9dQ0MaZ$8xEWYdicVt9)gM6cX7vay}A zRt;*f^vctxjr{oo8VuFNA8-2i4yVBn)#kQt-XNQ_gWsma+&A;=E2zQ8ivng|s;FQ{ zalQLwdD7eN0GPoucHg((pObXaWKMd(XAeG{2JhBMUvpT6X(F|4r@ b#-5RZ2W%9NK7+J9STm0y1Dhg409XeAb}G{% diff --git a/tests/data/gsp/test.zarr/109/.zarray b/tests/data/gsp/test.zarr/109/.zarray deleted file mode 100644 index 8fe50e27..00000000 --- a/tests/data/gsp/test.zarr/109/.zarray +++ /dev/null @@ -1,20 +0,0 @@ -{ - "chunks": [ - 145 - ], - "compressor": { - "blocksize": 0, - "clevel": 5, - "cname": "lz4", - "id": "blosc", - "shuffle": 1 - }, - "dtype": "4?Ug znGeZOAQNG(SBcPj;oGb)yz^w7`qwD;Ld{h_Qzx{EA@65teP`*KjOwNTpyvAAc(LV} z%#t?oo7V1+wtf-bjLY2CKg_AG>ndVuJEWfpCtp*Cnp+an_11~u!uP+WzAxvdsCpMd z&E?p|5SDPdxF)PuV%ejgas2JL%q=>jB;7dOEVRM7#W5_^F#&3>;`Ox~*D>5x*nh!K zcG17CJz7w6Whdl5P;JVY=N=iza&p0%kE*!L<=-MWulC^c_tNp@-`8>T@9VA8TbtucOI{*Lx diff --git a/tests/data/gsp/test.zarr/11/.zarray b/tests/data/gsp/test.zarr/11/.zarray deleted file mode 100644 index 8fe50e27..00000000 --- a/tests/data/gsp/test.zarr/11/.zarray +++ /dev/null @@ -1,20 +0,0 @@ -{ - "chunks": [ - 145 - ], - "compressor": { - "blocksize": 0, - "clevel": 5, - "cname": "lz4", - "id": "blosc", - "shuffle": 1 - }, - "dtype": "Kscc;RQCzUc>6Ww3-r09_%12+@H zZ>Gopg_qsjJ;$#jbEA+9W76+;3Jg3<3?BsdW!7~6vf4d!I(LV@w7cW8iLZf1ApwNR zcUYZ7k8YXw^#TLO>iP)+>A6so?RKqcD*Sri^=u-K{h|+%F|VN}U)c3`?SW3`g<^9L zO9jguEsEZaWGs-0F!^|os-ZYj%hlBk{tR+kA{1^zO}_EefF*bBvS7uQi3j-3F{b{4 znryBbV{-8Ls_(9S!exwInr*;W4Mqe; zva8i>k=a*f1T(CAdG6X3w_2#d;ZG}ze?E-KT(t4-Kk3)I891R%mf5g+*F}fb&Cewd zGM}Ar<+OqzE|Wtyp1Jqz`P0|$KRh~fc>T1D7f_Rf((6 diff --git a/tests/data/gsp/test.zarr/110/.zarray b/tests/data/gsp/test.zarr/110/.zarray deleted file mode 100644 index 8fe50e27..00000000 --- a/tests/data/gsp/test.zarr/110/.zarray +++ /dev/null @@ -1,20 +0,0 @@ -{ - "chunks": [ - 145 - ], - "compressor": { - "blocksize": 0, - "clevel": 5, - "cname": "lz4", - "id": "blosc", - "shuffle": 1 - }, - "dtype": "lQ-^E^^y;AUd@ z%`|83t)s^R{v2a)W{5R(UGP{yfq|Ea;e&vW<_vC;GtBoV8*^>TSU&X<&>&=hF!%?EW}tQC|w+vc|7 z{@VS>rUKargY&IkZQt`M>h(+0_3c^9Raf#r4X%`Mo~&uO`7lG-3B`<`8y0VX8eFt; zhlGi$|Bjuj9(`D{^W++w2J?5Z89%qmx>@?*mPKdV490IzgXi(TI*?g@;Wxu`>1_s= zl22qn4Sw-GWB=p#tIsxEyy|jveXS8rgEJ#$&GneRFU4`_8uWTCB;0G0a3&LY9uGzGHnisx^V+u}#wfmPJx_JA>=_v#(Z!?BAZ-uU%wx#FrpOQg)&T&zfZyl< diff --git a/tests/data/gsp/test.zarr/111/.zarray b/tests/data/gsp/test.zarr/111/.zarray deleted file mode 100644 index 8fe50e27..00000000 --- a/tests/data/gsp/test.zarr/111/.zarray +++ /dev/null @@ -1,20 +0,0 @@ -{ - "chunks": [ - 145 - ], - "compressor": { - "blocksize": 0, - "clevel": 5, - "cname": "lz4", - "id": "blosc", - "shuffle": 1 - }, - "dtype": "npD`!w-#`pp!SlJI+E4=G`;jJKeNaZqnO^DYyN6 zS0I}TWFrjL*?mzeHl%x1n_`7A`+|0Tn89}wwJ+3f?P6%Qi@v0C`h5-y)Zn~HxypyD z-_4)8!)~SUX?ffR2j#^Y9ogn0otnS7wNdo*5>cp&53Qdl`=)h2!#9u4@c(!Dnxmlx zpL!q|H1)UOxAgOYf`<;3dEj*MTO~%GwV!|f_%z4TRD<iI9))W@5Xw@^EeGoNVj%6FQwCb*-bL9WuXQiFuR?l z@uHf+Ab)wQ)pe8glc5IJIJA9S)4BJow#M~4jc*y`aT;tm|ID=~&mKLxd*{;0T^pBm zszD7-UUl%|o#)R#ynA-<-knPaR|G;0E}XGp*TJJFPF}cj?bP12lYP((Rsr%s$xX(d Zk%0$n6pucGv^`iek0Aq_B0~UJ2LL8-)7Jn1 diff --git a/tests/data/gsp/test.zarr/112/.zarray b/tests/data/gsp/test.zarr/112/.zarray deleted file mode 100644 index 8fe50e27..00000000 --- a/tests/data/gsp/test.zarr/112/.zarray +++ /dev/null @@ -1,20 +0,0 @@ -{ - "chunks": [ - 145 - ], - "compressor": { - "blocksize": 0, - "clevel": 5, - "cname": "lz4", - "id": "blosc", - "shuffle": 1 - }, - "dtype": "5Iz;>7~!CUQEg1sOwc^;AUd@ z&Gha0CaHFYl*wVNAxz(mBrZHrVBlq9_#iONK~LFhm$S2@_NT8t;#YuXq5y=!a~jqp z=aYqETHRctYie}qq72kv50|y){_;%o_6}GY@MGQt zb`)cgU2OGd`n&n-jaCS5mDc~L5j8stYViN#1u^#-bU$+F7ruC{C;77+YH-Hk2cKsg zdTZOad^#R#aLSp;^b-tw?IS+q zxXORMbU_ws@MVopPa4IyAFpp-l*eWCBp0W_@~L5K&VKrG{lc7O-iHFO74L)^+{3;g z)SMx9#@-zPx7V$#JzNAe*zS<;LJ_eo;~^S?!|)#H!ok>ynMoKsKJHnH|;-q;o8l6*LEG)ziN^Un!!pyJ}9|K c+cPrA*n!RB)@R^1VBq65WMB(m2mq@G0Ib*B0ssI2 diff --git a/tests/data/gsp/test.zarr/114/.zarray b/tests/data/gsp/test.zarr/114/.zarray deleted file mode 100644 index 8fe50e27..00000000 --- a/tests/data/gsp/test.zarr/114/.zarray +++ /dev/null @@ -1,20 +0,0 @@ -{ - "chunks": [ - 145 - ], - "compressor": { - "blocksize": 0, - "clevel": 5, - "cname": "lz4", - "id": "blosc", - "shuffle": 1 - }, - "dtype": "FXmWpyrzHe7ui=IsIVWA&JxmFs~-1oDVwlkC!9}+py z^XAl!$3LLvJ~I1i#(7lf)w9o&^Hi*DaGPt;x&P*)2R9$weRS`^h5hRjKS0fOU32Bx zhj*`^K7MfT=B1-+%b@0_Puh3kG6-Beb$HYCD0FjGfdZg(CT-8iAY%tMid!GZlV{*# PV&FDpU{_=a0P6t&xfj+= diff --git a/tests/data/gsp/test.zarr/116/.zarray b/tests/data/gsp/test.zarr/116/.zarray deleted file mode 100644 index 8fe50e27..00000000 --- a/tests/data/gsp/test.zarr/116/.zarray +++ /dev/null @@ -1,20 +0,0 @@ -{ - "chunks": [ - 145 - ], - "compressor": { - "blocksize": 0, - "clevel": 5, - "cname": "lz4", - "id": "blosc", - "shuffle": 1 - }, - "dtype": "N#nZacVAVgc>}X!Kgqb z_Vf-(-D?3Ua$k53;57Kj?Mo7~^4~-<)XaJw^Wzvh)L@a5%}PP56_$5?*w(oFlxLD9 z)L@S}7nOF*U-u`Xi-|LRTfN0}oCeS6kml$z=32)PllObZ?)$f)2H&p;&Y$r5%8AKx z_t?c(ecxUWHF$1fq5a>>&Yb+g`3-MVW^@VSGkYsgO6x5TFu^ic>SC`YXY_%biR5Ar@=uJ7py#a=kfawFYaGE zdtgy0)L^cK2QOT?^YqdEdk-Jnx^ie~Ak^TGaT8{*-MSqJw(Q)td2x#dn!&0-J}9|K i+cProDl%{}F>vcK@Eb7j3D~JHa4<2j`7;E7wEzIE=+(*q diff --git a/tests/data/gsp/test.zarr/datetime_gmt/.zarray b/tests/data/gsp/test.zarr/datetime_gmt/.zarray index 7f5ce911..44c9a184 100644 --- a/tests/data/gsp/test.zarr/datetime_gmt/.zarray +++ b/tests/data/gsp/test.zarr/datetime_gmt/.zarray @@ -1,6 +1,6 @@ { "chunks": [ - 145 + 49 ], "compressor": { "blocksize": 0, @@ -14,7 +14,7 @@ "filters": null, "order": "C", "shape": [ - 145 + 49 ], "zarr_format": 2 -} \ No newline at end of file +} diff --git a/tests/data/gsp/test.zarr/datetime_gmt/.zattrs b/tests/data/gsp/test.zarr/datetime_gmt/.zattrs index 9487f1c5..e9976b75 100644 --- a/tests/data/gsp/test.zarr/datetime_gmt/.zattrs +++ b/tests/data/gsp/test.zarr/datetime_gmt/.zattrs @@ -3,5 +3,5 @@ "datetime_gmt" ], "calendar": "proleptic_gregorian", - "units": "minutes since 2019-01-01 00:00:00" -} \ No newline at end of file + "units": "minutes since 2019-01-01" +} diff --git a/tests/data/gsp/test.zarr/datetime_gmt/0 b/tests/data/gsp/test.zarr/datetime_gmt/0 index 7e859e54ee92ebd92537f8d16bf254415b0e7704..76b3c55515371e8758dbee65b015ce50d7d08a60 100644 GIT binary patch delta 98 zcmcb>)Ws;p#AwLT!N|Y>!d(mu3?e{W1jJuMCQ2CE0~rv&2m?$|zzhQ{P{0ZX3`Pw6 LObq{71Hjq<4hIN9 literal 336 zcmZQ#ROIMjVPF8^07eD|5e5c^i9pOCXA@O1ZOf$(d^%nkZA%V4_$Ol(Q8s1Mg?Bt! z9%(I$4&3`AWf4{~X~VfUTpDgEO$+wj`6Xc%QZ!-RnO7WYE=djZ_T2g*W)f7;x8~Ff zHWjCYx;eXUd=oJW$m>~k;u(vQLtM?Q9oN1H8TjRNtvL3CNx?3rYR0xJp9J)LvO1O> zd4yrVC(unI42%pSjEoGzOhB3$NV5QGRv^s=q}hQq2ax6j(p*5A8%Xm2XlH464*WBhP#~5uh+H!50 zBXZ5==83}N>2j4t@$32H^ZvYEpU;1ve?B|-(uC9$0R9sS0U!wgTEAVb{^+l(YVwCJ zz*+%-YRo)1fRFgkJh`Y3>FviH!5hUtvxpq=5G^D;JodJ2o^O#QAHQ7c`wj8p!`6)HW!_=RDY4w75^IyeraAB-^^rRk3Y?It) zQ}WSY)#vncP3Ry_b6SGeB3~TYI;AIzHPJXTkO5N*>L&^3BM3!kWS=fE-d`YW(Htxb zWzglOOZul>T3*D_|G;(A6a64)N8wddtndkwpOv2)drh%h1S@?Axs14!4^elbauj@p zF{bGZHz)_L`f*@d2~~M>W#*mirmZ1YRr1PAx;I$sPV)Z7i~qAnHWi2_jQEFDj&m8SH?vuB~Ph& zB=zq&7^{#0A}01#zGxZM&^McDn)FoS542(l442OIT|5t4Ij*d|Mh3RupInpt3bQPd zVTbp?cd?U`A?C+_#$XEiqM?M?;05c4>_s1@nkRh>95pW@lR$G}P7z<6K63CvwgDRJ zo1i#lUo&q*%fv#-n?;$m09~Ds3}Fj#rSG$_w~o)o^#lZdwxticz4)tPDvNaiX^hU_+&=Kkx{8d&F}eeH*RhM^5uN(_yMUvbgZw) zLU=&-O0VK!Xm`r4)@S|j?7(wHaCZ|rnyWkbfZ(}0B%H_ji~Zx?PeU!;=h;g+OYlBB zST0G^f%wwe(iSuG{l_Fx2?3-M!L5iEd(QEs<@#m+k2dYoehbh&uQO^OQG?GZT2!BC z#rTR@V)sL`*Y{wmy8azSss8(*S9fvdcxxDFVKYOAlRya~0rK-97S3vK19|O>lf<<> zmFrjB0Pb7|1k6qftfxpf;^jOCZA4d{PQS|!rGxTAilj}?Bp$7^G^`O zxb!(jBqZFE)pqCH951@NUc(Hy<2za+zZE5Z>-L#+5Ln-zK8@i*#*2O*`b_R@`P^8L zoCP`bVqT&*ROWdtNy*sui}L~nyF_;1(OMpD1)~!LC;QCYW^uf2M4f1?Q1kN`F$fqK zHA6Yr>{Ux4n$&QEa$kQoc?phN7FiF`cvrtK7{n0`9WDEg15aTy;PjYvXtbONDsP}x z^WV=C)!O%4|2z=5_adyNaJ}~3IOXl*dQdxc zu(-_$lc5oAwJ4P^qW3mKzz#Gv7LQ#iowk##vD!|l*OyPqQO1CpQG{V!9NWbtW()?X3NM$9Kd zPkT-)%OT7L5GJjTfij6Xse*h9b@FYl5|OwpK%%QcahR?Z##4LXrZz^2|ye(G`4-|gAqO=iPIwSCCe;l1C1%~26d8!^s^MQ$>Zl*c= zqKlQW1HdslVm5v7xXLpt+T`++1EhI)a;|)Ju&Y7~&+)>5PB=&0j~xkmdL;zEu40LnQ5h0Z=W|2|g1-R3WqsPiQGExpK8X)WTuEi#2J^adh#f+37Ej zE2MY#IwgWAo0DyB!l&HfLKxFK`wa2RwpK?~Z#VR-%gAw;(Wuh%| zZL>ANGMDDzsCjN3)7%&byZIXhu(kq7rS}o+mn?+M?i=Hy@V8P*(7by4V@G2=>Y8NF zlJ@ve0Z4xI=~J-6#_?}kMj?A_rKjYU3?bK-t)$VWEfyu%vUbj*O@Et;C&N4jM$e;P zqZkwH2MldYNg1u$8jfYuWJ)LS(WLpR90Y{FZTP(tz6W$0b#J?VO1J4*jhOT7b&WGa zhuq-ygVhta%uoN^{ZIrwryfAY;iXK0#&y4R<&TLD?5Qq8MnT*jzcx%9eEIRWm)EGR zY~LeJ2vo^2%#KFl!MUX(@1J%kg_{6AgTLvv$ORs(Zhled+rNs`S~LcbmIGwDGR*55MB9p(&= zz2v=YW>SlLyLw(SSX~VxL!4pG7@bF@w$v7Py?xWfXkt)$*lZ4asFhbwc~z0aV%629 zGf1`dy>HpS%5k4v+Ix{mfCn}$p8k=qAcl83=?}m>AT(Gg4xg-7ZJq%Pa0Xc6 z@P^+^ZyOmI85P#+``fj4vdU|G7Jo?3KEIz3a06hb`VO)~YD%-25zhMcj;9hL%eVo;R`^^|xrGtkt%Z znqT93{se>4viGG4;rWqi&Y=axLE;i#iIx%!5~d;!3=Uih6PzF4(cZw~c;nlu-L<9L S!|%Rb4Wjc)w;#{C&j Date: Thu, 30 Sep 2021 14:55:22 +0100 Subject: [PATCH 7/8] use concurrent methods to get quicker data from pvlive api --- nowcasting_dataset/data_sources/gsp/pvlive.py | 71 +++++++++++-------- 1 file changed, 40 insertions(+), 31 deletions(-) diff --git a/nowcasting_dataset/data_sources/gsp/pvlive.py b/nowcasting_dataset/data_sources/gsp/pvlive.py index a7ffd6c8..c04f843c 100644 --- a/nowcasting_dataset/data_sources/gsp/pvlive.py +++ b/nowcasting_dataset/data_sources/gsp/pvlive.py @@ -4,6 +4,8 @@ from pvlive_api import PVLive from typing import Optional import pytz +from tqdm import tqdm +from concurrent import futures from nowcasting_dataset.data_sources.gsp.eso import get_list_of_gsp_ids @@ -40,21 +42,22 @@ def load_pv_gsp_raw_data_from_pvlive( gsp_data_df = [] logger.debug(f"Will be getting data for {len(gsp_ids)} gsp ids") # loop over gsp ids - for gsp_id in gsp_ids: - - one_gsp_data_df = [] - - # set the first chunk start and end times - start_chunk = first_start_chunk - end_chunk = first_end_chunk - - # loop over 30 days chunks (nice to see progress instead of waiting a long time for one command - this might - # not be the fastest) - while start_chunk <= end: - logger.debug(f"Getting data for gsp id {gsp_id} from {start_chunk} to {end_chunk}") - - one_gsp_data_df.append( - pvl.between( + # limit the total number of concurrent tasks to be 4, so that we don't hit the pvlive api too much + future_tasks = [] + with futures.ThreadPoolExecutor(max_workers=4) as executor: + for gsp_id in gsp_ids: + + # set the first chunk start and end times + start_chunk = first_start_chunk + end_chunk = first_end_chunk + + # loop over 30 days chunks (nice to see progress instead of waiting a long time for one command - this might + # not be the fastest) + while start_chunk <= end: + logger.debug(f"Getting data for gsp id {gsp_id} from {start_chunk} to {end_chunk}") + + task = executor.submit( + pvl.between, start=start_chunk, end=end_chunk, entity_type="gsp", @@ -62,30 +65,36 @@ def load_pv_gsp_raw_data_from_pvlive( extra_fields="installedcapacity_mwp", dataframe=True, ) - ) - # add 30 days to the chunk, to get the next chunk - start_chunk = start_chunk + CHUNK_DURATION - end_chunk = end_chunk + CHUNK_DURATION + future_tasks.append(task) + + # add 30 days to the chunk, to get the next chunk + start_chunk = start_chunk + CHUNK_DURATION + end_chunk = end_chunk + CHUNK_DURATION - if end_chunk > end: - end_chunk = end + if end_chunk > end: + end_chunk = end - # join together one gsp data, and sort - one_gsp_data_df = pd.concat(one_gsp_data_df) - one_gsp_data_df = one_gsp_data_df.sort_values(by=["gsp_id", "datetime_gmt"]) + logger.debug(f"Getting results") + # Collect results from each thread. + for task in tqdm(future_tasks): + one_chunk_one_gsp_gsp_data_df = task.result() - # normalize - if normalize_data: - one_gsp_data_df["generation_mw"] = ( - one_gsp_data_df["generation_mw"] / one_gsp_data_df["installedcapacity_mwp"] - ) + if normalize_data: + one_chunk_one_gsp_gsp_data_df["generation_mw"] = ( + one_chunk_one_gsp_gsp_data_df["generation_mw"] + / one_chunk_one_gsp_gsp_data_df["installedcapacity_mwp"] + ) - # append to longer list - gsp_data_df.append(one_gsp_data_df) + # append to longer list + gsp_data_df.append(one_chunk_one_gsp_gsp_data_df) + # join together gsp data gsp_data_df = pd.concat(gsp_data_df) + # sort + gsp_data_df = gsp_data_df.sort_values(by=["gsp_id", "datetime_gmt"]) + # remove any extra data loaded gsp_data_df = gsp_data_df[gsp_data_df["datetime_gmt"] <= end] From 9e2c94feec3dcafa6f43a06a2ee534611fae0591 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Thu, 30 Sep 2021 16:37:46 +0100 Subject: [PATCH 8/8] pylint --- nowcasting_dataset/data_sources/gsp/pvlive.py | 1 - nowcasting_dataset/dataset/split/__init__.py | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/nowcasting_dataset/data_sources/gsp/pvlive.py b/nowcasting_dataset/data_sources/gsp/pvlive.py index e0b4e225..d68ba519 100644 --- a/nowcasting_dataset/data_sources/gsp/pvlive.py +++ b/nowcasting_dataset/data_sources/gsp/pvlive.py @@ -125,7 +125,6 @@ def get_installed_capacity( Returns: pd.Series of installed capacity indexed by gsp_id """ - logger.debug(f"Getting all installed capacity at {start}") # get a lit of gsp ids diff --git a/nowcasting_dataset/dataset/split/__init__.py b/nowcasting_dataset/dataset/split/__init__.py index e69de29b..c7eb89d1 100644 --- a/nowcasting_dataset/dataset/split/__init__.py +++ b/nowcasting_dataset/dataset/split/__init__.py @@ -0,0 +1 @@ +""" split functions """