From abf9fa622d2fc9beb3c1e8289563874b9105057e Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Thu, 11 Nov 2021 15:21:55 +0000 Subject: [PATCH 01/10] make pv coords more realistic --- nowcasting_dataset/data_sources/fake.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/nowcasting_dataset/data_sources/fake.py b/nowcasting_dataset/data_sources/fake.py index 309ea1bf..9e9cbbdd 100644 --- a/nowcasting_dataset/data_sources/fake.py +++ b/nowcasting_dataset/data_sources/fake.py @@ -19,6 +19,7 @@ join_list_data_array_to_batch_dataset, join_list_dataset_to_batch_dataset, ) +from nowcasting_dataset.geospatial import lat_lon_to_osgb def gsp_fake( @@ -211,7 +212,7 @@ def create_gsp_pv_dataset( data = convert_data_array_to_dataset(data_array) x_coords = xr.DataArray( - data=np.sort(np.random.randn(number_of_systems)), + data=10 ** 4 * np.random.randn(number_of_systems), dims=["id_index"], coords=dict( id_index=range(number_of_systems), @@ -219,15 +220,26 @@ def create_gsp_pv_dataset( ) y_coords = xr.DataArray( - data=np.sort(np.random.randn(number_of_systems)), + data=10 ** 4 * np.random.randn(number_of_systems), dims=["id_index"], coords=dict( id_index=range(number_of_systems), ), ) - data["x_coords"] = x_coords - data["y_coords"] = y_coords + # make first coords centroid + x_coords.data[0] = x_coords.data.mean() + y_coords.data[0] = y_coords.data.mean() + + # make random lat and long + lat = np.random.randint(51, 55) + lon = np.random.randint(-2.5, 1) + + # turn into OSGB + x, y = lat_lon_to_osgb(lat=lat, lon=lon) + + data["x_coords"] = x_coords + x + data["y_coords"] = y_coords + y # Add 1000 to the id numbers for the row numbers. 
# This is a quick way to make sure row number is different from id, From 4e08213a86797dde39e292aa5ba3de2e33638fe8 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Thu, 11 Nov 2021 17:14:59 +0000 Subject: [PATCH 02/10] smooth pv data --- nowcasting_dataset/data_sources/fake.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/nowcasting_dataset/data_sources/fake.py b/nowcasting_dataset/data_sources/fake.py index 9e9cbbdd..0af165e0 100644 --- a/nowcasting_dataset/data_sources/fake.py +++ b/nowcasting_dataset/data_sources/fake.py @@ -201,11 +201,24 @@ def create_gsp_pv_dataset( "id": np.random.randint(low=0, high=1000, size=number_of_systems), } coords = [(dim, ALL_COORDS[dim]) for dim in dims] + + # make pv yield + data = np.random.randn( + seq_length, + number_of_systems, + ) + data = data.clip(min=0) + + # smooth the data, the convolution method smooths the data across systems first, + # and then a bit across time (depending what you set N) + N = int(seq_length / 2) + data = np.convolve(data.ravel(), np.ones(N) / N, mode="same").reshape( + (seq_length, number_of_systems) + ) + + # make into a Data Array data_array = xr.DataArray( - np.random.randn( - seq_length, - number_of_systems, - ), + data, coords=coords, ) # Fake data for testing! 
From b25186b62c92ca7de8a106101a79dec9d2793311 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Fri, 12 Nov 2021 15:28:41 +0000 Subject: [PATCH 03/10] update for coords for satellite --- nowcasting_dataset/data_sources/fake.py | 67 +++++++++++++------ .../satellite/satellite_data_source.py | 3 +- 2 files changed, 50 insertions(+), 20 deletions(-) diff --git a/nowcasting_dataset/data_sources/fake.py b/nowcasting_dataset/data_sources/fake.py index 0af165e0..eb1950ee 100644 --- a/nowcasting_dataset/data_sources/fake.py +++ b/nowcasting_dataset/data_sources/fake.py @@ -161,6 +161,36 @@ def topographic_fake(batch_size, image_size_pixels): return Topographic(xr_dataset) +def add_uk_centroid_osgb(x, y): + """Add a OSGB value to make coords in center of UK""" + lat = np.random.uniform(51, 55) + lon = np.random.uniform(-2.5, 1) + + x_center, y_center = lat_lon_to_osgb(lat=lat, lon=lon) + + x = x + x_center + y = y + y_center + + return x, y + + +def make_random_point_coords_osgb(size: int): + """Make random coords [OSGB] for pv site, of gsp""" + # this is about 100KM + x = np.sort(np.random.randint(0, 10 ** 5, size)) + y = np.sort(np.random.randint(0, 10 ** 5, size)) + + return add_uk_centroid_osgb(x, y) + + +def make_random_image_coords_osgb(size: int): + """Make random coords for image. 
These are ranges for the pixels""" + x = 4 * 10 ** 3 * np.array((range(0, size))) + y = 4 * 10 ** 3 * np.array((range(0, size))) + + return add_uk_centroid_osgb(x, y) + + def create_image_array( dims=("time", "x", "y", "channels"), seq_length_5=19, @@ -168,24 +198,27 @@ def create_image_array( channels=SAT_VARIABLE_NAMES, ): """Create Satellite or NWP fake image data""" + + x, y = make_random_image_coords_osgb(size=image_size_pixels) + ALL_COORDS = { "time": pd.date_range("2021-01-01", freq="5T", periods=seq_length_5), - "x": np.random.randint(low=0, high=1000, size=image_size_pixels), - "y": np.random.randint(low=0, high=1000, size=image_size_pixels), + "x": x, + "y": y, "channels": np.array(channels), } coords = [(dim, ALL_COORDS[dim]) for dim in dims] image_data_array = xr.DataArray( - abs( - np.random.randn( - seq_length_5, - image_size_pixels, - image_size_pixels, - len(channels), + abs( # to make sure average is about 100 + np.random.uniform( + 0, + 200, + size=(seq_length_5, image_size_pixels, image_size_pixels, len(channels)), ) ), coords=coords, ) # Fake data for testing! 
+ return image_data_array @@ -224,8 +257,11 @@ def create_gsp_pv_dataset( data = convert_data_array_to_dataset(data_array) + # make random coords + x, y = make_random_point_coords_osgb(size=number_of_systems) + x_coords = xr.DataArray( - data=10 ** 4 * np.random.randn(number_of_systems), + data=x, dims=["id_index"], coords=dict( id_index=range(number_of_systems), @@ -233,7 +269,7 @@ def create_gsp_pv_dataset( ) y_coords = xr.DataArray( - data=10 ** 4 * np.random.randn(number_of_systems), + data=y, dims=["id_index"], coords=dict( id_index=range(number_of_systems), @@ -244,15 +280,8 @@ def create_gsp_pv_dataset( x_coords.data[0] = x_coords.data.mean() y_coords.data[0] = y_coords.data.mean() - # make random lat and long - lat = np.random.randint(51, 55) - lon = np.random.randint(-2.5, 1) - - # turn into OSGB - x, y = lat_lon_to_osgb(lat=lat, lon=lon) - - data["x_coords"] = x_coords + x - data["y_coords"] = y_coords + y + data["x_coords"] = x_coords + data["y_coords"] = y_coords # Add 1000 to the id numbers for the row numbers. 
# This is a quick way to make sure row number is different from id, diff --git a/nowcasting_dataset/data_sources/satellite/satellite_data_source.py b/nowcasting_dataset/data_sources/satellite/satellite_data_source.py index 4dfe64b6..9860eb1e 100644 --- a/nowcasting_dataset/data_sources/satellite/satellite_data_source.py +++ b/nowcasting_dataset/data_sources/satellite/satellite_data_source.py @@ -44,6 +44,7 @@ def open(self) -> None: """ self._data = self._open_data() self._data = self._data.sel(variable=list(self.channels)) + self._data = self._data.rename({"variable"}) def _open_data(self) -> xr.DataArray: return open_sat_data(zarr_path=self.zarr_path, consolidated=self.consolidated) @@ -138,7 +139,7 @@ def open_sat_data(zarr_path: str, consolidated: bool) -> xr.DataArray: zarr_path, engine="zarr", consolidated=consolidated, mode="r", chunks=None ) - data_array = dataset["stacked_eumetsat_data"] + data_array = dataset["stacked_eumetsat_data"].rename({"stacked_eumetsat_data": "data" "va"}) del dataset # The 'time' dimension is at 04, 09, ..., 59 minutes past the hour. 
From 7da6a042e54bcbcb3c2c0f9476e8b281320b6c3e Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Fri, 12 Nov 2021 15:38:27 +0000 Subject: [PATCH 04/10] fix for satellite data --- .../data_sources/satellite/satellite_data_source.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/nowcasting_dataset/data_sources/satellite/satellite_data_source.py b/nowcasting_dataset/data_sources/satellite/satellite_data_source.py index 9860eb1e..b633ad3a 100644 --- a/nowcasting_dataset/data_sources/satellite/satellite_data_source.py +++ b/nowcasting_dataset/data_sources/satellite/satellite_data_source.py @@ -44,7 +44,8 @@ def open(self) -> None: """ self._data = self._open_data() self._data = self._data.sel(variable=list(self.channels)) - self._data = self._data.rename({"variable"}) + if "variable" in self._data.dims: + self._data = self._data.rename({"variable": "channels"}) def _open_data(self) -> xr.DataArray: return open_sat_data(zarr_path=self.zarr_path, consolidated=self.consolidated) @@ -139,7 +140,9 @@ def open_sat_data(zarr_path: str, consolidated: bool) -> xr.DataArray: zarr_path, engine="zarr", consolidated=consolidated, mode="r", chunks=None ) - data_array = dataset["stacked_eumetsat_data"].rename({"stacked_eumetsat_data": "data" "va"}) + data_array = dataset["stacked_eumetsat_data"] + if "stacked_eumetsat_data" == data_array.name: + data_array.name = "data" del dataset # The 'time' dimension is at 04, 09, ..., 59 minutes past the hour. 
From 09743e117172f074ca1d2bfbc9af41a3ba2b56f1 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Tue, 16 Nov 2021 09:20:36 +0000 Subject: [PATCH 05/10] remove sort in random coords --- nowcasting_dataset/data_sources/fake.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nowcasting_dataset/data_sources/fake.py b/nowcasting_dataset/data_sources/fake.py index 98dbd519..a158fb08 100644 --- a/nowcasting_dataset/data_sources/fake.py +++ b/nowcasting_dataset/data_sources/fake.py @@ -189,8 +189,8 @@ def add_uk_centroid_osgb(x, y): def make_random_point_coords_osgb(size: int): """Make random coords [OSGB] for pv site, of gsp""" # this is about 100KM - x = np.sort(np.random.randint(0, 10 ** 5, size)) - y = np.sort(np.random.randint(0, 10 ** 5, size)) + x = np.random.randint(0, 10 ** 5, size) + y = np.random.randint(0, 10 ** 5, size) return add_uk_centroid_osgb(x, y) From 799bc8b16b3bc696e2b7bd133fa6dc29a9209511 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Fri, 19 Nov 2021 10:34:41 +0000 Subject: [PATCH 06/10] Some PR comments --- nowcasting_dataset/data_sources/fake.py | 31 +++++++++++++++++++------ 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/nowcasting_dataset/data_sources/fake.py b/nowcasting_dataset/data_sources/fake.py index 275818d0..9b795089 100644 --- a/nowcasting_dataset/data_sources/fake.py +++ b/nowcasting_dataset/data_sources/fake.py @@ -198,31 +198,48 @@ def topographic_fake(batch_size, image_size_pixels): def add_uk_centroid_osgb(x, y): - """Add a OSGB value to make coords in center of UK""" + """ + Add an OSGB value to make coords in center of UK + + Args: + x: random values, OSGB + y: random values, OSGB + + Returns: X,Y random coordinates [OSGB] + """ lat = np.random.uniform(51, 55) lon = np.random.uniform(-2.5, 1) x_center, y_center = lat_lon_to_osgb(lat=lat, lon=lon) + # make average 0 + x = x - x.mean() + y = y - y.mean() + + # put in the uk x = x + x_center y = y + y_center return x, y -def 
make_random_point_coords_osgb(size: int): +def create_random_point_coordinates_osgb(size: int): """Make random coords [OSGB] for pv site, of gsp""" # this is about 100KM - x = np.random.randint(0, 10 ** 5, size) - y = np.random.randint(0, 10 ** 5, size) + HUNDRED_KILOMETERS = 10 ** 5 + x = np.random.randint(0, HUNDRED_KILOMETERS, size) + y = np.random.randint(0, HUNDRED_KILOMETERS, size) return add_uk_centroid_osgb(x, y) def make_random_image_coords_osgb(size: int): """Make random coords for image. These are ranges for the pixels""" - x = 4 * 10 ** 3 * np.array((range(0, size))) - y = 4 * 10 ** 3 * np.array((range(0, size))) + + ONE_KILOMETER = 10 ** 3 + + x = 4 * ONE_KILOMETER * np.array((range(0, size))) + y = 4 * ONE_KILOMETER * np.array((range(0, size))) return add_uk_centroid_osgb(x, y) @@ -315,7 +332,7 @@ def create_gsp_pv_dataset( data = data_array.to_dataset(name="power_mw") # make random coords - x, y = make_random_point_coords_osgb(size=number_of_systems) + x, y = create_random_point_coordinates_osgb(size=number_of_systems) x_coords = xr.DataArray( data=x, From beceb57ac42368ad0793bebf349986d94f51dce1 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Fri, 19 Nov 2021 10:42:24 +0000 Subject: [PATCH 07/10] add comment --- nowcasting_dataset/data_sources/fake.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nowcasting_dataset/data_sources/fake.py b/nowcasting_dataset/data_sources/fake.py index 9b795089..0efedade 100644 --- a/nowcasting_dataset/data_sources/fake.py +++ b/nowcasting_dataset/data_sources/fake.py @@ -238,6 +238,7 @@ def make_random_image_coords_osgb(size: int): ONE_KILOMETER = 10 ** 3 + # 4 kilometer spacing seemed about right for real satellite images x = 4 * ONE_KILOMETER * np.array((range(0, size))) y = 4 * ONE_KILOMETER * np.array((range(0, size))) From f3ecef65a7022e12e43d64ac2597537d97271685 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Fri, 19 Nov 2021 10:48:16 +0000 Subject: [PATCH 08/10] make satellite data source more robust, 
in case channels is already there --- .../data_sources/satellite/satellite_data_source.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nowcasting_dataset/data_sources/satellite/satellite_data_source.py b/nowcasting_dataset/data_sources/satellite/satellite_data_source.py index b5ace2e6..d8d05826 100644 --- a/nowcasting_dataset/data_sources/satellite/satellite_data_source.py +++ b/nowcasting_dataset/data_sources/satellite/satellite_data_source.py @@ -127,7 +127,9 @@ def get_example( y_center_osgb=y_meters_center, ) - selected_data = selected_data.rename({"variable": "channels"}) + if "variable" in list(selected_data.data_vars): + selected_data = selected_data.rename({"variable": "channels"}) + selected_data = self._post_process_example(selected_data, t0_dt) if selected_data.shape != self._shape_of_example: From 7bbec76b0bdd205a266459c2b9eb9054bfc98c80 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Fri, 19 Nov 2021 10:55:30 +0000 Subject: [PATCH 09/10] use dims not data vars --- .../data_sources/satellite/satellite_data_source.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nowcasting_dataset/data_sources/satellite/satellite_data_source.py b/nowcasting_dataset/data_sources/satellite/satellite_data_source.py index d8d05826..30efbe44 100644 --- a/nowcasting_dataset/data_sources/satellite/satellite_data_source.py +++ b/nowcasting_dataset/data_sources/satellite/satellite_data_source.py @@ -127,7 +127,7 @@ def get_example( y_center_osgb=y_meters_center, ) - if "variable" in list(selected_data.data_vars): + if "variable" in list(selected_data.dims): selected_data = selected_data.rename({"variable": "channels"}) selected_data = self._post_process_example(selected_data, t0_dt) From 955c5c33487cf386b10e23bb4f1083393ddd32d9 Mon Sep 17 00:00:00 2001 From: peterdudfield Date: Fri, 19 Nov 2021 11:04:02 +0000 Subject: [PATCH 10/10] comments --- nowcasting_dataset/data_sources/fake.py | 5 +++-- 1 file changed, 3 insertions(+), 2 
deletions(-) diff --git a/nowcasting_dataset/data_sources/fake.py b/nowcasting_dataset/data_sources/fake.py index 0efedade..2a77ae01 100644 --- a/nowcasting_dataset/data_sources/fake.py +++ b/nowcasting_dataset/data_sources/fake.py @@ -199,7 +199,7 @@ def topographic_fake(batch_size, image_size_pixels): def add_uk_centroid_osgb(x, y): """ - Add an OSGB value to make coords in center of UK + Add an OSGB value to make in center of UK Args: x: random values, OSGB @@ -207,9 +207,10 @@ def add_uk_centroid_osgb(x, y): Returns: X,Y random coordinates [OSGB] """ + + # get random OSGB center in the UK lat = np.random.uniform(51, 55) lon = np.random.uniform(-2.5, 1) - x_center, y_center = lat_lon_to_osgb(lat=lat, lon=lon) # make average 0