Skip to content
This repository was archived by the owner on Sep 11, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 87 additions & 18 deletions nowcasting_dataset/data_sources/fake.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
convert_coordinates_to_indexes_for_list_datasets,
join_list_dataset_to_batch_dataset,
)
from nowcasting_dataset.geospatial import lat_lon_to_osgb


def gsp_fake(
Expand Down Expand Up @@ -196,32 +197,84 @@ def topographic_fake(batch_size, image_size_pixels):
return Topographic(xr_dataset)


def add_uk_centroid_osgb(x, y):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a bit lost in this function.

What do x and y do to the random lat/lon values?

Is it more like "create a random OSGB value in the centre of UK influenced by x,y "?
I think it would help to change the comment and make the function name a bit more descriptive.

"""
Shift OSGB coordinates so that their mean sits on a random point in the centre of the UK

Args:
x: random values, OSGB
y: random values, OSGB

Returns: X,Y random coordinates [OSGB]
"""

# get random OSGB center in the UK
lat = np.random.uniform(51, 55)
lon = np.random.uniform(-2.5, 1)
x_center, y_center = lat_lon_to_osgb(lat=lat, lon=lon)

# make average 0
x = x - x.mean()
y = y - y.mean()

# put in the uk
x = x + x_center
y = y + y_center

return x, y


def create_random_point_coordinates_osgb(size: int):
    """
    Make random point coordinates [OSGB], e.g. for PV sites or GSPs.

    Args:
        size: number of points to generate

    Returns: x and y coordinates, as returned by add_uk_centroid_osgb
    """
    # offsets are drawn from a square with sides of about 100 km
    max_offset = 100_000
    x_offsets = np.random.randint(0, max_offset, size)
    y_offsets = np.random.randint(0, max_offset, size)

    # move the points so they are centred somewhere in the UK
    return add_uk_centroid_osgb(x_offsets, y_offsets)


def make_random_image_coords_osgb(size: int):
    """
    Make x and y coordinate ranges for the pixels of a fake image.

    Args:
        size: number of pixels along each side of the image

    Returns: x and y coordinates, as returned by add_uk_centroid_osgb
    """
    # 4 kilometer spacing seemed about right for real satellite images
    pixel_spacing = 4 * 10**3

    # evenly spaced pixel positions, starting at 0, one array per axis
    x_range = pixel_spacing * np.array(range(size))
    y_range = pixel_spacing * np.array(range(size))

    # move the ranges so they are centred somewhere in the UK
    return add_uk_centroid_osgb(x_range, y_range)


def create_image_array(
dims=("time", "x", "y", "channels"),
seq_length_5=19,
image_size_pixels=64,
channels=SAT_VARIABLE_NAMES,
):
"""Create Satellite or NWP fake image data"""

x, y = make_random_image_coords_osgb(size=image_size_pixels)

ALL_COORDS = {
"time": pd.date_range("2021-01-01", freq="5T", periods=seq_length_5),
"x": np.random.randint(low=0, high=1000, size=image_size_pixels),
"y": np.random.randint(low=0, high=1000, size=image_size_pixels),
"x": x,
"y": y,
"channels": np.array(channels),
}
coords = [(dim, ALL_COORDS[dim]) for dim in dims]
image_data_array = xr.DataArray(
abs(
np.random.randn(
seq_length_5,
image_size_pixels,
image_size_pixels,
len(channels),
abs( # to make sure average is about 100
np.random.uniform(
0,
200,
size=(seq_length_5, image_size_pixels, image_size_pixels, len(channels)),
)
),
coords=coords,
name="data",
) # Fake data for testing!

return image_data_array


Expand Down Expand Up @@ -252,11 +305,24 @@ def create_gsp_pv_dataset(
"id": np.random.choice(range(1000), number_of_systems, replace=False),
}
coords = [(dim, ALL_COORDS[dim]) for dim in dims]

# make pv yield
data = np.random.randn(
seq_length,
number_of_systems,
)
data = data.clip(min=0)

# smooth the data; the convolution smooths the data across systems first,
# and then a bit across time (depending on what you set N to)
N = int(seq_length / 2)
data = np.convolve(data.ravel(), np.ones(N) / N, mode="same").reshape(
(seq_length, number_of_systems)
)

# make into a Data Array
data_array = xr.DataArray(
np.random.randn(
seq_length,
number_of_systems,
),
data,
coords=coords,
) # Fake data for testing!

Expand All @@ -267,20 +333,23 @@ def create_gsp_pv_dataset(

data = data_array.to_dataset(name="power_mw")

# make random coords
x, y = create_random_point_coordinates_osgb(size=number_of_systems)

x_coords = xr.DataArray(
data=np.sort(
np.random.choice(range(2 * number_of_systems), number_of_systems, replace=False)
),
data=x,
dims=["id"],
)

y_coords = xr.DataArray(
data=np.sort(
np.random.choice(range(2 * number_of_systems), number_of_systems, replace=False)
),
data=y,
dims=["id"],
)

# make first coords centroid
x_coords.data[0] = x_coords.data.mean()
y_coords.data[0] = y_coords.data.mean()

data["capacity_mwp"] = capacity
data["x_coords"] = x_coords
data["y_coords"] = y_coords
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ def open(self) -> None:
"""
self._data = self._open_data()
self._data = self._data.sel(variable=list(self.channels))
if "variable" in self._data.dims:
self._data = self._data.rename({"variable": "channels"})

def _open_data(self) -> xr.DataArray:
return open_sat_data(zarr_path=self.zarr_path, consolidated=self.consolidated)
Expand Down Expand Up @@ -125,7 +127,9 @@ def get_example(
y_center_osgb=y_meters_center,
)

selected_data = selected_data.rename({"variable": "channels"})
if "variable" in list(selected_data.dims):
selected_data = selected_data.rename({"variable": "channels"})

selected_data = self._post_process_example(selected_data, t0_dt)

if selected_data.shape != self._shape_of_example:
Expand Down Expand Up @@ -281,6 +285,8 @@ def open_sat_data(zarr_path: str, consolidated: bool) -> xr.DataArray:
)

data_array = dataset["stacked_eumetsat_data"]
if "stacked_eumetsat_data" == data_array.name:
data_array.name = "data"
del dataset

# Flip coordinates to top-left first
Expand Down