In [13]:
# https://ipython.readthedocs.io/en/stable/config/extensions/autoreload.html
%load_ext autoreload
# Reload all modules (except those excluded by %aimport) every time before executing the Python code typed:
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [14]:
# General imports
from pathlib import Path
import numpy as np
import xarray as xr
import pandas as pd

# Plotting
import matplotlib.pyplot as plt
# Notebook-wide matplotlib defaults: 20x10 figures drawn on a white background.
plt.rcParams.update(
    {
        "figure.figsize": (20, 10),
        "figure.facecolor": "white",
    }
)

# power_perceiver imports
from power_perceiver.load_prepared_batches.data_loader import HRVSatellite, GSP, PV, Sun
from power_perceiver.load_prepared_batches.prepared_dataset import PreparedDataset
from power_perceiver.transforms.pv import PVPowerRollingWindow
from power_perceiver.transforms.satellite import PatchSatellite
from power_perceiver.xr_batch_processor import ReduceNumPVSystems, SelectPVSystemsNearCenterOfImage, AlignGSPTo5Min
from power_perceiver.np_batch_processor import EncodeSpaceTime, Topography
from power_perceiver.consts import BatchKey



In [15]:
# Directory holding the prepared batches used in this notebook.
# The path is written relative to `~` and then resolved against the current user's home.
DATA_PATH = Path("~/dev/ocf/power_perceiver/data_for_testing/")
DATA_PATH = DATA_PATH.expanduser()
# Fail early if the directory is not present on this machine.
assert DATA_PATH.exists()

In [17]:
# Index of the prepared batch to pull from each data loader.
BATCH_IDX = 0
# Build one loader per modality (GSP, then HRV satellite, then PV) and take the
# same batch index from each.
gsp, hrv, pv = (
    loader_cls(data_path=DATA_PATH)[BATCH_IDX]
    for loader_cls in (GSP, HRVSatellite, PV)
)

In [18]:
# Bare expression as the last line of the cell: displays the GSP batch loaded above.
gsp

In [19]:
# Bare expression as the last line of the cell: displays the HRV satellite batch loaded above.
hrv

In [20]:
# Bare expression as the last line of the cell: displays the PV batch loaded above.
pv

In [21]:
# Scratch arithmetic (evaluates to 7).
# NOTE(review): the meaning of 31 and 24 is not stated here -- presumably counts of
# timesteps; confirm, or delete this cell.
31-24

7

In [95]:
# Find the corresponding GSP 30 minute timestep for each 5 minute satellite timestep.
# We do this by taking the `ceil("30min")` of each 5 minute satellite timestep.
# Most of the code below is just converting from xarray to Pandas and back
# so we can use `pd.DatetimeIndex.ceil` on each datetime.
#
# NOTE: the frequency is spelled "30min" rather than "30T" because the "T" alias is
# deprecated in recent pandas releases; "min" is also accepted by older versions.
time_5_min_series = hrv.time_utc.to_series()
time_5_min_dt_index = pd.DatetimeIndex(time_5_min_series)
time_30_min_dt_index = time_5_min_dt_index.ceil("30min")
# Re-attach the index produced by `to_series()` so that `to_xarray()` can rebuild
# the original dimensions from it.
time_30_min_series = pd.Series(
    time_30_min_dt_index,
    index=time_5_min_series.index,
)
time_30_min_da = time_30_min_series.to_xarray()

In [112]:
# Loop through each example and find the index into the GSP time dimension
# of the GSP timestep corresponding to each 5 minute satellite timestep.
#
# The per-example results are accumulated in their own list (`gsp_5_min_per_example`)
# so that `gsp_5_min_for_all_examples` only ever refers to the concatenated result,
# instead of being a list first and an xarray object afterwards.
gsp_5_min_per_example = []
max_time_idx = len(gsp.time) - 1
for example_i in gsp.example:
    # For each ceil'd satellite time, `np.searchsorted` returns the position at which
    # it would be inserted into this example's GSP times.
    # NOTE(review): this presumes the GSP times are sorted ascending -- confirm.
    idx_into_gsp = np.searchsorted(
        gsp.sel(example=example_i).time_utc.values,
        time_30_min_da.sel(example=example_i).values,
    )
    # Clip the positions so that those past the end still select a valid (last)
    # timestep; such entries are blanked out just below.
    gsp_5_min = gsp.isel(example=example_i, time=idx_into_gsp.clip(max=max_time_idx))

    # Now, for any timestep where we don't have GSP data, set to NaN:
    mask = idx_into_gsp <= max_time_idx
    gsp_5_min = gsp_5_min.where(mask)
    gsp_5_min["time_utc"] = gsp_5_min.time_utc.where(mask)
    gsp_5_min_per_example.append(gsp_5_min)

# Stack the per-example results back together along the "example" dimension.
gsp_5_min_for_all_examples = xr.concat(gsp_5_min_per_example, dim="example")
gsp_5_min_for_all_examples

In [98]:
# Inspect the ceil'd 30 minute timestamps for a single example.
# NOTE: relies on `example_i` being left in scope by the `for example_i in gsp.example`
# loop, so this shows whichever example that loop visited last.
time_30_min_da.sel(example=example_i)

In [94]:
# Inspect the original 5 minute satellite timestamps for a single example.
# NOTE: relies on `example_i` being left in scope by the `for example_i in gsp.example`
# loop, so this shows whichever example that loop visited last.
hrv.time_utc.sel(example=example_i)

In [4]:
# One loader per modality; HRV satellite and PV each carry a transform, GSP has none.
data_loaders = [
    HRVSatellite(transforms=[PatchSatellite()]),
    PV(transforms=[PVPowerRollingWindow()]),
    GSP(),
]

# Handed to PreparedDataset as `xr_batch_processors`
# (presumably run on the xarray form of each batch -- TODO confirm).
xr_batch_processors = [
    SelectPVSystemsNearCenterOfImage(),
    ReduceNumPVSystems(requested_num_pv_systems=8),
    AlignGSPTo5Min(),
]

# Handed to PreparedDataset as `np_batch_processors`
# (presumably run on the numpy form of each batch -- TODO confirm).
# NOTE(review): the topography file is a hard-coded absolute path on one machine.
np_batch_processors = [
    EncodeSpaceTime(),
    Topography("/home/jack/europe_dem_2km_osgb.tif"),
]

dataset = PreparedDataset(
    data_path=DATA_PATH,
    data_loaders=data_loaders,
    xr_batch_processors=xr_batch_processors,
    np_batch_processors=np_batch_processors,
)


  proj = self._crs.to_proj4(version=version)
  proj = self._crs.to_proj4(version=version)


In [5]:
%%time
# Fetch the item at index 0 from the dataset; `%%time` reports how long the cell takes.
np_batch = dataset[0]

CPU times: user 627 ms, sys: 12.1 ms, total: 639 ms
Wall time: 638 ms


In [6]:

# List the keys present in the batch fetched from the dataset.
np_batch.keys()



dict_keys([<BatchKey.hrvsatellite: 1>, <BatchKey.hrvsatellite_time_utc: 6>, <BatchKey.hrvsatellite_y_osgb: 2>, <BatchKey.hrvsatellite_x_osgb: 3>, <BatchKey.hrvsatellite_y_geostationary: 4>, <BatchKey.hrvsatellite_x_geostationary: 5>, <BatchKey.pv: 11>, <BatchKey.pv_system_row_number: 12>, <BatchKey.pv_id: 13>, <BatchKey.pv_capacity_wp: 14>, <BatchKey.pv_mask: 15>, <BatchKey.pv_time_utc: 18>, <BatchKey.pv_x_osgb: 17>, <BatchKey.pv_y_osgb: 16>, <BatchKey.gsp: 22>, <BatchKey.gsp_id: 23>, <BatchKey.gsp_time_utc: 26>, <BatchKey.gsp_y_osgb: 24>, <BatchKey.gsp_x_osgb: 25>, <BatchKey.gsp_5_min: 30>, <BatchKey.gsp_5_min_time_utc: 31>, <BatchKey.hrvsatellite_x_osgb_fourier: 9>, <BatchKey.pv_x_osgb_fourier: 20>, <BatchKey.gsp_x_osgb_fourier: 28>, <BatchKey.hrvsatellite_y_osgb_fourier: 8>, <BatchKey.pv_y_osgb_fourier: 19>, <BatchKey.gsp_y_osgb_fourier: 27>, <BatchKey.hrvsatellite_time_utc_fourier: 10>, <BatchKey.pv_time_utc_fourier: 21>, <BatchKey.gsp_time_utc_fourier: 29>, <BatchKey.gsp_5_min_tim

In [7]:
# Shape of the array stored under BatchKey.gsp.
# NOTE(review): axis meanings are not visible here -- presumably (example, time); confirm.
np_batch[BatchKey.gsp].shape

(31, 6)

In [8]:
# Shape of the array stored under BatchKey.gsp_x_osgb_fourier.
np_batch[BatchKey.gsp_x_osgb_fourier].shape

(31, 1, 8)

In [9]:
# Shape of the array stored under BatchKey.gsp_5_min.
# NOTE(review): presumably (example, 5 minute timestep) -- confirm.
np_batch[BatchKey.gsp_5_min].shape


(31, 31)

In [10]:
# Show the values stored under BatchKey.gsp_5_min_time_utc.
# NOTE(review): the NaN entries presumably mark 5 minute timesteps with no matching
# GSP timestep -- confirm against the alignment code.
np_batch[BatchKey.gsp_5_min_time_utc]

array([[1.5999030e+09, 1.5999030e+09, 1.5999030e+09, 1.5999030e+09,
        1.5999030e+09, 1.5999030e+09, 1.5999048e+09, 1.5999048e+09,
        1.5999048e+09, 1.5999048e+09, 1.5999048e+09, 1.5999048e+09,
        1.5999066e+09, 1.5999066e+09, 1.5999066e+09, 1.5999066e+09,
        1.5999066e+09, 1.5999066e+09, 1.5999084e+09, 1.5999084e+09,
        1.5999084e+09, 1.5999084e+09, 1.5999084e+09, 1.5999084e+09,
        1.5999101e+09, 1.5999101e+09, 1.5999101e+09, 1.5999101e+09,
        1.5999101e+09, 1.5999101e+09,           nan],
       [1.6007724e+09, 1.6007724e+09, 1.6007724e+09, 1.6007724e+09,
        1.6007724e+09, 1.6007741e+09, 1.6007741e+09, 1.6007741e+09,
        1.6007741e+09, 1.6007741e+09, 1.6007741e+09, 1.6007759e+09,
        1.6007759e+09, 1.6007759e+09, 1.6007759e+09, 1.6007759e+09,
        1.6007759e+09, 1.6007779e+09, 1.6007779e+09, 1.6007779e+09,
        1.6007779e+09, 1.6007779e+09, 1.6007779e+09, 1.6007796e+09,
        1.6007796e+09, 1.6007796e+09, 1.6007796e+09, 1.6007796

In [12]:
# Show index 0 along the last axis of BatchKey.gsp_5_min_time_utc_fourier,
# keeping the first two axes in full.
np_batch[BatchKey.gsp_5_min_time_utc_fourier][:, :,  0]

array([[0.56904805, 0.56904805, 0.56904805, 0.56904805, 0.56904805,
        0.56904805, 0.9358613 , 0.9358613 , 0.9358613 , 0.9358613 ,
        0.9358613 , 0.9358613 , 0.9700774 , 0.9700774 , 0.9700774 ,
        0.9700774 , 0.9700774 , 0.9700774 , 0.6595363 , 0.6595363 ,
        0.6595363 , 0.6595363 , 0.6595363 , 0.6595363 , 0.11460166,
        0.11460166, 0.11460166, 0.11460166, 0.11460166, 0.11460166,
               nan],
       [0.56904805, 0.56904805, 0.56904805, 0.56904805, 0.56904805,
        0.9358613 , 0.9358613 , 0.9358613 , 0.9358613 , 0.9358613 ,
        0.9358613 , 0.9700774 , 0.9700774 , 0.9700774 , 0.9700774 ,
        0.9700774 , 0.9700774 , 0.6264281 , 0.6264281 , 0.6264281 ,
        0.6264281 , 0.6264281 , 0.6264281 , 0.07155345, 0.07155345,
        0.07155345, 0.07155345, 0.07155345, 0.07155345,        nan,
               nan],
       [0.56904805, 0.56904805, 0.56904805, 0.56904805, 0.56904805,
        0.56904805, 0.9358613 , 0.9358613 , 0.9358613 , 0.9358613 ,
      

In [13]:
# Show the entry at index 0 along the first axis of BatchKey.gsp_5_min.
np_batch[BatchKey.gsp_5_min][0]

array([0.18378873, 0.18378873, 0.18378873, 0.18378873, 0.18378873,
       0.18378873, 0.37143427, 0.37143427, 0.37143427, 0.37143427,
       0.37143427, 0.37143427, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.02558792, 0.02558792,
       0.02558792, 0.02558792, 0.02558792, 0.02558792, 0.00996516,
       0.00996516, 0.00996516, 0.00996516, 0.00996516, 0.00996516,
              nan], dtype=float32)