In [1]:
from power_perceiver.data_loader.satellite_zarr_dataset import SatelliteZarrDataset
from power_perceiver.np_batch_processor.sun_position import SunPosition
from power_perceiver.consts import BatchKey

import logging

logging.basicConfig()

In [2]:
SATELLITE_ZARR_PATH = (
    "gs://solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/v3/eumetsat_seviri_hrv_uk.zarr"
)

In [3]:
sat_zarr_ds = SatelliteZarrDataset(
    satellite_zarr_path=SATELLITE_ZARR_PATH,
    n_days_to_load_per_epoch=2,
    load_once=True,
    np_batch_processors=[SunPosition()],
    )
sat_zarr_ds.per_worker_init()
#sat_zarr_ds._load_random_days_from_disk()

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array[indexer]

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    ...     array[indexer]
  return self.array[key]


In [4]:
sat_zarr_ds.xr_sat_dataset.time

In [16]:
import itertools
import pvlib
from power_perceiver.geospatial import osgb_to_lat_lon
import pandas as pd


y_osgb = sat_zarr_ds.xr_sat_dataset.y_osgb
x_osgb = sat_zarr_ds.xr_sat_dataset.x_osgb

corners_osgb = [
    (x_osgb.isel(x=x, y=y).item(), y_osgb.isel(x=x, y=y).item()) 
    for x, y in itertools.product((0, -1), (0, -1))]

corners_osgb = pd.DataFrame(corners_osgb, columns=['x', 'y'])

#solpos = pvlib.solarposition.get_solarposition(
#    time=sat_zarr_ds.xr_sat_dataset.time,
#    latitude=,
#    longitude=,
#)

In [22]:
lats, lons = osgb_to_lat_lon(x=corners_osgb.x, y=corners_osgb.y)



In [24]:
%%time
elevation_for_all_corners = []
for lat, lon in zip(lats, lons):
    solpos = pvlib.solarposition.get_solarposition(
    time=sat_zarr_ds.xr_sat_dataset.time,
    latitude=lat,
    longitude=lon,
    )

    elevation = solpos["elevation"]
    elevation_for_all_corners.append(elevation)
    
elevation_for_all_corners = pd.concat(elevation_for_all_corners, axis="columns")
max_elevation = elevation_for_all_corners.max(axis="columns")
mask = max_elevation >= 10


CPU times: user 977 ms, sys: 1.01 ms, total: 978 ms
Wall time: 976 ms


In [25]:
elevation_for_all_corners

Unnamed: 0,elevation,elevation.1,elevation.2,elevation.3
2020-01-01 09:00:00,1.782920,-21.315225,15.091894,0.364433
2020-01-01 09:05:00,2.494170,-20.825682,15.585843,0.675966
2020-01-01 09:10:00,3.197059,-20.337826,16.065620,0.977789
2020-01-01 09:15:00,3.891341,-19.851836,16.530927,1.269766
2020-01-01 09:20:00,4.576769,-19.367890,16.981470,1.551762
...,...,...,...,...
2020-12-31 15:35:00,13.970957,0.156755,1.611045,-8.549679
2020-12-31 15:40:00,13.460227,0.088112,0.874611,-9.040132
2020-12-31 15:45:00,12.936326,0.009641,0.130460,-9.536453
2020-12-31 15:50:00,12.399544,-0.078619,-0.621176,-10.038466


In [4]:
import torch
ds = torch.utils.data.DataLoader(
    sat_zarr_ds,
    batch_size=32,
    num_workers=1,
    persistent_workers=True,
)

In [5]:
%%time
for batch in ds:
    break

CPU times: user 4.27 ms, sys: 31.3 ms, total: 35.6 ms
Wall time: 482 ms


In [6]:
batch.keys()

dict_keys([<BatchKey.hrvsatellite: 1>, <BatchKey.hrvsatellite_time_utc: 6>, <BatchKey.hrvsatellite_y_osgb: 2>, <BatchKey.hrvsatellite_x_osgb: 3>, <BatchKey.hrvsatellite_y_geostationary: 4>, <BatchKey.hrvsatellite_x_geostationary: 5>, <BatchKey.solar_azimuth_at_t0: 35>, <BatchKey.solar_elevation_at_t0: 36>])

In [7]:
batch[BatchKey.solar_elevation_at_t0]

tensor([-1.0391,  0.6421, -0.6536, -1.5825, -0.5896, -0.8363, -0.9503,  0.9962,
        -0.4699, -0.9913, -1.1641,  1.0732, -0.9181,  2.1896, -0.4731, -0.3397,
         1.2402,  0.9063,  0.4419,  0.7538, -0.9503,  1.7936,  2.1212,  0.8467,
         0.8161,  0.9970,  1.5933,  1.7330,  1.9025, -0.6303,  1.1839,  1.2297])

In [11]:
batch[BatchKey.solar_azimuth_at_t0]

tensor([ 0.2097, -1.3664,  0.0251, -0.8280, -0.3526,  0.1399,  0.7209,  0.5713,
         0.3520, -0.4364, -0.9640,  1.4046, -0.5709, -0.2633,  0.5039,  0.2746,
         0.1712, -1.0254, -1.0788, -1.3080, -0.2799,  0.7264, -0.4885, -1.4894,
        -1.3824, -1.0065,  0.8725,  0.4533, -0.7027, -0.6478,  1.4072, -0.1208])

In [10]:
import pandas as pd

pd.to_datetime(batch[BatchKey.hrvsatellite_time_utc][:, 6], unit="s")

DatetimeIndex(['2020-10-09 12:20:16', '2020-05-28 09:44:32',
               '2020-10-09 13:05:04', '2020-10-09 09:44:32',
               '2020-10-09 11:44:00', '2020-10-09 12:24:32',
               '2020-10-09 13:09:20', '2020-05-28 13:15:44',
               '2020-10-09 12:05:20', '2020-10-09 10:59:12',
               '2020-10-09 09:59:28', '2020-05-28 13:49:52',
               '2020-10-09 11:09:52', '2020-05-28 11:24:48',
               '2020-10-09 13:05:04', '2020-10-09 13:30:40',
               '2020-05-28 12:01:04', '2020-05-28 10:54:56',
               '2020-05-28 10:35:44', '2020-05-28 09:55:12',
               '2020-10-09 11:44:00', '2020-05-28 13:24:16',
               '2020-05-28 10:40:00', '2020-05-28 09:44:32',
               '2020-05-28 09:36:00', '2020-05-28 11:29:04',
               '2020-05-28 13:20:00', '2020-05-28 13:49:52',
               '2020-05-28 11:35:28', '2020-10-09 09:44:32',
               '2020-05-28 13:34:56', '2020-05-28 13:15:44'],
              dtype='da