In [1]:
import pandas as pd
import xarray as xr
import numpy as np
import zarr
import gcsfs
import dask
import dask.array
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import animation
import cv2 as cv
from skimage import metrics
from IPython.display import HTML

In [14]:
# Override is_retriable.  Google Cloud sometimes throws
# an HttpError with code 400.  gcsfs does not treat this as retriable,
# but it is retriable!

def is_retriable(exception):
    """Tell whether a failed request should be attempted again.

    For a ``gcsfs.utils.HttpError`` the decision is taken from its ``code``
    attribute, which is matched against the retriable status codes both as
    integers and as their string forms.  Any other exception is retriable
    only when it is an instance of ``gcsfs.utils.RETRIABLE_EXCEPTIONS``.

    Returns True if the request may be retried, False otherwise.
    """
    # Non-HTTP failures: defer entirely to gcsfs's own list of exception types.
    if not isinstance(exception, gcsfs.utils.HttpError):
        return isinstance(exception, gcsfs.utils.RETRIABLE_EXCEPTIONS)

    status_codes = [
        # Jack's addition.  Google Cloud occasionally throws Bad Requests for no apparent reason.
        400,
        # Request Timeout
        408,
        # Too Many Requests
        429,
        # Server-side errors 500 through 504
        *range(500, 505),
    ]
    # Accept the code whether it is carried as an int or as a string.
    accepted = status_codes + [str(status) for status in status_codes]
    return exception.code in accepted

# Install the override on gcsfs.utils so lookups through that module use it.
gcsfs.utils.is_retriable = is_retriable
# A module that did `from .utils import is_retriable` holds its own reference
# to the original function, so rebinding gcsfs.utils alone would not change
# what that module calls.  Rebind the name in gcsfs.core as well when it
# exists there (guarded, since this depends on the installed gcsfs version).
if hasattr(gcsfs, "core") and hasattr(gcsfs.core, "is_retriable"):
    gcsfs.core.is_retriable = is_retriable

In [15]:
# Sanity check: an HttpError constructed with code 400 must be reported as
# retriable by whatever function is currently bound to gcsfs.utils.is_retriable.
# Note this only exercises the gcsfs.utils attribute, not other modules that
# may hold their own reference to the original function.
assert gcsfs.utils.is_retriable(gcsfs.utils.HttpError({'code': 400}))

In [2]:
# Root path of the Zarr store; passed as `root` to GCSMap below.
ZARR = 'solar-pv-nowcasting-data/satellite/EUMETSAT/SEVIRI_RSS/OSGB36/all_zarr_int16'
# File-system object created with gcsfs defaults
# (NOTE(review): credentials/project are whatever gcsfs resolves by default — confirm).
gcs = gcsfs.GCSFileSystem()
# Mapping over ZARR backed by `gcs`; this is what xr.open_zarr is given later.
store = gcsfs.GCSMap(root=ZARR, gcs=gcs)

In [6]:
# Set `retries` on the same `gcs` instance that was handed to the GCSMap store.
# NOTE(review): presumably the number of attempts gcsfs makes per request
# before giving up; the value 20 is not explained here — confirm.
gcs.retries = 20

In [3]:
%%time
# Open the Zarr store as an xarray Dataset, reading the store's consolidated
# metadata (consolidated=True).  The recorded run took about 3 minutes of wall time.
dataset = xr.open_zarr(store, consolidated=True)

CPU times: user 5.56 s, sys: 435 ms, total: 6 s
Wall time: 3min 17s


In [4]:
# Pick the 'stacked_eumetsat_data' variable out of the dataset; every later
# cell operates on this array.
source_array = dataset['stacked_eumetsat_data']

In [8]:
# Cast to float32 so NaN can be represented, then replace every element equal
# to -1 with NaN (presumably -1 marks missing data in the int16 store — confirm).
# The `!= -1` mask is evaluated on the array as it was before this assignment.
# `np.nan` is used instead of the `np.NaN` alias, which NumPy 2.0 removed.
source_array = source_array.astype(np.float32).where(source_array != -1, np.nan)

In [10]:
%%time
# Mean over the 'time', 'x' and 'y' dimensions; any other dimension is kept.
# .compute() forces evaluation so the result is shown as the cell output.
# NOTE(review): xarray reductions skip NaN by default for float data, so the
# NaN-masked elements should be excluded — confirm.
# NOTE(review): mean/std/max/min are each evaluated by a separate .compute();
# if the array is dask-backed, passing the four lazy reductions to one
# dask.compute(...) call would let them share a single read — confirm.
source_array.mean(dim=['time', 'x', 'y']).compute()

In [11]:
%%time
# Standard deviation over the 'time', 'x' and 'y' dimensions; any other
# dimension is kept.  .compute() forces evaluation.
# The recorded run took about 1 h 9 min of wall time.
# NOTE(review): xarray reductions skip NaN by default for float data — confirm.
source_array.std(dim=['time', 'x', 'y']).compute()

CPU times: user 4h 34min 35s, sys: 1h 34min 27s, total: 6h 9min 2s
Wall time: 1h 9min 2s


In [16]:
%%time
# Maximum over the 'time', 'x' and 'y' dimensions; any other dimension is
# kept.  .compute() forces evaluation.
# The recorded run took about 41 min of wall time.
# NOTE(review): xarray reductions skip NaN by default for float data — confirm.
source_array.max(dim=['time', 'x', 'y']).compute()

CPU times: user 1h 52min 10s, sys: 24min 33s, total: 2h 16min 43s
Wall time: 41min 17s


In [17]:
%%time
# Minimum over the 'time', 'x' and 'y' dimensions; any other dimension is
# kept.  .compute() forces evaluation.
# The recorded run took about 41 min of wall time.
# NOTE(review): xarray reductions skip NaN by default for float data — confirm.
source_array.min(dim=['time', 'x', 'y']).compute()

CPU times: user 1h 52min 17s, sys: 24min 12s, total: 2h 16min 29s
Wall time: 40min 55s
