In [1]:
import os
import shutil
import glob
import time
from tqdm import tqdm
import random
import numpy as np
import xarray as xr
import pandas as pd
import tensorflow as tf

# Build up efficient TF data pipelines from netCDF-files

We test two different approaches to build up the dataset input streams. <br>
The first one is based on `open_dataset` and requires a large buffer size to enable proper sampling (buffer size $\mathcal{O}(10^4)$ so that at minimum 10 files are buffered). This is due to the fact that only the order of the data files is randomized, not the data samples themselves as in the next approach. <br>
The second approach is based on `open_mfdataset`, which makes the data sampling much easier since it allows randomization of an index list for the time dimension to build up the iterator.

In [2]:
def load_nc_dir_with_generator(dir_, patt, shuffle=True, seed=42):
    """
    Opens datafiles via looping in the generator-method. This implies a larger buffer_size (> 12*744 where 12 corresponds to months per year
    and 744 is the number of time steps in a monthly datafile) when shuffling since the buffer gets filled up with data from sequential (unordered) files.
    Only the order of the files is randomized here, the time steps within a file are yielded sequentially.
    :param dir_: The directory where the netCDF-files are located.
    :param patt: Substring-pattern to identify the desired netCDF-files ("{patt}*.nc" is applied for searching)
    :param shuffle: flag to enable shuffling of the file order
    :param seed: seed for random shuffling
    :return: tf.Dataset for data streaming in neural networks (one dictionary per time step; besides the data variables,
             the key "time" holds the time stamp formatted as "%Y-%m-%d %H:%M")
    :raises FileNotFoundError: if no file matches "{patt}*.nc" in dir_
    :raises ValueError: if none of the matching files provides a time step
    """
    # glob returns files in arbitrary (filesystem-dependent) order -> sort to make the seeded shuffle reproducible
    nc_files = sorted(glob.glob(os.path.join(dir_, f"{patt}*.nc")))

    if not nc_files:
        raise FileNotFoundError(f"No netCDF-files matching '{patt}*.nc' found under '{dir_}'.")

    if shuffle:
        # a private RNG yields the same permutation as random.seed(seed) + random.shuffle,
        # but leaves the state of the global random-module untouched
        random.Random(seed).shuffle(nc_files)

    def gen():
        for file in nc_files:
            # the context manager ensures that the file handle is released, even if the generator is closed prematurely
            with xr.open_dataset(file, engine='netcdf4') as ds:
                ntimes = len(ds["time"])
                for t in range(ntimes):
                    ds_t = ds.isel({"time": t})
                    data_dict = {key: tf.convert_to_tensor(val) for key, val in ds_t.items()}
                    data_dict["time"] = np.array([pd.to_datetime(ds_t["time"].values).strftime("%Y-%m-%d %H:%M")])
                    yield data_dict

    # draw a single sample to infer the output signature and close the generator (and thus the opened file) afterwards
    sample_gen = gen()
    try:
        sample = next(sample_gen)
    except StopIteration:
        raise ValueError(f"The netCDF-files matching '{patt}*.nc' under '{dir_}' do not contain any time step.") from None
    finally:
        sample_gen.close()

    # Pass the generator *function* (not an already instantiated generator) so that TensorFlow obtains a fresh
    # generator for every iteration over the dataset. Otherwise, the data is exhausted after the first pass.
    return tf.data.Dataset.from_generator(
        gen,
        output_signature={
            key: tf.TensorSpec(val.shape, dtype=val.dtype)
            for key, val in sample.items()
        }
    )

def load_mfnc_dir_with_generator(dir_: str, patt: str, shuffle: bool = True, seed: int = 42):
    """
    Opens netCDF-files using xarray's open_mfdataset-method. Shuffling of the data is achieved by shuffling over the time step-indices.
    For efficiency, decoding of the time is disabled (implying shared time-units for all netCDF-data to avoid overwriting with open_mfdataset!!!)
    since this information is not required for data streaming.
    :param dir_: The directory where the netCDF-files are located.
    :param patt: Substring-pattern to identify the desired netCDF-files ("{patt}*.nc" is applied for searching)
    :param shuffle: flag to enable shuffling
    :param seed: seed for random shuffling
    :return: tf.Dataset for data streaming in neural networks (one dictionary with the data variables per time step)
    :raises ValueError: if the opened dataset does not contain any time step
    """
    ds_all = xr.open_mfdataset(os.path.join(dir_, f"{patt}*.nc"), cache=False, decode_cf=False)
    ntimes = len(ds_all["time"])
    if ntimes == 0:
        raise ValueError(f"The netCDF-files matching '{patt}*.nc' under '{dir_}' do not contain any time step.")

    if shuffle:
        # a private RNG yields the same permutation as random.seed(seed) + random.sample,
        # but leaves the state of the global random-module untouched
        time_list = random.Random(seed).sample(range(ntimes), ntimes)
    else:
        time_list = list(range(ntimes))

    def gen():
        # iterate over the (possibly shuffled) time indices and yield all data variables of the respective time step
        for t in time_list:
            ds = ds_all.isel({"time": t})
            yield {key: tf.convert_to_tensor(val) for key, val in ds.items()}

    # draw a single sample to infer the output signature
    sample = next(gen())

    # Pass the generator *function* (not an already instantiated generator) so that TensorFlow obtains a fresh
    # generator for every iteration over the dataset. Otherwise, the data is exhausted after the first pass.
    return tf.data.Dataset.from_generator(
        gen,
        output_signature={
            key: tf.TensorSpec(val.shape, dtype=val.dtype)
            for key, val in sample.items()
        }
    )


### highly inefficient in terms of memory -> not tested subsequently!!!
def load_data(dir_, patt) -> "tuple[xr.DataArray, xr.DataArray]":
    """
    Opens all netCDF-files matching the pattern with xarray's open_mfdataset-method and stacks the data variables
    into a single DataArray.
    :param dir_: The directory where the netCDF-files are located.
    :param patt: Substring-pattern to identify the desired netCDF-files ("{patt}*.nc" is applied for searching)
    :return: tuple (da, init_times) where da is the data as xarray's DataArray whose data variables are stacked along
             the dimension "variables" (moved to the last axis, the remaining dimensions keep their order) and
             init_times is the "time"-coordinate of da
    """
    ds = xr.open_mfdataset(os.path.join(dir_, f"{patt}*.nc"), cache=False, parallel=True)

    # stack all data variables along a new dimension and make it the trailing (channel-like) axis
    da = ds.to_array(dim="variables").transpose(..., "variables")
    init_times = da["time"]

    return da, init_times

After setting up the data directory, both strategies are benchmarked.

In [3]:
# Directory holding the preprocessed netCDF-files (absolute path; adapt it when running on another system).
datadir = "/p/scratch/deepacf/maelstrom/maelstrom_data/ap5_michael/preprocessed_era5_ifs/netcdf_data/all_files/"
# Filename prefix of the datafiles; the loader functions search for f"{pattern}*.nc" inside datadir.
pattern = "preproc_"

We create the respective TF datasets, ...

In [4]:
# Approach 1: files are opened one by one in the generator (defaults: shuffle=True, seed=42 -> only the file order is shuffled).
tfds_test = load_nc_dir_with_generator(datadir, pattern)
# Approach 2: all files are opened at once via open_mfdataset (defaults: shuffle=True, seed=42 -> the time indices are shuffled).
tfds_test_mf = load_mfnc_dir_with_generator(datadir, pattern)

2022-07-13 16:25:16.528116: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX512F
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-07-13 16:25:17.617259: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1510] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 31019 MB memory:  -> device: 0, name: Tesla V100-PCIE-32GB, pci bus id: 0000:5e:00.0, compute capability: 7.0


... configure them and... 

In [5]:
# Delay in seconds passed to time.sleep() in the benchmark cell of approach 2; 0 means no delay.
sleep_sec = 0
# Approach 1: a shuffle buffer of 20000 samples (larger than the 12*744 samples named in the loader's docstring)
# is placed before batching since only the file order has been randomized by the loader.
ap1 = iter(tfds_test.shuffle(buffer_size=20000).batch(32).prefetch(100).repeat(1))
# Approach 2: no shuffle buffer is used here since the loader already iterates over shuffled time indices.
ap2 = iter(tfds_test_mf.batch(32).prefetch(100).repeat(1))

2022-07-13 16:25:50.144503: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


then run both approaches. We start with approach 1:

In [6]:
%%time

# NOTE: sleep_sec is not used within this cell.
sleep_sec = 0

for i in tqdm(range(10)):
    # Start the clock only before the second minibatch: the first call to get_next() has to wait
    # until the shuffle buffer of ap1 is filled and is therefore excluded from the timing.
    if i == 1:
        time_s = time.time()
    
    batch = ap1.get_next()
    # NOTE: the time spent on printing the tensor is part of the measured interval
    print(batch["2t_in"])
    print("***************")
   
# average over the 9 minibatches retrieved after the clock has been started (i = 1,...,9)
load_time_ap1 = (time.time() - time_s)/9.
print("After filling the buffer, retrieving each minibatch took: {0:5.04f}s".format(load_time_ap1))

  0%|          | 0/10 [00:00<?, ?it/s]2022-07-13 16:26:00.209353: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:175] Filling up shuffle buffer (this may take a while): 1258 of 20000
2022-07-13 16:26:10.208179: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:175] Filling up shuffle buffer (this may take a while): 2446 of 20000
2022-07-13 16:26:20.210874: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:175] Filling up shuffle buffer (this may take a while): 3646 of 20000
2022-07-13 16:26:30.217325: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:175] Filling up shuffle buffer (this may take a while): 4877 of 20000
2022-07-13 16:26:40.208714: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:175] Filling up shuffle buffer (this may take a while): 6083 of 20000
2022-07-13 16:26:50.207312: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:175] Filling up shuffle buffer (this may take a while): 7261 of 20000
2022-07-13 16:27:00.210052: I tensorflow/core/kernels/da

tf.Tensor(
[[[284.54684448 284.54766846 284.5475769  ... 280.60705566 280.50146484
   280.39590454]
  [284.56790161 284.57220459 284.57519531 ... 280.45059204 280.35202026
   280.25344849]
  [284.58990479 284.60031128 284.61026001 ... 280.29412842 280.20257568
   280.11102295]
  ...
  [281.30834961 281.26651001 281.22348022 ... 281.23043823 281.17019653
   281.10198975]
  [281.35726929 281.33880615 281.32748413 ... 281.21542358 281.16644287
   281.11279297]
  [281.40475464 281.07073975 281.0769043  ... 281.20953369 281.16818237
   281.12527466]]

 [[287.03387451 287.03518677 287.03536987 ... 283.34674072 283.29367065
   283.2406311 ]
  [287.06460571 287.07086182 287.07546997 ... 283.27249146 283.22360229
   283.17468262]
  [287.09594727 287.11038208 287.1242981  ... 283.19824219 283.15350342
   283.10876465]
  ...
  [282.04986572 281.9960022  281.94046021 ... 285.07333374 285.09609985
   285.11639404]
  [282.11691284 282.09432983 282.08148193 ... 285.12072754 285.1267395
   285.1351623

 20%|██        | 2/10 [02:46<09:08, 68.62s/it] 

tf.Tensor(
[[[287.925354   287.92514038 287.92666626 ... 288.7467041  288.69491577
   288.64312744]
  [287.90405273 287.89959717 287.89755249 ... 288.728302   288.68939209
   288.65048218]
  [287.87857056 287.86514282 287.85247803 ... 288.7098999  288.68386841
   288.65783691]
  ...
  [287.05245972 286.95916748 286.86392212 ... 290.32952881 290.38696289
   290.45962524]
  [287.13687134 287.0881958  287.05221558 ... 290.34347534 290.39141846
   290.44818115]
  [287.22085571 286.45797729 286.45947266 ... 290.34759521 290.38754272
   290.43161011]]

 [[287.15948486 287.16278076 287.16717529 ... 291.25668335 291.26333618
   291.26998901]
  [287.19525146 287.20220947 287.2097168  ... 291.34783936 291.35256958
   291.3572998 ]
  [287.22354126 287.23330688 287.24325562 ... 291.43899536 291.44180298
   291.4446106 ]
  ...
  [291.12841797 291.09762573 291.06564331 ... 292.61312866 292.6690979
   292.74795532]
  [291.17337036 291.16165161 291.15606689 ... 292.60638428 292.66000366
   292.7250061

 30%|███       | 3/10 [02:46<04:21, 37.41s/it]

tf.Tensor(
[[[280.53158569 280.53042603 280.52661133 ... 271.63430786 271.38458252
   271.13485718]
  [280.54336548 280.54602051 280.54562378 ... 271.54833984 271.29699707
   271.04562378]
  [280.56329346 280.57580566 280.58728027 ... 271.46234131 271.2093811
   270.9564209 ]
  ...
  [282.06298828 282.02215576 281.98068237 ... 279.73968506 279.73855591
   279.73809814]
  [282.09329224 282.07025146 282.05197144 ... 279.73397827 279.73471069
   279.73532104]
  [282.12408447 281.47824097 281.4776001  ... 279.7298584  279.73129272
   279.73242188]]

 [[283.05233765 283.05407715 283.05502319 ... 279.91567993 279.8664856
   279.81729126]
  [283.08493042 283.0914917  283.09637451 ... 279.70169067 279.66125488
   279.62081909]
  [283.11740112 283.13235474 283.146698   ... 279.48770142 279.45602417
   279.42434692]
  ...
  [275.05059814 275.06515503 275.08026123 ... 275.56762695 275.5473938
   275.52664185]
  [275.03100586 275.03765869 275.04193115 ... 275.56283569 275.54663086
   275.53015137]

 40%|████      | 4/10 [02:47<02:16, 22.75s/it]

tf.Tensor(
[[[290.86203003 290.86309814 290.86419678 ... 286.95324707 286.76177979
   286.57028198]
  [290.87677002 290.87966919 290.88189697 ... 286.74798584 286.56246948
   286.37698364]
  [290.89068604 290.89703369 290.90307617 ... 286.54269409 286.3631897
   286.1836853 ]
  ...
  [291.56710815 291.45831299 291.34786987 ... 287.01226807 287.06781006
   287.12631226]
  [291.64517212 291.58282471 291.53274536 ... 287.06140137 287.09603882
   287.13565063]
  [291.72494507 290.66079712 290.65274048 ... 287.09283447 287.11660767
   287.14385986]]

 [[289.37762451 289.37997437 289.38290405 ... 291.21011353 291.17147827
   291.1328125 ]
  [289.40582275 289.41137695 289.41708374 ... 291.33932495 291.30175781
   291.26416016]
  [289.42910767 289.43756104 289.44610596 ... 291.46853638 291.43200684
   291.39547729]
  ...
  [301.47128296 301.38973999 301.30667114 ... 297.99816895 298.03051758
   298.07836914]
  [301.54077148 301.4977417  301.46551514 ... 297.95578003 297.99786377
   298.0443420

 50%|█████     | 5/10 [02:47<01:13, 14.64s/it]

tf.Tensor(
[[[289.38510132 289.38967896 289.39440918 ... 287.64511108 287.64691162
   287.64874268]
  [289.44924927 289.46200562 289.47384644 ... 287.60754395 287.60928345
   287.61105347]
  [289.50619507 289.52902222 289.55160522 ... 287.56997681 287.57165527
   287.57333374]
  ...
  [288.76901245 288.70809937 288.64599609 ... 293.05383301 293.08377075
   293.12918091]
  [288.82122803 288.78869629 288.76416016 ... 293.0223999  293.05935669
   293.10171509]
  [288.8734436  288.26470947 288.26489258 ... 292.99563599 293.03231812
   293.06982422]]

 [[281.03982544 281.04263306 281.04586792 ... 281.8973999  281.88446045
   281.87155151]
  [281.07592773 281.08306885 281.09014893 ... 282.0791626  282.06784058
   282.05648804]
  [281.10668945 281.11831665 281.12997437 ... 282.26095581 282.25119019
   282.24142456]
  ...
  [282.2946167  282.23300171 282.1701355  ... 284.60720825 284.57952881
   284.55032349]
  [282.34841919 282.31564331 282.29095459 ... 284.57574463 284.5604248
   284.5419616

 60%|██████    | 6/10 [02:47<00:39,  9.75s/it]

tf.Tensor(
[[[276.68301392 276.67788696 276.67337036 ... 273.72711182 273.8145752
   273.90203857]
  [276.60333252 276.58740234 276.57345581 ... 273.72036743 273.8008728
   273.88140869]
  [276.52984619 276.49908447 276.46890259 ... 273.71359253 273.78717041
   273.86074829]
  ...
  [277.87786865 277.87139893 277.86495972 ... 275.84066772 275.80511475
   275.75802612]
  [277.87918091 277.87496948 277.87121582 ... 275.83294678 275.80267334
   275.76583862]
  [277.88082886 277.21841431 277.22018433 ... 275.83139038 275.80618286
   275.77780151]]

 [[278.91485596 278.91217041 278.9079895  ... 272.31723022 272.27539062
   272.23358154]
  [278.89196777 278.88760376 278.88214111 ... 272.33227539 272.2918396
   272.25140381]
  [278.87637329 278.87225342 278.86776733 ... 272.34729004 272.30825806
   272.26919556]
  ...
  [280.0447998  280.04904175 280.05252075 ... 277.14096069 277.22445679
   277.3286438 ]
  [280.06893921 280.07888794 280.0916748  ... 277.1685791  277.23602295
   277.31610107]

 70%|███████   | 7/10 [02:47<00:19,  6.65s/it]

tf.Tensor(
[[[288.78503418 288.78683472 288.78921509 ... 291.02459717 291.00762939
   290.99066162]
  [288.8046875  288.80853271 288.81274414 ... 291.13418579 291.11383057
   291.09350586]
  [288.82009888 288.82525635 288.83056641 ... 291.24377441 291.22003174
   291.19631958]
  ...
  [291.79983521 291.75592041 291.71142578 ... 290.50561523 290.46011353
   290.40496826]
  [291.82925415 291.80368042 291.78289795 ... 290.49176025 290.45492554
   290.41223145]
  [291.85952759 291.58700562 291.58102417 ... 290.48608398 290.45568848
   290.42260742]]

 [[290.21472168 290.21792603 290.22229004 ... 292.49319458 292.4105835
   292.32797241]
  [290.24780273 290.25421143 290.2611084  ... 292.57739258 292.50012207
   292.42282104]
  [290.27371216 290.28265381 290.29174805 ... 292.66159058 292.58963013
   292.5177002 ]
  ...
  [293.88372803 293.86022949 293.83633423 ... 295.00091553 294.94241333
   294.88711548]
  [293.90151978 293.88809204 293.87731934 ... 294.87271118 294.85855103
   294.8358764

 80%|████████  | 8/10 [02:48<00:09,  4.62s/it]

tf.Tensor(
[[[290.04193115 290.04382324 290.04644775 ... 287.88464355 287.7991333
   287.71359253]
  [290.06079102 290.06445312 290.0687561  ... 287.67648315 287.6055603
   287.53466797]
  [290.07476807 290.07897949 290.08340454 ... 287.46829224 287.41201782
   287.35571289]
  ...
  [291.63891602 291.53433228 291.42843628 ... 290.01620483 290.0713501
   290.13735962]
  [291.70489502 291.64224243 291.590271   ... 290.04580688 290.08673096
   290.13543701]
  [291.77346802 290.66452026 290.65280151 ... 290.06195068 290.09362793
   290.12957764]]

 [[279.45684814 279.45687866 279.45675659 ... 277.75668335 277.7612915
   277.76586914]
  [279.45895386 279.4593811  279.45944214 ... 277.72589111 277.72668457
   277.72747803]
  [279.46166992 279.46328735 279.4647522  ... 277.69509888 277.69207764
   277.68908691]
  ...
  [277.80197144 277.730896   277.65835571 ... 280.77575684 280.79281616
   280.81542969]
  [277.86447144 277.82684326 277.79867554 ... 280.753479   280.77526855
   280.79785156]


 90%|█████████ | 9/10 [02:48<00:03,  3.26s/it]

tf.Tensor(
[[[275.47036743 275.46685791 275.46057129 ... 269.35702515 269.25683594
   269.15661621]
  [275.45010376 275.44641113 275.44012451 ... 269.25241089 269.15512085
   269.05783081]
  [275.44158936 275.44262695 275.44281006 ... 269.14776611 269.05340576
   268.95904541]
  ...
  [278.71685791 278.6991272  278.68057251 ... 273.62014771 273.6020813
   273.57839966]
  [278.7460022  278.73983765 278.73745728 ... 273.63803101 273.61660767
   273.59396362]
  [278.77386475 277.93341064 277.94750977 ... 273.65283203 273.63128662
   273.61044312]]

 [[276.74459839 276.73690796 276.72607422 ... 266.22189331 266.20681763
   266.19171143]
  [276.66793823 276.65307617 276.63601685 ... 265.82052612 265.82449341
   265.82843018]
  [276.6104126  276.59240723 276.57354736 ... 265.41912842 265.44216919
   265.46517944]
  ...
  [280.01739502 280.05279541 280.0892334  ... 271.95114136 271.91094971
   271.8626709 ]
  [279.97528076 279.99151611 280.00195312 ... 271.93634033 271.90460205
   271.8676757

100%|██████████| 10/10 [02:48<00:00, 16.87s/it]

tf.Tensor(
[[[292.04806519 292.05078125 292.05584717 ... 294.21487427 294.20236206
   294.18988037]
  [292.06195068 292.06442261 292.0690918  ... 294.37701416 294.35693359
   294.33685303]
  [292.06637573 292.06448364 292.06332397 ... 294.53915405 294.51147461
   294.48382568]
  ...
  [296.40579224 296.32788086 296.2484436  ... 297.40942383 297.47381592
   297.55966187]
  [296.47311401 296.43170166 296.40054321 ... 297.39279175 297.45623779
   297.5291748 ]
  [296.54037476 295.96981812 295.96859741 ... 297.37362671 297.43151855
   297.49227905]]

 [[289.64172363 289.64471436 289.64804077 ... 291.16470337 291.14056396
   291.11645508]
  [289.68075562 289.68844604 289.69598389 ... 291.23364258 291.21502686
   291.19641113]
  [289.71435547 289.72729492 289.74020386 ... 291.30258179 291.28948975
   291.27639771]
  ...
  [294.88909912 294.80682373 294.72314453 ... 292.03414917 292.11752319
   292.21383667]
  [294.95294189 294.90750122 294.87213135 ... 292.06729126 292.13192749
   292.205230




Now, we continue with approach 2:

In [7]:
%%time

# The clock is started before the loop: in contrast to approach 1, the optional sleep and
# the very first minibatch are part of the measured interval.
time_s = time.time()

for i in tqdm(range(10)):
    if i == 0:
        print(f"Sleeping for {sleep_sec}s...")
        time.sleep(sleep_sec)
    
    batch = ap2.get_next()
    # NOTE: the time spent on printing the tensor is part of the measured interval
    print(batch["2t_in"])
    print("***************")
    
# average over all 10 minibatches
load_time_ap2 = (time.time() - time_s)/10.


  0%|          | 0/10 [00:00<?, ?it/s]

Sleeping for 0s...


 10%|█         | 1/10 [00:02<00:18,  2.05s/it]

tf.Tensor(
[[[285.24121094 285.24240112 285.24356079 ... 283.93896484 283.92404175
   283.90908813]
  [285.25860596 285.26208496 285.2651062  ... 283.84768677 283.8348999
   283.82211304]
  [285.27450562 285.28115845 285.28765869 ... 283.75643921 283.74578857
   283.73516846]
  ...
  [282.3638916  282.27709961 282.18832397 ... 281.96502686 282.0050354
   282.03707886]
  [282.447052   282.4029541  282.37115479 ... 282.01016235 282.03088379
   282.05023193]
  [282.52938843 281.88253784 281.88616943 ... 282.04141235 282.05410767
   282.0668335 ]]

 [[285.88839722 285.88092041 285.8706665  ... 281.9781189  281.93240356
   281.88668823]
  [285.81097412 285.79589844 285.77868652 ... 281.85232544 281.80548096
   281.75863647]
  [285.7522583  285.73376465 285.71444702 ... 281.72653198 281.67858887
   281.63061523]
  ...
  [288.64047241 288.70535278 288.77069092 ... 283.64855957 283.69055176
   283.73916626]
  [288.61199951 288.65396118 288.69070435 ... 283.66793823 283.69976807
   283.73623657

 20%|██        | 2/10 [00:05<00:20,  2.59s/it]

tf.Tensor(
[[[291.50253296 291.50369263 291.50692749 ... 292.32421875 292.32125854
   292.31826782]
  [291.49667358 291.49523926 291.4961853  ... 292.33956909 292.32778931
   292.31600952]
  [291.48403931 291.47546387 291.46765137 ... 292.35488892 292.33432007
   292.31375122]
  ...
  [300.29400635 300.19250488 300.08892822 ... 299.96908569 300.04025269
   300.13302612]
  [300.38571167 300.33258057 300.29318237 ... 299.94296265 300.01495361
   300.09545898]
  [300.4770813  299.75811768 299.75912476 ... 299.91659546 299.98361206
   300.05221558]]

 [[281.07696533 281.07543945 281.07217407 ... 281.68118286 281.64285278
   281.60449219]
  [281.0730896  281.07254028 281.07009888 ... 281.79064941 281.75311279
   281.71554565]
  [281.07577515 281.07980347 281.08322144 ... 281.90008545 281.86334229
   281.82662964]
  ...
  [277.94216919 277.9039917  277.86566162 ... 274.09136963 274.06347656
   274.02215576]
  [277.95513916 277.9289856  277.90530396 ... 274.11129761 274.07965088
   274.042633

 30%|███       | 3/10 [00:06<00:15,  2.27s/it]

tf.Tensor(
[[[288.67758179 288.67498779 288.67010498 ... 284.28256226 284.17498779
   284.06738281]
  [288.66448975 288.66217041 288.6574707  ... 284.14892578 284.05108643
   283.95321655]
  [288.66104126 288.66360474 288.66534424 ... 284.01531982 283.92715454
   283.83901978]
  ...
  [284.36999512 284.26861572 284.16555786 ... 281.77688599 281.87313843
   281.97296143]
  [284.44732666 284.38980103 284.34405518 ... 281.87908936 281.93414307
   281.99841309]
  [284.52590942 283.49075317 283.48736572 ... 281.94604492 281.98031616
   282.02182007]]

 [[288.88879395 288.89004517 288.89126587 ... 285.74295044 285.76617432
   285.78936768]
  [288.90710449 288.91073608 288.91390991 ... 285.6217041  285.66519165
   285.7086792 ]
  [288.92379761 288.93081665 288.93768311 ... 285.50045776 285.5642395
   285.62799072]
  ...
  [283.50534058 283.44030762 283.37405396 ... 286.29693604 286.36541748
   286.45117188]
  [283.55923462 283.52407837 283.49716187 ... 286.31088257 286.36868286
   286.4362182

 40%|████      | 4/10 [00:09<00:13,  2.22s/it]

tf.Tensor(
[[[280.45083618 280.45141602 280.45074463 ... 279.28726196 279.26245117
   279.23760986]
  [280.47213745 280.47653198 280.47903442 ... 279.24938965 279.23001099
   279.21063232]
  [280.49594116 280.50790405 280.51925659 ... 279.21148682 279.1975708
   279.1836853 ]
  ...
  [278.53591919 278.47219849 278.40734863 ... 280.63772583 280.65917969
   280.7069397 ]
  [278.58737183 278.55303955 278.52682495 ... 280.57424927 280.61447144
   280.66363525]
  [278.63900757 277.90689087 277.90505981 ... 280.52325439 280.56671143
   280.61187744]]

 [[287.80804443 287.81158447 287.81689453 ... 291.37524414 291.41293335
   291.45062256]
  [287.83978271 287.84588623 287.85339355 ... 291.4571228  291.49227905
   291.52740479]
  [287.86193848 287.86798096 287.8744812  ... 291.53900146 291.57159424
   291.60418701]
  ...
  [289.36456299 289.33761597 289.30966187 ... 293.4246521  293.49169922
   293.5690918 ]
  [289.40380859 289.39364624 289.38885498 ... 293.45596313 293.50662231
   293.5646362

 50%|█████     | 5/10 [00:11<00:11,  2.20s/it]

tf.Tensor(
[[[279.72381592 279.72399902 279.72268677 ... 277.04672241 277.01947021
   276.99221802]
  [279.74252319 279.74642944 279.74847412 ... 277.05648804 277.02325439
   276.98999023]
  [279.7644043  279.77563477 279.7862854  ... 277.06625366 277.02700806
   276.98779297]
  ...
  [282.04364014 281.99377441 281.94247437 ... 279.66201782 279.69934082
   279.74624634]
  [282.10031128 282.07705688 282.06176758 ... 279.65679932 279.69192505
   279.73126221]
  [282.15570068 281.6166687  281.62576294 ... 279.64932251 279.68096924
   279.71374512]]

 [[281.21148682 281.21026611 281.20758057 ... 276.11950684 275.98858643
   275.8576355 ]
  [281.20941162 281.20916748 281.20739746 ... 275.99075317 275.85406494
   275.71734619]
  [281.21264648 281.21633911 281.21954346 ... 275.86196899 275.71951294
   275.5770874 ]
  ...
  [276.50958252 276.56503296 276.62194824 ... 277.00448608 277.00985718
   277.02694702]
  [276.45092773 276.47796631 276.49664307 ... 276.97094727 276.98672485
   277.006103

 60%|██████    | 6/10 [00:13<00:08,  2.23s/it]

tf.Tensor(
[[[288.262146   288.26568604 288.26971436 ... 290.78717041 290.78033447
   290.77349854]
  [288.30725098 288.31616211 288.32485962 ... 290.90115356 290.89291382
   290.88467407]
  [288.34585571 288.36068726 288.37545776 ... 291.01513672 291.00549316
   290.99588013]
  ...
  [296.9307251  296.90628052 296.88067627 ... 301.45489502 301.43258667
   301.42684937]
  [296.97387695 296.96688843 296.96578979 ... 301.34988403 301.36245728
   301.37557983]
  [297.01467896 296.76742554 296.77981567 ... 301.27325439 301.29876709
   301.32034302]]

 [[288.07324219 288.08016968 288.08761597 ... 290.70819092 290.74526978
   290.78234863]
  [288.16748047 288.18618774 288.20425415 ... 291.10089111 291.14099121
   291.18109131]
  [288.24942017 288.28115845 288.31280518 ... 291.49359131 291.53671265
   291.5798645 ]
  ...
  [299.49276733 299.34838867 299.20126343 ... 301.84265137 301.89562988
   301.98513794]
  [299.61328125 299.53512573 299.4753418  ... 301.73471069 301.81607056
   301.907623

 70%|███████   | 7/10 [00:15<00:06,  2.16s/it]

tf.Tensor(
[[[287.6937561  287.69549561 287.69845581 ... 291.76135254 291.65380859
   291.54626465]
  [287.70495605 287.70700073 287.71014404 ... 291.78338623 291.67120361
   291.55899048]
  [287.71069336 287.71112061 287.71188354 ... 291.80545044 291.68856812
   291.57171631]
  ...
  [292.50872803 292.39349365 292.2762146  ... 288.68130493 288.75332642
   288.8416748 ]
  [292.60180664 292.5390625  292.49081421 ... 288.69555664 288.75619507
   288.82595825]
  [292.69540405 291.8946228  291.88833618 ... 288.69888306 288.75012207
   288.8052063 ]]

 [[279.22375488 279.22494507 279.2260437  ... 271.73370361 271.55108643
   271.36843872]
  [279.24154663 279.24508667 279.24816895 ... 271.65447998 271.47888184
   271.30328369]
  [279.25793457 279.26483154 279.27160645 ... 271.57528687 271.40670776
   271.23812866]
  ...
  [278.77215576 278.69140625 278.609375   ... 277.75006104 277.82778931
   277.9336853 ]
  [278.83227539 278.7868042  278.75079346 ... 277.75323486 277.82363892
   277.908935

 80%|████████  | 8/10 [00:17<00:04,  2.19s/it]

tf.Tensor(
[[[284.35925293 284.35949707 284.35772705 ... 282.21395874 282.20574951
   282.19754028]
  [284.38375854 284.3888855  284.39144897 ... 282.3104248  282.30697632
   282.30355835]
  [284.41271973 284.42773438 284.44192505 ... 282.40689087 282.40823364
   282.40957642]
  ...
  [283.94430542 283.86236572 283.77868652 ... 291.10678101 291.20761108
   291.35903931]
  [284.01968384 283.97738647 283.94644165 ... 291.01889038 291.13806152
   291.27630615]
  [284.09451294 283.4493103  283.4508667  ... 290.94174194 291.05807495
   291.1784668 ]]

 [[286.53646851 286.53735352 286.5385437  ... 287.60992432 287.51412964
   287.41830444]
  [286.54598999 286.54782104 286.54971313 ... 287.70922852 287.60897827
   287.50872803]
  [286.55374146 286.55661011 286.55947876 ... 287.8085022  287.7038269
   287.59912109]
  ...
  [290.06622314 290.07684326 290.08703613 ... 291.22424316 291.24169922
   291.27423096]
  [290.07763672 290.08889771 290.10128784 ... 291.1652832  291.19909668
   291.2358703

 90%|█████████ | 9/10 [00:19<00:02,  2.16s/it]

tf.Tensor(
[[[286.57025146 286.57321167 286.57684326 ... 288.03390503 288.01611328
   287.99832153]
  [286.60568237 286.6126709  286.62008667 ... 288.09182739 288.06686401
   288.04187012]
  [286.63452148 286.64456177 286.65484619 ... 288.14978027 288.11761475
   288.0854187 ]
  ...
  [288.72567749 288.67001343 288.61297607 ... 291.40179443 291.47573853
   291.55838013]
  [288.78326416 288.75616455 288.73748779 ... 291.4473877  291.49975586
   291.5597229 ]
  [288.83984375 288.35351562 288.35873413 ... 291.47427368 291.5140686
   291.55819702]]

 [[280.84951782 280.84899902 280.84457397 ... 275.57458496 275.50695801
   275.43936157]
  [280.88442993 280.8918457  280.89413452 ... 275.33566284 275.27865601
   275.22164917]
  [280.93011475 280.95562744 280.97940063 ... 275.09671021 275.05032349
   275.00390625]
  ...
  [274.03015137 273.94070435 273.84970093 ... 272.20092773 272.18276978
   272.16677856]
  [274.10101318 274.0519104  274.01385498 ... 272.20895386 272.19137573
   272.1761474

100%|██████████| 10/10 [00:21<00:00,  2.19s/it]

tf.Tensor(
[[[277.31997681 277.31295776 277.30096436 ... 275.68490601 275.65612793
   275.62734985]
  [277.2729187  277.26419067 277.25100708 ... 275.68664551 275.65231323
   275.61798096]
  [277.24807739 277.24597168 277.24240112 ... 275.68835449 275.64849854
   275.60861206]
  ...
  [275.89181519 275.84567261 275.79873657 ... 275.07290649 275.1362915
   275.20870972]
  [275.92840576 275.90304565 275.8833313  ... 275.10360718 275.15109253
   275.2052002 ]
  [275.96533203 275.5475769  275.54562378 ... 275.12042236 275.1579895
   275.19891357]]

 [[279.41421509 279.41439819 279.41479492 ... 278.37481689 278.28005981
   278.18530273]
  [279.41418457 279.41415405 279.41436768 ... 278.565979   278.47189331
   278.3777771 ]
  [279.41333008 279.41259766 279.41195679 ... 278.75714111 278.66372681
   278.57028198]
  ...
  [281.19512939 281.17840576 281.16122437 ... 285.98464966 286.05633545
   286.15655518]
  [281.21481323 281.20755005 281.20318604 ... 285.92266846 286.00613403
   286.09893799




### Results

In [9]:
# load_time_ap1 is averaged over 9 minibatches (the first one is excluded), load_time_ap2 over all 10 minibatches.
print("After filling the buffer, retrieving each minibatch with APPROACH 1 took: {0:5.04f}s".format(load_time_ap1))
print("Retrieving each minibatch with APPROACH 2 took: {0:5.04f}s".format(load_time_ap2))

After filling the buffer, retrieving each minibatch with APPROACH 1 took: 0.2679s
Retrieving each minibatch with APPROACH 2 took: 2.1932s


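Thus, we see that the first approach outperforms the second approach once the shuffle buffer has been filled (0.27s vs. 2.19s per minibatch). Note, however, that the two timings are not measured in the same way: the first minibatch of approach 1, which had to wait almost three minutes for the shuffle buffer to be filled (see the progress bar above), is excluded from its average, whereas the average of approach 2 includes all ten minibatches.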