In [1]:
import os
import subprocess

import numpy as np
from netCDF4 import Dataset

# Local file name of the NetCDF dataset and the remote location it is fetched from
path = 'supercell_kessler_data.nc'
data_link = "https://www.dropbox.com/s/nonpheml3309q7d/supercell_kessler_data.nc?dl=0"

# Download the data if necessary
if not os.path.isfile(path):
    print(f"Downloading data from:\n {data_link}...")
    # Download under a temporary name and rename only on success: `wget -O`
    # creates the output file even when the transfer fails, which would
    # otherwise leave an empty/truncated file that passes the isfile() check
    # on the next run and then breaks the NetCDF read.
    partial_path = path + '.part'
    try:
        # Argument list (no shell), so the '?' in the URL is never subject to
        # shell globbing; check=True raises CalledProcessError if wget fails.
        subprocess.run(['wget', data_link, '-O', partial_path], check=True)
        os.replace(partial_path, path)
    finally:
        # Clean up the partial file after a failed download
        if os.path.exists(partial_path):
            os.remove(partial_path)

print('Reading dataset...')

# Open the NetCDF4 file. The context manager closes it even if a read fails,
# so the file handle is never leaked on error.
with Dataset(path, 'r') as nc:
    inputs_var  = nc.variables['inputs']
    outputs_var = nc.variables['outputs']

    # Allocate single-precision input and output arrays matching the file's shapes
    [num_samples, num_vars_in, stencil_size] = inputs_var.shape
    input_from_file  = np.empty(shape=inputs_var.shape,  dtype=np.single)
    output_from_file = np.empty(shape=outputs_var.shape, dtype=np.single)

    # We need to chunk the reading to avoid overflowing available memory
    num_chunks = 20
    chunk_size = int(np.ceil(num_samples / num_chunks))
    # Loop over chunks and load data
    for ichunk in range(num_chunks):
        # Clamp both bounds so no chunk reaches past the last sample; when
        # num_samples is not a multiple of num_chunks the rounded-up chunk_size
        # can leave trailing chunks empty.
        ibeg = min(ichunk * chunk_size, num_samples)
        iend = min((ichunk + 1) * chunk_size, num_samples)
        if ibeg < iend:  # Skip empty trailing chunks instead of issuing zero-length reads
            input_from_file [ibeg:iend, :, :] = inputs_var [ibeg:iend, :, :]
            output_from_file[ibeg:iend, :]    = outputs_var[ibeg:iend, :]
        print(f'  * Finished reading chunk {ichunk+1} of {num_chunks}')

print('Shuffling dataset...')

# Randomly shuffle the samples before saving to file. A fixed seed makes the
# shuffled order (and therefore the saved file) reproducible across runs;
# change shuffle_seed to obtain a different ordering.
shuffle_seed = 0
rng = np.random.default_rng(shuffle_seed)
permuted_indices = rng.permutation(num_samples)
# Index both arrays with the same permutation so every input sample stays
# paired with its corresponding output sample.
input_from_file  = input_from_file [permuted_indices, :, :]
output_from_file = output_from_file[permuted_indices, :]

print('Saving data to file...')

# Write both arrays into a single .npz archive; each array is stored under
# the key given by its dictionary entry.
arrays_to_save = {
    'input_from_file':  input_from_file,
    'output_from_file': output_from_file,
}
np.savez('supercell_kessler_data.npz', **arrays_to_save)


Downloading data from:
 https://www.dropbox.com/s/nonpheml3309q7d/supercell_kessler_data.nc?dl=0...
--2022-06-09 22:41:59--  https://www.dropbox.com/s/nonpheml3309q7d/supercell_kessler_data.nc?dl=0
Resolving www.dropbox.com (www.dropbox.com)... 162.125.6.18, 2620:100:6022:18::a27d:4212
Connecting to www.dropbox.com (www.dropbox.com)|162.125.6.18|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /s/raw/nonpheml3309q7d/supercell_kessler_data.nc [following]
--2022-06-09 22:41:59--  https://www.dropbox.com/s/raw/nonpheml3309q7d/supercell_kessler_data.nc
Reusing existing connection to www.dropbox.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://ucb979522013c78db74274592a8c.dl.dropboxusercontent.com/cd/0/inline/Bm4QqmKBjj9wwd8VKsCojZAGacMlLDJkdC2D73mk7n6S_psboiv-K3UQTEm6FDwI8R1jiVBZnFa4JfufmmlpmVWW-dmy9IYXCpWhOU9eRU0DVJCQbnr7dxmEmwBeUFTn3lCHK3I29IfFcgOTOKbPpX7cb4jH4iefJsLyrNzgNP9cEw/file# [following]
--2022-06-09 22:42:00--