In [None]:
import sys
import os
from tempfile import NamedTemporaryFile
from gc import collect
from time import sleep
sys.path.append('../util')
from meters import ThroughputMeter, clear_host_cache
from ncgen import make_nc
from grids import *
import netCDF4
import numpy as np

In [None]:
def write_netcdf_file(timescale, time_major=True, grid=canada_5k, tmp_dir='/app/tmp'):
    """Create a temporary NetCDF test file and return its (closed) temp-file handle.

    Parameters
    ----------
    timescale : sized collection
        Time axis handed to ``make_nc``; its length is reported as the number
        of time steps.
    time_major : bool
        Forwarded to ``make_nc`` as ``timemajor``.
    grid : dict
        Grid description; ``grid['lon']['count']`` and ``grid['lat']['count']``
        are reported and the whole dict is forwarded to ``make_nc``.
    tmp_dir : str
        Existing directory in which the file is created.

    Returns
    -------
    The ``NamedTemporaryFile`` wrapper, already closed. The file on disk is
    NOT deleted automatically (``delete=False``); the caller must remove
    ``f.name`` when done with it.

    Raises
    ------
    Whatever ``NamedTemporaryFile`` or ``make_nc`` raise. If writing fails
    after the temp file was created, the partial file is removed first.
    """
    print("Creating a time-{} NetCDF file with {}x{} grid and {} time steps".format(
        'major' if time_major else 'minor',
        grid['lon']['count'],
        grid['lat']['count'],
        len(timescale),
    ))
    f = NamedTemporaryFile(suffix='.nc', delete=False, dir=tmp_dir)
    try:
        with f:
            nc = make_nc(f.name, grid=grid, timescale=timescale, timemajor=time_major)
            nc.close()
    except BaseException:
        # delete=False means nothing else will reclaim a half-written file,
        # so remove it here before propagating the error (or interrupt).
        if os.path.exists(f.name):
            os.remove(f.name)
        raise
    print("File size: {:.2f}Mb".format(os.path.getsize(f.name)/1024/1024))
    return f

Note: the `../tmp` path in the Docker container points at rotating-media (spinning-disk) storage.

In [None]:
def netcdf_read_test(f, time_major, n_timesteps=None):
    """Time one 2-D slice read from a NetCDF test file, then delete the file.

    Parameters
    ----------
    f : file-like object
        Must expose ``.name`` (path of the NetCDF file) and ``.close()``,
        e.g. the handle returned by ``write_netcdf_file``.
    time_major : bool
        If true, index 0 of the first axis of ``var_0`` is read
        (``[0, :, :]``); otherwise index 0 of the last axis (``[:, :, 0]``).
    n_timesteps : int, optional
        Number of time steps to record in the result. When omitted, falls
        back to ``len(timescale)`` using the notebook-global ``timescale``,
        which is what callers passing only two arguments rely on.

    Returns
    -------
    tuple
        ``(time_major, n_timesteps, throughput)`` where ``throughput`` is
        ``ThroughputMeter.megabytes_per_second`` for the slice that was read.

    The dataset is closed and the file removed even if the read fails.
    """
    try:
        # Open the file just created
        nc = netCDF4.Dataset(f.name, 'r')
        try:
            with ThroughputMeter() as t:
                if time_major:
                    a = nc.variables['var_0'][0, :, :]
                else:
                    a = nc.variables['var_0'][:, :, 0]
            if n_timesteps is None:
                # Backward-compatible fallback to the notebook global.
                n_timesteps = len(timescale)
            res = (time_major, n_timesteps, t.megabytes_per_second(a))
        finally:
            # python-netCDF4 seems to leak file descriptors, so take every
            # step to make sure the file is closed and the space reclaimed.
            # close() must actually be *called*; a bare `nc.close` is only
            # an attribute lookup and leaves the dataset open.
            nc.close()
            del nc
    finally:
        print("Removing {}".format(f.name))
        os.remove(f.name)
        f.close()
        collect()
    return res

### Test scenarios

In [None]:
# File layouts to compare: True -> time-major, False -> time-minor.
time_major = [True, False]
# Grids to test; the names come from the star-import of `grids`.
grids = [world_250k, world_125k, canada_5k]
# Time axes to test, looked up by name in `timescales`.
ts = [timescales[key] for key in ('annual', 'monthly', 'daily')]

In [None]:
# One result tuple is appended per scenario, in loop order.
results = []

# Loop nesting (layout -> grid -> timescale) fixes the order of `results`;
# the reshape in the summary section depends on exactly this order.
for tm in time_major:
    for grid in grids:
        # NOTE: this loop variable must stay named `timescale`:
        # netcdf_read_test, when called with two arguments as below, reads
        # the global `timescale` to record the number of time steps.
        for timescale in ts:
            testfile = write_netcdf_file(timescale, time_major=tm, grid=grid)
            # presumably flushes the OS page cache so the read hits the disk
            # rather than memory -- TODO confirm in util/meters
            clear_host_cache()
            # netcdf_read_test also deletes the test file when it is done.
            results.append(netcdf_read_test(testfile, tm))

## Summarize Results

In [None]:
# Raw measurements in the order the tests ran: one
# (time_major, number of time steps, throughput) tuple per scenario.
results

Reshape to dimensions (time_major, grid, timescale), matching the nesting order of the test loops.

In [None]:
# Keep only the throughput (third field of each result tuple) and arrange it
# as a cube indexed [time_major, grid, timescale] -- the nesting order of the
# loops that produced `results`.
a = np.reshape(
    [res[2] for res in results],
    (len(time_major), len(grids), len(ts)),
)
a

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

f, (ax1, ax2, ax3) = plt.subplots(3, 1)
f.set_size_inches(18.5, 10.5)

ax1.plot(a[0,0,:], label="mjr")
ax1.plot(a[1,0,:], label="mnr")
ax1.legend()
ax1.set_title("world_250k")

ax2.plot(a[0,1,:], label="mjr")
ax2.plot(a[1,1,:], label="mnr")
ax2.set_title("world_125k")

ax3.plot(a[0,2,:], label="mjr")
ax3.plot(a[1,2,:], label="mnr")
ax3.set_title("canada_5k")