In [1]:
import sys
import os
from tempfile import NamedTemporaryFile
from gc import collect

sys.path.append('../util')
from meters import ThroughputMeter
import ncgen

import netCDF4
import h5py
import numpy as np
import pylab as pl

# Point tempfile at <home>/tmp/ so the large benchmark files are created there.
# expanduser('~') still resolves when HOME is unset, whereas os.getenv('HOME')
# would return None and make the string concatenation raise TypeError.
os.environ['TMPDIR'] = os.path.join(os.path.expanduser('~'), 'tmp', '')
# tempfile only honours TMPDIR when the directory exists and is writable;
# otherwise it silently falls back to another candidate (e.g. /tmp), so make
# sure the directory is there before any temporary file is requested.
os.makedirs(os.environ['TMPDIR'], exist_ok=True)

In [6]:
# Benchmark: read the same variable ('var_0') out of NetCDF files containing an
# increasing number of variables, once through h5py and once through
# python-netCDF4, and record the measured throughput of each read.
#
# `results` collects one tuple per read:
#   (read_with_h5py, num_vars, value returned by t.megabytes_per_second(a))
results = []
for num_vars in range(1, 20, 2):
    print("Creating a NetCDF file with {} variables".format(num_vars))
    # delete=False: the file has to outlive this `with` block, because both
    # readers below reopen it by name. It is removed explicitly further down.
    with NamedTemporaryFile(suffix='.nc', delete=False) as f:
        nc = ncgen.make_multivariable_nc(f.name, num_vars=num_vars, unlim=True)
        nc.close()
    try:
        for read_with_h5py in [True, False]:
            if read_with_h5py:
                print("Reading out with h5py module...")
                nc = h5py.File(f.name, 'r')
            else:
                print("Reading out with python-netCDF4 module...")
                nc = netCDF4.Dataset(f.name, 'r')
            try:
                # The variable lookup stays inside the timed region so the
                # measurement covers the same work as before.
                with ThroughputMeter() as t:
                    if read_with_h5py:
                        a = nc['var_0'][:, :, :]
                    else:
                        a = nc.variables['var_0'][:, :, :]
                results.append((read_with_h5py, num_vars, t.megabytes_per_second(a)))
            finally:
                # Close every reader handle as soon as its read is done.
                # Previously the h5py handle was simply overwritten by the
                # netCDF4 one, and the final `nc.close` was missing its call
                # parentheses, so no reader was ever closed explicitly and the
                # open descriptors kept the deleted file's space allocated.
                nc.close()
                del nc
    finally:
        # Always remove the temporary file, even if a read failed, so a broken
        # run does not leave large files behind. `f` itself was already closed
        # when its `with` block exited, so no further f.close() is needed.
        print("Removing {}".format(f.name))
        os.remove(f.name)
        collect()

Creating a NetCDF file with 1 variables
Reading out with h5py module...
353.100 MB in 0.241 seconds at 1466.836 MB / sec
Reading out with python-netCDF4 module...
353.100 MB in 0.392 seconds at 901.249 MB / sec
Removing /tmp/tmpu_7panpa.nc
Creating a NetCDF file with 3 variables
Reading out with h5py module...
353.100 MB in 1.6 seconds at 220.343 MB / sec
Reading out with python-netCDF4 module...
353.100 MB in 0.387 seconds at 913.356 MB / sec
Removing /tmp/tmpsr4lgav9.nc
Creating a NetCDF file with 5 variables
Reading out with h5py module...
353.100 MB in 1.5 seconds at 235.765 MB / sec
Reading out with python-netCDF4 module...
353.100 MB in 0.427 seconds at 826.521 MB / sec
Removing /tmp/tmpgutbbx5_.nc
Creating a NetCDF file with 7 variables
Reading out with h5py module...
353.100 MB in 1.48 seconds at 237.951 MB / sec
Reading out with python-netCDF4 module...
353.100 MB in 0.417 seconds at 846.691 MB / sec
Removing /tmp/tmp3ps2ftfq.nc
Creating a NetCDF file with 9 variables
Reading 