In [88]:
import sys
import os
from tempfile import NamedTemporaryFile
from gc import collect

sys.path.append('../util')
from meters import ThroughputMeter
from ncgen import *
from grids import *

import netCDF4
import numpy as np
import pylab as pl


In [101]:
def write_netcdf_file(timescale, time_major=True):
    print("Creating a time-{} NetCDF file with {}x{} grid and {} time steps".format('major' if time_major else 'minor', grid['lon']['count'], grid['lat']['count'],len(timescale)))
    with NamedTemporaryFile(suffix='.nc', delete=False, dir='../tmp') as f:
        nc = get_base_nc(f.name, world_125k)
        nc = add_simple_time(nc, timescale)
        nc = add_climo_data(nc, 'var_0', timemajor=time_major)
        nc.close()
    print("file size: {}".format(os.path.getsize(f.name)))
    return f

../tmp path in the Docker container points at rotating media storage.

In [177]:
def netcdf_read_test(f):
    results = []
    # Open the file just created
    nc = netCDF4.Dataset(f.name, 'r')
    with ThroughputMeter() as t:
        for i in range(nc.variables['var_0'].shape[0]):
            a = nc.variables['var_0'][i,:,:]
    results.append((time_major, len(timescale), t.megabytes_per_second(nc.variables['var_0'])))
    # python-netCDF4 seems to leak file descriptors
    # We have to take a lot of steps to make sure that the files get closed and that
    # the space gets reclaimed by the OS
    nc.close
    del nc
    print("Removing {}".format(f.name))
    os.remove(f.name)
    f.close()
    collect()
    return results

In [92]:
def hdf5_read_test(f):
    pass

###Run time-major test for world_125k grid at monthly timescale:

In [136]:
grid = world_125k
timescale = timescales['monthly']
time_major = True
testfile = write_netcdf_file(timescale, time_major)

Creating a time-major NetCDF file with 256x128 grid and 1800 time steps
file size: 235945088


####Need to empty the filesystem buffers and cache after writing the file, to ensure it is read back from disk and not memory:

In [137]:
!free -m

             total       used       free     shared    buffers     cached
Mem:          7681       3368       4313        706          2       1011
-/+ buffers/cache:       2354       5327
Swap:            0          0          0


In [138]:
!sync

####Then cleared cache via the command line (requires entry of su password): #sudo sh -c 'echo 3 >/proc/sys/vm/drop_caches'

In [139]:
!free -m

             total       used       free     shared    buffers     cached
Mem:          7681       3129       4552        706          2        783
-/+ buffers/cache:       2343       5338
Swap:            0          0          0


In [140]:
results_tmaj_world_125k_monthly = netcdf_read_test(testfile)

225.000 MB in 1.13 seconds at 198.323 MB / sec
Removing /app/tmp/tmpg6tg01e7.nc


In [141]:
results_tmaj_world_125k_monthly

[(True, 1800, 198.32333154845114)]

###Run time-minor test for world_125k grid at monthly timescale:

In [142]:
time_major = False
testfile = write_netcdf_file(timescale, time_major)

Creating a time-minor NetCDF file with 256x128 grid and 1800 time steps
file size: 235945088


In [143]:
!free -m

             total       used       free     shared    buffers     cached
Mem:          7681       3327       4354        701          3       1003
-/+ buffers/cache:       2320       5361
Swap:            0          0          0


In [144]:
!sync

Then cleared cache via the command line (requires entry of su password): #sudo sh -c 'echo 3 >/proc/sys/vm/drop_caches'

In [145]:
!free -m

             total       used       free     shared    buffers     cached
Mem:          7681       3106       4574        711          1        788
-/+ buffers/cache:       2317       5364
Swap:            0          0          0


In [146]:
results_tmin_world_125k_monthly = netcdf_read_test(testfile)

225.000 MB in 1.18 seconds at 190.959 MB / sec
Removing /app/tmp/tmpv5bvqj_d.nc


In [147]:
results_tmin_world_125k_monthly

[(False, 1800, 190.95857026478723)]

###Run time-major test for canada_5k grid at monthly timescale:

In [148]:
grid = canada_5k
timescale = timescales['monthly']
time_major = True
testfile = write_netcdf_file(timescale, time_major)

Creating a time-major NetCDF file with 1068x510 grid and 1800 time steps
file size: 235945088


In [149]:
!free -m

             total       used       free     shared    buffers     cached
Mem:          7681       3352       4329        706          1       1009
-/+ buffers/cache:       2341       5340
Swap:            0          0          0


In [150]:
!sync

Then cleared cache via the command line (requires entry of su password): #sudo sh -c 'echo 3 >/proc/sys/vm/drop_caches'

In [151]:
!free -m

             total       used       free     shared    buffers     cached
Mem:          7681       3121       4559        706          0        783
-/+ buffers/cache:       2337       5344
Swap:            0          0          0


In [152]:
results_tmaj_canada_5k_monthly = netcdf_read_test(testfile)

225.000 MB in 1.31 seconds at 171.610 MB / sec
Removing /app/tmp/tmpls3knvac.nc


In [153]:
results_tmaj_canada_5k_monthly

[(True, 1800, 171.60962580847888)]

###Run time-minor test for canada_5k grid at monthly timescale:

In [154]:
grid = canada_5k
timescale = timescales['monthly']
time_major = False
testfile = write_netcdf_file(timescale, time_major)

Creating a time-minor NetCDF file with 1068x510 grid and 1800 time steps
file size: 235945088


In [155]:
!free -m

             total       used       free     shared    buffers     cached
Mem:          7681       3379       4302        701          2       1004
-/+ buffers/cache:       2373       5308
Swap:            0          0          0


In [156]:
!sync

Then cleared cache via the command line (requires entry of su password): #sudo sh -c 'echo 3 >/proc/sys/vm/drop_caches'

In [157]:
!free -m

             total       used       free     shared    buffers     cached
Mem:          7681       3125       4556        705          0        782
-/+ buffers/cache:       2341       5340
Swap:            0          0          0


In [158]:
results_tmin_canada_5k_monthly = netcdf_read_test(testfile)

225.000 MB in 1.37 seconds at 163.778 MB / sec
Removing /app/tmp/tmpi7_6zru3.nc


In [159]:
results_tmin_canada_5k_monthly

[(False, 1800, 163.77775876494158)]

###Run time-major test for canada_5k grid at daily timescale:

In [190]:
grid = canada_5k
timescale = timescales['daily']
time_major = True
testfile = write_netcdf_file(timescale, time_major)

Creating a time-major NetCDF file with 1068x510 grid and 54787 time steps
file size: 7181269100


In [191]:
!free -m

             total       used       free     shared    buffers     cached
Mem:          7681       7514        167        711          2       5023
-/+ buffers/cache:       2488       5193
Swap:            0          0          0


In [192]:
!sync

Then cleared cache via the command line (requires entry of su password): #sudo sh -c 'echo 3 >/proc/sys/vm/drop_caches'

In [193]:
!free -m

             total       used       free     shared    buffers     cached
Mem:          7681       3136       4545        706          2        765
-/+ buffers/cache:       2368       5313
Swap:            0          0          0


In [194]:
results_tmaj_canada_5k_daily = netcdf_read_test(testfile)

6848.375 MB in 36.3 seconds at 188.614 MB / sec
Removing /app/tmp/tmph11fyyet.nc


In [195]:
results_tmaj_canada_5k_daily

[(True, 54787, 188.61440551039792)]

###Run time-minor test for canada_5k grid at daily timescale:

In [202]:
grid = canada_5k
timescale = timescales['daily']
time_major = False
testfile = write_netcdf_file(timescale, time_major)

Creating a time-minor NetCDF file with 1068x510 grid and 54787 time steps
file size: 7181269100


In [203]:
!free -m

             total       used       free     shared    buffers     cached
Mem:          7681       7424        257        699          2       4916
-/+ buffers/cache:       2505       5176
Swap:            0          0          0


In [204]:
!sync

Then cleared cache via the command line (requires entry of su password): #sudo sh -c 'echo 3 >/proc/sys/vm/drop_caches'

In [206]:
!free -m

             total       used       free     shared    buffers     cached
Mem:          7681       3116       4565        697          2        752
-/+ buffers/cache:       2361       5320
Swap:            0          0          0


In [207]:
results_tmin_canada_5k_daily = netcdf_read_test(testfile)

6848.375 MB in 37.5 seconds at 182.511 MB / sec
Removing /app/tmp/tmpp0sab2mw.nc


In [208]:
results_tmin_canada_5k_daily

[(False, 54787, 182.51077374403957)]

###Run time-major test for world_250k grid at daily timescale:

In [178]:
grid = world_250k
timescale = timescales['daily']
time_major = True
testfile = write_netcdf_file(timescale, time_major)

Creating a time-major NetCDF file with 128x64 grid and 54787 time steps
file size: 7181269100


In [179]:
!free -m

             total       used       free     shared    buffers     cached
Mem:          7681       7559        121        726          4       5031
-/+ buffers/cache:       2523       5157
Swap:            0          0          0


In [180]:
!sync

Then cleared cache via the command line (requires entry of su password): #sudo sh -c 'echo 3 >/proc/sys/vm/drop_caches'

In [181]:
!free -m

             total       used       free     shared    buffers     cached
Mem:          7681       3151       4530        726          2        786
-/+ buffers/cache:       2362       5318
Swap:            0          0          0


In [182]:
results_tmaj_world_250k_daily = netcdf_read_test(testfile)

6848.375 MB in 36.6 seconds at 187.132 MB / sec
Removing /app/tmp/tmplgqemk32.nc


In [188]:
results_tmaj_world_250k_daily

[(True, 54787, 187.1324676433039)]

###Run time-minor test for world_250k grid at daily timescale:

In [196]:
grid = world_250k
timescale = timescales['daily']
time_major = False
testfile = write_netcdf_file(timescale, time_major)

Creating a time-minor NetCDF file with 128x64 grid and 54787 time steps
file size: 7181269100


In [197]:
!free -m

             total       used       free     shared    buffers     cached
Mem:          7681       7404        277        762          5       4874
-/+ buffers/cache:       2524       5157
Swap:            0          0          0


In [198]:
!sync

Then cleared cache via the command line (requires entry of su password): #sudo sh -c 'echo 3 >/proc/sys/vm/drop_caches'

In [199]:
!free -m

             total       used       free     shared    buffers     cached
Mem:          7681       3233       4447        773          1        827
-/+ buffers/cache:       2405       5276
Swap:            0          0          0


In [200]:
results_tmin_world_250k_daily = netcdf_read_test(testfile)

6848.375 MB in 38.0 seconds at 180.391 MB / sec
Removing /app/tmp/tmpyurz77ek.nc


In [201]:
results_tmin_world_250k_daily

[(False, 54787, 180.3910584406552)]

###Run time-major test for bc_400m grid at monthly timescale:

In [209]:
grid = bc_400m
timescale = timescales['monthly']
time_major = True
testfile = write_netcdf_file(timescale, time_major)

Creating a time-major NetCDF file with 1680x3241 grid and 1800 time steps
file size: 235945088


In [210]:
!free -m

             total       used       free     shared    buffers     cached
Mem:          7681       3383       4298        694          7        985
-/+ buffers/cache:       2391       5290
Swap:            0          0          0


In [211]:
!sync

Then cleared cache via the command line (requires entry of su password): #sudo sh -c 'echo 3 >/proc/sys/vm/drop_caches'

In [212]:
!free -m

             total       used       free     shared    buffers     cached
Mem:          7681       3117       4564        699          1        753
-/+ buffers/cache:       2362       5318
Swap:            0          0          0


In [213]:
results_tmaj_bc_400m_monthly = netcdf_read_test(testfile)

225.000 MB in 1.16 seconds at 193.842 MB / sec
Removing /app/tmp/tmpr000c1vz.nc


In [214]:
results_tmaj_bc_400m_monthly

[(True, 1800, 193.8415578995108)]

###Run time-minor test for bc_400m grid at monthly timescale:

In [215]:
grid = bc_400m
timescale = timescales['monthly']
time_major = False
testfile = write_netcdf_file(timescale, time_major)

Creating a time-minor NetCDF file with 1680x3241 grid and 1800 time steps
file size: 235945088


In [216]:
!free -m

             total       used       free     shared    buffers     cached
Mem:          7681       3401       4280        713          2       1001
-/+ buffers/cache:       2397       5284
Swap:            0          0          0


In [217]:
!sync

Then cleared cache via the command line (requires entry of su password): #sudo sh -c 'echo 3 >/proc/sys/vm/drop_caches'

In [218]:
!free -m

             total       used       free     shared    buffers     cached
Mem:          7681       3135       4546        713          1        766
-/+ buffers/cache:       2366       5314
Swap:            0          0          0


In [219]:
results_tmin_bc_400m_monthly = netcdf_read_test(testfile)

225.000 MB in 1.19 seconds at 188.939 MB / sec
Removing /app/tmp/tmpa9ss0kht.nc


###Run time-major test for bc_400m grid at daily timescale:

In [220]:
grid = bc_400m
timescale = timescales['daily']
time_major = True
testfile = write_netcdf_file(timescale, time_major)

Creating a time-major NetCDF file with 1680x3241 grid and 54787 time steps
file size: 7181269100


In [221]:
!free -m

             total       used       free     shared    buffers     cached
Mem:          7681       7430        251        718          6       4989
-/+ buffers/cache:       2434       5247
Swap:            0          0          0


In [222]:
!sync

Then cleared cache via the command line (requires entry of su password): #sudo sh -c 'echo 3 >/proc/sys/vm/drop_caches'

In [223]:
!free -m

             total       used       free     shared    buffers     cached
Mem:          7681       3098       4583        709          1        765
-/+ buffers/cache:       2331       5350
Swap:            0          0          0


In [224]:
results_tmaj_bc_400m_daily = netcdf_read_test(testfile)

6848.375 MB in 36.1 seconds at 189.544 MB / sec
Removing /app/tmp/tmp3ral1rsi.nc


###Run time-minor test for bc_400m grid at daily timescale:

In [225]:
grid = bc_400m
timescale = timescales['daily']
time_major = False
testfile = write_netcdf_file(timescale, time_major)

Creating a time-minor NetCDF file with 1680x3241 grid and 54787 time steps
file size: 7181269100


In [226]:
!free -m

             total       used       free     shared    buffers     cached
Mem:          7681       7366        315        715          3       4933
-/+ buffers/cache:       2429       5252
Swap:            0          0          0


In [227]:
!sync

Then cleared cache via the command line (requires entry of su password): #sudo sh -c 'echo 3 >/proc/sys/vm/drop_caches'

In [228]:
!free -m

             total       used       free     shared    buffers     cached
Mem:          7681       3119       4561        720          1        776
-/+ buffers/cache:       2342       5339
Swap:            0          0          0


In [229]:
results_tmin_bc_400m_daily = netcdf_read_test(testfile)

6848.375 MB in 38.3 seconds at 178.624 MB / sec
Removing /app/tmp/tmpudg_z_ys.nc
