In [10]:
pid = os.getpid()
page_size = resource.getpagesize()
pid, page_size

NameError: name 'resource' is not defined

Here, we're going to set ourselves a "soft" and "hard" limit on the "addressable space" (AS). Addressable space consists of all of the data in RAM *and* the virtual memory on disk. But for this experiment, we don't want to use virtual memory at all. That's OK, we'll set AS to 10MB, so we have to be *really* disciplined with our memory usage. We'll leave the hard limit unset. Users aren't permitted to ever *raise* their hard limit, so we'd have to restart the process to get our hard limit back up.

In [3]:
#resource.setrlimit(resource.RLIMIT_AS, (500 * 1024 * 1024, resource.RLIM_INFINITY))

In [4]:
resource.getrusage(resource.RUSAGE_CHILDREN)

resource.struct_rusage(ru_utime=0.00244, ru_stime=0.002669, ru_maxrss=20456, ru_ixrss=0, ru_idrss=0, ru_isrss=0, ru_minflt=874, ru_majflt=2, ru_nswap=0, ru_inblock=184, ru_oublock=0, ru_msgsnd=0, ru_msgrcv=0, ru_nsignals=0, ru_nvcsw=6, ru_nivcsw=4)

In [5]:
resource.getrusage(resource.RUSAGE_CHILDREN)[4]

0

In [5]:
pid = os.getpid()
!ps -p $pid -o pid,user,args,rss,resident,share,size,vsize

  PID USER     COMMAND                       RSS   RES -  SIZE    VSZ
 7115 hiebert  /home/hiebert/code/netcdf-t 36264     - - 412168 573160


In [7]:
pid

7267

In [13]:
resource.setrlimit(resource.RLIMIT_STACK, (50 * 1024 * 1024, resource.RLIM_INFINITY))

In [14]:
resource.getrlimit(resource.RLIMIT_STACK)

(52428800, -1)

In [15]:
shape = (256, 1024, 1024)
x = np.empty(shape, dtype='float32')

Perfect! We raised a `MemoryError`. Normally we don't *want* errors, but if we're trying solve a problem ahead of time, simulating an error is super useful. Now we can't pretend that our method will work for a small testing data set, but then won't scale to a larger data set.

Let's start by creating another big NetCDF file and then discuss ways to process it. We'll need to turn off our memory limit temporarily to create it.

In [9]:
resource.setrlimit(resource.RLIMIT_AS, (resource.RLIM_INFINITY, resource.RLIM_INFINITY))

In [10]:
resource.getrlimit(resource.RLIMIT_AS)

(-1, -1)

In [11]:
# %load ../src/utils.py
import os
import math
from tempfile import NamedTemporaryFile

import netCDF4
import numpy as np

def make_netcdf3_file(shape, variable_name='some_variable', dir_=os.getcwd()):

    with NamedTemporaryFile(suffix='.nc', dir=dir_, delete=False) as f:

        nc = netCDF4.Dataset(f.name, 'w', format='NETCDF3_CLASSIC')
        nc.createDimension('x', shape[2])
        nc.createDimension('y', shape[1])
        nc.createDimension('z', shape[0])
        some_var = nc.createVariable('some_variable','f4',('z', 'y', 'x'))

    def trip(x, y, z):
        d = math.sqrt((x - 256) ** 2 + (y - 256) ** 2)
        return math.sin(d / 64) + math.sin(z)
    trip_v = np.vectorize(trip)

    x, y = np.meshgrid(range(shape[2]), range(shape[1]))
    basegrid = trip_v(x, y, 0).astype('float32')

    for z in range(shape[0]):
        a = basegrid + math.sin(z / 32)
        some_var[z,:,:] = a

    nc.close()

    return f.name


In [12]:
nc = make_netcdf3_file((256, 1024, 1024))

In [13]:
nc

'/home/hiebert/code/netcdf-tutorial/notebooks/tmpgmimbaxq.nc'

In [14]:
!ps -p $pid -o pid,user,args,rss,resident,share,size,vsize

  PID USER     COMMAND                       RSS   RES -  SIZE    VSZ
 5590 hiebert  /home/hiebert/code/netcdf-t 74224     - - 1494356 1734584


In [17]:
!cat /proc/$pid/statm

171501 18555 2575 852 0 111444 0


In [18]:
x = "foo"

In [19]:
y = "bar"

In [20]:
!cat /proc/$pid/statm

171501 18555 2575 852 0 111444 0


In [3]:
from collections import namedtuple
MemUsage = namedtuple('MemUsage', 'size resident share text lib data dt')
class HumanMemUsage(MemUsage):
    __slots__ = ()
    page_size = resource.getpagesize()
    @property
    def data_bytes(self):
        return (self.resident - self.text) * self.page_size

In [6]:
with open('/proc/{}/statm'.format(pid)) as f:
    string = f.read()
MemUsage._make([int(x) for x in string.split()])

MemUsage(size=143290, resident=9067, share=2012, text=852, lib=0, data=103042, dt=0)

In [53]:
with open('/proc/{}/statm'.format(pid)) as f:
    string = f.read()
m = HumanMemUsage(*[int(x) for x in string.split()])
m.data, m.resident - m.text

(111636, 17975)

In [54]:
pid

5590

In [55]:
resource.getrlimit(resource.RLIMIT_STACK)

(8388608, -1)

In [12]:
import numpy as np

a = np.empty((1024, 1024, 512))

In [13]:
with open('/proc/{}/statm'.format(pid)) as f:
    string = f.read()
MemUsage._make([int(x) for x in string.split()])

MemUsage(size=1208251, resident=9098, share=2013, text=852, lib=0, data=1168003, dt=0)

In [14]:
1168003 - 103082 # in pages

1064921

In [17]:
(1168003 - 103082) * 4096 # bytes

4361916416

In [18]:
((1168003 - 103082) * 4096) / 1024 ** 2 # Mb

4159.84765625

In [27]:
resource.getrlimit(resource.RLIMIT_DATA)

(52428800, -1)

In [28]:
resource.setrlimit(resource.RLIMIT_DATA, (50 * 1024 * 1024, 50 * 1024 * 1024))

In [29]:
with open('/proc/{}/statm'.format(pid)) as f:
    string = f.read()
MemUsage._make([int(x) for x in string.split()])

MemUsage(size=2256892, resident=9127, share=2013, text=852, lib=0, data=2216644, dt=0)

In [33]:
c = np.empty((1024, 1024, 512))

In [34]:
with open('/proc/{}/statm'.format(pid)) as f:
    string = f.read()
MemUsage._make([int(x) for x in string.split()])

MemUsage(size=3305469, resident=9133, share=2013, text=852, lib=0, data=3265221, dt=0)

In [32]:
50 * 1024 * 1024 / 4096

12800.0