# Table and memmap

## Create a test file

In [1]:
import numpy as np
from astropy.table import Table

In [2]:
# Three float64 columns ('a', 'b', 'c') of 1e7 rows each, every one holding 0, 1, 2, ...
# At 8 bytes per value this is large enough for memory effects to be visible.
tbl = Table({name: np.arange(1e7, dtype=float) for name in ('a', 'b', 'c')})

In [3]:
# Path of the FITS file used as the on-disk test table (relative to the working directory).
tmpfile = 'test.fits'

In [4]:
# Save the table to `tmpfile`; overwrite=True replaces the file if it already exists,
# so this cell can be re-run safely.
tbl.write(tmpfile, overwrite=True)

In [5]:
# Show the on-disk size of the file just written.
# Uses an explicit shell escape (`!`) and the `tmpfile` variable instead of relying on
# the automagic `ls` alias and a second hard-coded copy of the filename.
!ls -lh {tmpfile}

-rw-r--r-- 1 simon users 229M 12 avril 14:40 test.fits


In [6]:
# Clear the interactive namespace (-f skips the confirmation prompt) so the in-memory
# table built above is gone before memory usage is measured.
%reset -f

ERROR:root:Invalid alias: The name clear can't be aliased because it is another magic command.
ERROR:root:Invalid alias: The name more can't be aliased because it is another magic command.
ERROR:root:Invalid alias: The name less can't be aliased because it is another magic command.
ERROR:root:Invalid alias: The name man can't be aliased because it is another magic command.


## Read without memmap

Here `Table.read` loads the full table into memory.

In [7]:
import ipython_memory_usage.ipython_memory_usage as imu
from astropy.table import Table

In [8]:
# Turn on ipython_memory_usage reporting: from this cell on, each executed cell is
# followed by an "In [N] used ... MiB RAM" line.
imu.start_watching_memory()

In [8] used 0.0508 MiB RAM in 0.11s, peaked 0.00 MiB above current, total RAM usage 91.41 MiB


In [9]:
# memmap=False: the memory report for this cell shows ~229 MiB allocated by the read
# itself, i.e. about the size of the file.
t = Table.read('test.fits', memmap=False)

In [9] used 229.0859 MiB RAM in 0.23s, peaked 0.00 MiB above current, total RAM usage 320.49 MiB


In [10]:
# Drop the reference to the table; the memory report shows the ~229 MiB being returned.
t = None

In [10] used -228.8516 MiB RAM in 0.12s, peaked 228.85 MiB above current, total RAM usage 91.64 MiB


## Read column with memmap

`Table.read` does not load the data up front, but as soon as a column's data is used, the whole table is loaded into memory.

In [11]:
# memmap=True: the read itself allocates almost nothing (see the memory report for this cell).
t = Table.read('test.fits', memmap=True)

In [11] used 0.1523 MiB RAM in 0.11s, peaked 0.00 MiB above current, total RAM usage 91.79 MiB


In [12]:
# Selecting a column is still free: the memory report shows 0 MiB used.
a = t['a']

In [12] used 0.0000 MiB RAM in 0.10s, peaked 0.00 MiB above current, total RAM usage 91.79 MiB


In [13]:
# Actually using the column values is what triggers the ~228 MiB load reported for this cell.
a.sum()

49999995000000.0

In [13] used 228.3125 MiB RAM in 0.15s, peaked 0.00 MiB above current, total RAM usage 320.11 MiB


In [14]:
# Drop both references (column and table) in one statement so the loaded data can be freed.
a = t = None

In [14] used -228.1953 MiB RAM in 0.12s, peaked 228.20 MiB above current, total RAM usage 91.91 MiB


## Read row with memmap

`Table.read` does not load the data up front and, thanks to memmap, slicing rows from the table loads only those rows into memory.

In [15]:
# Re-open the file with memmap=True; as the memory report shows, nothing significant is loaded yet.
t = Table.read('test.fits', memmap=True)

In [15] used 0.0273 MiB RAM in 0.11s, peaked 0.00 MiB above current, total RAM usage 91.94 MiB


In [16]:
# Take the first 1,000 rows; the slice itself costs next to nothing in memory.
t2 = t[:1_000]

In [16] used 0.0234 MiB RAM in 0.10s, peaked 0.00 MiB above current, total RAM usage 91.96 MiB


In [17]:
# Using the sliced column's values only costs ~0.1 MiB here, not the whole file.
t2['a'].sum()

499500.0

In [17] used 0.1016 MiB RAM in 0.11s, peaked 0.00 MiB above current, total RAM usage 92.06 MiB


In [18]:
# Same experiment with a larger slice: the first 100,000 rows.
t2 = t[:100_000]

In [18] used 0.0000 MiB RAM in 0.10s, peaked 0.00 MiB above current, total RAM usage 92.06 MiB


In [19]:
# Even for 100,000 rows the memory report shows no measurable increase.
t2['a'].sum()

4999950000.0

In [19] used 0.0000 MiB RAM in 0.10s, peaked 0.00 MiB above current, total RAM usage 92.06 MiB
