# test of using astropy Tables as chunk data

advantages:
* stored as dict of numpy arrays
* columns have units
* metadata keywords are stored

Comments:
* for I/O would be nice to add a `write( filename, append=True )`  at least for FITS and support a faster FITS backend that handles row-wise writing (like `fitsio`)
* `indata['DIST'] = dist` adds a column if 'DIST' is not there, but fails to do anything silently if 'DIST' already exists! it should return an error. The correct syntax would be `indata['DIST'][:] = dict` for existing data

In [1]:
from astropy import table as t
from astropy import units as u
import numpy as np

In [2]:
evid = np.arange(10) + 1024
X = np.linspace(-10,10,10, dtype=np.float32) * u.m
Y = np.linspace(-10,10,10, dtype=np.float32) * u.m
V = np.random.uniform(size=10) * u.erg

In [3]:
data = t.Table( [evid,X,Y,V], names={"EVENT_ID","DETX","DETY","VALUE"})
data.columns['DETX'].unit = u.m
data.meta["CREATOR"] = "karl"
data

DETY,DETX,EVENT_ID,VALUE
Unnamed: 0_level_1,m,m,erg
int64,float32,float32,float64
1024,-10.0,-10.0,0.486492020793
1025,-7.77778,-7.77778,0.734777434738
1026,-5.55556,-5.55556,0.911094238364
1027,-3.33333,-3.33333,0.594988113528
1028,-1.11111,-1.11111,0.969593849041
1029,1.11111,1.11111,0.162682656422
1030,3.33333,3.33333,0.357190789922
1031,5.55556,5.55556,0.342792002815
1032,7.77778,7.77778,0.244914722733
1033,10.0,10.0,0.626817020472


add a scalar column

In [4]:
energy = np.random.lognormal(2,1,size=10)
data['ENERGY'] = energy * u.TeV
data

DETY,DETX,EVENT_ID,VALUE,ENERGY
Unnamed: 0_level_1,m,m,erg,TeV
int64,float32,float32,float64,float64
1024,-10.0,-10.0,0.486492020793,3.79838838764
1025,-7.77778,-7.77778,0.734777434738,1.53419655623
1026,-5.55556,-5.55556,0.911094238364,8.26214135805
1027,-3.33333,-3.33333,0.594988113528,42.9689112061
1028,-1.11111,-1.11111,0.969593849041,11.3833133852
1029,1.11111,1.11111,0.162682656422,6.55540353696
1030,3.33333,3.33333,0.357190789922,3.63970021986
1031,5.55556,5.55556,0.342792002815,4.87605412057
1032,7.77778,7.77778,0.244914722733,44.7736081525
1033,10.0,10.0,0.626817020472,10.6826857127


add a vector column:

In [5]:
pixvals = np.arange(10*5).reshape(10,5) * 0.1
data['PIXVALS'] = pixvals
data

DETY,DETX,EVENT_ID,VALUE,ENERGY,PIXVALS [5]
Unnamed: 0_level_1,m,m,erg,TeV,Unnamed: 5_level_1
int64,float32,float32,float64,float64,float64
1024,-10.0,-10.0,0.486492020793,3.79838838764,0.0 .. 0.4
1025,-7.77778,-7.77778,0.734777434738,1.53419655623,0.5 .. 0.9
1026,-5.55556,-5.55556,0.911094238364,8.26214135805,1.0 .. 1.4
1027,-3.33333,-3.33333,0.594988113528,42.9689112061,1.5 .. 1.9
1028,-1.11111,-1.11111,0.969593849041,11.3833133852,2.0 .. 2.4
1029,1.11111,1.11111,0.162682656422,6.55540353696,2.5 .. 2.9
1030,3.33333,3.33333,0.357190789922,3.63970021986,3.0 .. 3.4
1031,5.55556,5.55556,0.342792002815,4.87605412057,3.5 .. 3.9
1032,7.77778,7.77778,0.244914722733,44.7736081525,4.0 .. 4.4
1033,10.0,10.0,0.626817020472,10.6826857127,4.5 .. 4.9


In [6]:
data['PIXVALS'] *= 3
data

DETY,DETX,EVENT_ID,VALUE,ENERGY,PIXVALS [5]
Unnamed: 0_level_1,m,m,erg,TeV,Unnamed: 5_level_1
int64,float32,float32,float64,float64,float64
1024,-10.0,-10.0,0.486492020793,3.79838838764,0.0 .. 1.2
1025,-7.77778,-7.77778,0.734777434738,1.53419655623,1.5 .. 2.7
1026,-5.55556,-5.55556,0.911094238364,8.26214135805,3.0 .. 4.2
1027,-3.33333,-3.33333,0.594988113528,42.9689112061,4.5 .. 5.7
1028,-1.11111,-1.11111,0.969593849041,11.3833133852,6.0 .. 7.2
1029,1.11111,1.11111,0.162682656422,6.55540353696,7.5 .. 8.7
1030,3.33333,3.33333,0.357190789922,3.63970021986,9.0 .. 10.2
1031,5.55556,5.55556,0.342792002815,4.87605412057,10.5 .. 11.7
1032,7.77778,7.77778,0.244914722733,44.7736081525,12.0 .. 13.2
1033,10.0,10.0,0.626817020472,10.6826857127,13.5 .. 14.7


In [7]:
from astropy import io

In [8]:
io.registry.get_formats()

Data class,Format,Read,Write,Auto-identify,Deprecated
string40,unicode864,unicode96,unicode96,unicode96,unicode96
Table,ascii,Yes,Yes,No,
Table,ascii.aastex,Yes,Yes,No,
Table,ascii.basic,Yes,Yes,No,
Table,ascii.cds,Yes,No,No,
Table,ascii.commented_header,Yes,Yes,No,
Table,ascii.csv,Yes,Yes,No,
Table,ascii.daophot,Yes,No,No,
Table,ascii.ecsv,Yes,Yes,No,
Table,ascii.fast_basic,Yes,Yes,No,
Table,ascii.fast_commented_header,Yes,Yes,No,


In [9]:
def process_data(indata):
    x = indata['DETX']
    y = indata['DETY']
    dist = np.sqrt(x**2+y**2)
    indata['DIST'] = dist
    return indata

def correct_energy(indata):
    E = indata['ENERGY']
    E += 0.1
    return indata

In [10]:
process_data(data)
correct_energy(data)
data

DETY,DETX,EVENT_ID,VALUE,ENERGY,PIXVALS [5],DIST
Unnamed: 0_level_1,m,m,erg,TeV,Unnamed: 5_level_1,m
int64,float32,float32,float64,float64,float64,float64
1024,-10.0,-10.0,0.486492020793,3.89838838764,0.0 .. 1.2,1024.04882696
1025,-7.77778,-7.77778,0.734777434738,1.63419655623,1.5 .. 2.7,1025.02950876
1026,-5.55556,-5.55556,0.911094238364,8.36214135805,3.0 .. 4.2,1026.01504092
1027,-3.33333,-3.33333,0.594988113528,43.0689112061,4.5 .. 5.7,1027.00540948
1028,-1.11111,-1.11111,0.969593849041,11.4833133852,6.0 .. 7.2,1028.00060047
1029,1.11111,1.11111,0.162682656422,6.65540353696,7.5 .. 8.7,1029.00059989
1030,3.33333,3.33333,0.357190789922,3.73970021986,9.0 .. 10.2,1030.00539373
1031,5.55556,5.55556,0.342792002815,4.97605412057,10.5 .. 11.7,1031.01496798
1032,7.77778,7.77778,0.244914722733,44.8736081525,12.0 .. 13.2,1032.02930861
1033,10.0,10.0,0.626817020472,10.7826857127,13.5 .. 14.7,1033.04840158


In [11]:
del data['VALUE']
data

DETY,DETX,EVENT_ID,ENERGY,PIXVALS [5],DIST
Unnamed: 0_level_1,m,m,TeV,Unnamed: 4_level_1,m
int64,float32,float32,float64,float64,float64
1024,-10.0,-10.0,3.89838838764,0.0 .. 1.2,1024.04882696
1025,-7.77778,-7.77778,1.63419655623,1.5 .. 2.7,1025.02950876
1026,-5.55556,-5.55556,8.36214135805,3.0 .. 4.2,1026.01504092
1027,-3.33333,-3.33333,43.0689112061,4.5 .. 5.7,1027.00540948
1028,-1.11111,-1.11111,11.4833133852,6.0 .. 7.2,1028.00060047
1029,1.11111,1.11111,6.65540353696,7.5 .. 8.7,1029.00059989
1030,3.33333,3.33333,3.73970021986,9.0 .. 10.2,1030.00539373
1031,5.55556,5.55556,4.97605412057,10.5 .. 11.7,1031.01496798
1032,7.77778,7.77778,44.8736081525,12.0 .. 13.2,1032.02930861
1033,10.0,10.0,10.7826857127,13.5 .. 14.7,1033.04840158


In [14]:
# rows give back a recarray 
print data[0]

<Row 0 of table
 values=(1024, -10.0, -10.0, 3.898388387639396, [0.0, 0.30000000000000004, 0.6000000000000001, 0.9000000000000001, 1.2000000000000002], 1024.0488269609023)
 dtype=[('DETY', '<i8'), ('DETX', '<f4'), ('EVENT_ID', '<f4'), ('ENERGY', '<f8'), ('PIXVALS', '<f8', (5,)), ('DIST', '<f8')]>


In [18]:
# can also convert the whole thing into a recarray
# *copies the data*
# makes it possible to write using fitsio, though!
print data.as_array()
print data.meta

[ (1024, -10.0, -10.0, 3.898388387639396, [0.0, 0.30000000000000004, 0.6000000000000001, 0.9000000000000001, 1.2000000000000002], 1024.0488269609023)
 (1025, -7.777777671813965, -7.777777671813965, 1.6341965562291882, [1.5, 1.8000000000000003, 2.1, 2.4000000000000004, 2.7], 1025.0295087576771)
 (1026, -5.55555534362793, -5.55555534362793, 8.36214135805351, [3.0, 3.3000000000000003, 3.6000000000000005, 3.9000000000000004, 4.2], 1026.0150409203902)
 (1027, -3.3333330154418945, -3.3333330154418945, 43.0689112060886, [4.5, 4.800000000000001, 5.1000000000000005, 5.4, 5.7], 1027.005409483699)
 (1028, -1.1111106872558594, -1.1111106872558594, 11.483313385166465, [6.0, 6.300000000000001, 6.6000000000000005, 6.9, 7.200000000000001], 1028.00060047012)
 (1029, 1.1111116409301758, 1.1111116409301758, 6.655403536957618, [7.5, 7.800000000000001, 8.100000000000001, 8.4, 8.700000000000001], 1029.0005998876156)
 (1030, 3.333333969116211, 3.333333969116211, 3.7397002198575326, [9.0, 9.3, 9.6000000000000

In [19]:
%timeit correct_energy(data)

The slowest run took 8.92 times longer than the fastest. This could mean that an intermediate result is being cached 
100000 loops, best of 3: 3.58 µs per loop
