# Ingesting Private Datasets (v2.0)

In [1]:
# imports
import warnings
warnings.filterwarnings('ignore')

import h5py
import specdb
import glob

from astropy.table import Table
from linetools import utils as ltu

from specdb.build import privatedb as pbuild
from specdb.build import utils as spbu
from specdb.specdb import IgmSpec

## Test on Single Folder

In [2]:
tree = specdb.__path__[0]+'/build/tests/files'
#os.getenv('DROPBOX_DIR')+'/QSOPairs/data/MMT_redux/'

In [3]:
reload(pbuild)
flux_files = pbuild.grab_files(tree)
len(flux_files)

3

In [4]:
flux_files[:5]

([u'/Users/xavier/local/Python/specdb/specdb/build/tests/files//./SDSSJ001605.89+005654.3_b800_F.fits.gz',
  u'/Users/xavier/local/Python/specdb/specdb/build/tests/files//./SDSSJ001607.27+005653.1_b800_F.fits.gz'],
 None,
 None)

----

## Directory Tree -- Step by Step

In [5]:
tree2 = specdb.__path__[0]+'/data/test_privateDB'

In [6]:
branches = glob.glob(tree2+'/*')
branches[0]

'/Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS'

### Get started

In [7]:
id_key = 'TEST_ID'
maindb, tkeys = spbu.start_maindb(id_key)

### Files

In [8]:
reload(pbuild)
mflux_files, meta_file, _ = pbuild.grab_files(branches[0])
len(mflux_files)

2

In [9]:
mflux_files[:5]

[u'/Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS//./J095240.17+515250.03.fits.gz',
 u'/Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS//./J095243.05+515121.15.fits.gz']

In [10]:
meta_file

u'/Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS/COS_meta.json'

In [11]:
meta_dict = ltu.loadjson(meta_file)
meta_dict

{u'maxpix': 60000,
 u'meta_dict': {u'TELESCOPE': u'HST'},
 u'parse_head': {u'DATE-OBS': u'DATE',
  u'GRATING': u'OPT_ELEM',
  u'INSTR': u'INSTRUME',
  u'R': True}}

### ztbl  (read from file)

In [12]:
ztbl = Table.read(specdb.__path__[0]+'/data/test_privateDB/testDB_ztbl.fits')
ztbl

RA,DEC,ZEM,ZEM_SOURCE,SPEC_FILE
float64,float64,float64,str5,str35
331.992916667,12.9956388889,1.0,UNKNW,SDSSJ220758.30+125944.3_F.fits
261.35275,30.6344166667,1.1,UNKNW,SDSSJ172524.66+303803.9_F.fits
345.184833333,1.92825,1.2,UNKNW,SDSSJ230044.36+015541.7_r600_F.fits
345.184833333,1.92825,1.2,UNKNW,SDSSJ230044.36+015541.7_b400_F.fits
148.167375,51.8805638889,1.3,UNKNW,J095240.17+515250.03.fits.gz
148.179375,51.855875,1.4,UNKNW,J095243.05+515121.15.fits.gz


### Meta

In [13]:
reload(pbuild)
meta = pbuild.mk_meta(mflux_files, ztbl, fname=True, mdict=meta_dict['meta_dict'], parse_head=meta_dict['parse_head'])

In [14]:
meta[0:3]

RA_GROUP,DEC_GROUP,STYPE,zem_GROUP,sig_zem,flag_zem,SPEC_FILE,DATE-OBS,GRATING,R,INSTR,TELESCOPE,EPOCH,GROUP_ID
float64,float64,str3,float64,float64,str8,unicode96,str10,str5,float64,str3,unicode3,float64,int64
148.167375,51.8805638889,QSO,1.3,0.0,UNKNW,/Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS//./J095240.17+515250.03.fits.gz,2015-05-31,G130M,17000.0,COS,HST,2000.0,0
148.179375,51.855875,QSO,1.4,0.0,UNKNW,/Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS//./J095243.05+515121.15.fits.gz,2015-12-08,G130M,17000.0,COS,HST,2000.0,1


#### Without fname=True
    Requires SPEC_FILE in ztbl

In [15]:
meta2 = pbuild.mk_meta(mflux_files, ztbl, fname=False, mdict=meta_dict['meta_dict'], parse_head=meta_dict['parse_head'])

In [16]:
meta2

RA_GROUP,DEC_GROUP,STYPE,zem_GROUP,sig_zem,flag_zem,SPEC_FILE,DATE-OBS,GRATING,R,INSTR,TELESCOPE,EPOCH,GROUP_ID
float64,float64,str3,float64,float64,str8,unicode96,str10,str5,float64,str3,unicode3,float64,int64
148.167375,51.8805638889,QSO,1.3,0.0,UNKNW,/Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS//./J095240.17+515250.03.fits.gz,2015-05-31,G130M,17000.0,COS,HST,2000.0,0
148.179375,51.855875,QSO,1.4,0.0,UNKNW,/Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS//./J095243.05+515121.15.fits.gz,2015-12-08,G130M,17000.0,COS,HST,2000.0,1


### Add Group and IDs

In [17]:
gdict = {}
flag_g = spbu.add_to_group_dict('COS', gdict)
maindb = pbuild.add_ids(maindb, meta, flag_g, tkeys, id_key, first=(flag_g==1))

The following sources were previously in the DB
RA_GROUP DEC_GROUP STYPE zem_GROUP sig_zem ... INSTR TELESCOPE EPOCH GROUP_ID
-------- --------- ----- --------- ------- ... ----- --------- ----- --------


In [18]:
maindb

flag_group,sig_zem,flag_zem,RA,DEC,STYPE,zem,TEST_ID
int64,float64,str8,float64,float64,str3,float64,int64
1,0.0,UNKNW,148.167375,51.8805638889,QSO,1.3,0
1,0.0,UNKNW,148.179375,51.855875,QSO,1.4,1


In [19]:
gdict

{'COS': 1}

### Spectra

In [24]:
hdf = h5py.File('tmp.hdf5','w')

In [25]:
reload(pbuild)
pbuild.ingest_spectra(hdf, 'test', meta, max_npix=meta_dict['maxpix'])

Adding test group to DB


### Finish

In [26]:
pbuild.write_hdf(hdf, 'TEST_DB', maindb, [str('SDSS')], gdict, 'v01')

## Directory Tree -- All in One

In [27]:
ztbl = Table.read(specdb.__path__[0]+'/data/test_privateDB/testDB_ztbl.fits')

In [28]:
reload(pbuild)
pbuild.mk_db('TEST_DB', tree2, 'tmp.hdf5', ztbl, fname=True)

Working on branch: /Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS
The following sources were previously in the DB
RA_GROUP DEC_GROUP STYPE zem_GROUP sig_zem ... INSTR TELESCOPE EPOCH GROUP_ID
-------- --------- ----- --------- ------- ... ----- --------- ----- --------
Adding COS group to DB
Working on branch: /Users/xavier/local/Python/specdb/specdb/data/test_privateDB/ESI
The following sources were previously in the DB
RA_GROUP DEC_GROUP STYPE zem_GROUP sig_zem ... GRATING EPOCH GROUP_ID tGRB
-------- --------- ----- --------- ------- ... ------- ----- -------- ----
Adding ESI group to DB
Working on branch: /Users/xavier/local/Python/specdb/specdb/data/test_privateDB/LRIS
The following sources were previously in the DB
RA_GROUP DEC_GROUP STYPE zem_GROUP sig_zem ... GRATING TELESCOPE EPOCH GROUP_ID
-------- --------- ----- --------- ------- ... ------- --------- ----- --------
Adding LRIS group to DB
Wrote tmp.hdf5 DB file


In [29]:
# Without fname
pbuild.mk_db('TEST_DB', tree2, 'tmp2.hdf5', ztbl, fname=False)

Working on branch: /Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS
The following sources were previously in the DB
RA_GROUP DEC_GROUP STYPE zem_GROUP sig_zem ... INSTR TELESCOPE EPOCH GROUP_ID
-------- --------- ----- --------- ------- ... ----- --------- ----- --------
Adding COS group to DB
Working on branch: /Users/xavier/local/Python/specdb/specdb/data/test_privateDB/ESI
The following sources were previously in the DB
RA_GROUP DEC_GROUP STYPE zem_GROUP sig_zem ... GRATING EPOCH GROUP_ID tGRB
-------- --------- ----- --------- ------- ... ------- ----- -------- ----
Adding ESI group to DB
Working on branch: /Users/xavier/local/Python/specdb/specdb/data/test_privateDB/LRIS
The following sources were previously in the DB
RA_GROUP DEC_GROUP STYPE zem_GROUP sig_zem ... GRATING TELESCOPE EPOCH GROUP_ID
-------- --------- ----- --------- ------- ... ------- --------- ----- --------
Adding LRIS group to DB
Wrote tmp2.hdf5 DB file


### By script
    specdb_privatedb testDB ../../specdb/data/test_privateDB tst3_DB.hdf5

### Check ESI meta

In [30]:
igmsp = IgmSpec(db_file='tmp2.hdf5', verbose=True)

Using tmp2.hdf5 for the DB file
Available groups: [u'COS', u'ESI', u'LRIS']


In [31]:
igmsp['ESI'].meta

RA_GROUP,DEC_GROUP,STYPE,zem_GROUP,sig_zem,flag_zem,DATE-OBS,R,EPOCH,GROUP_ID,tGRB,PRIV_ID,NPIX,WV_MIN,WV_MAX,SPEC_FILE,INSTR,TELESCOPE,GRATING
float64,float64,str3,float64,float64,str8,str10,float64,float64,int64,str21,int64,int64,float64,float64,str98,str3,str7,str3
261.3528,30.6344,QSO,1.1,0.0,UNKNW,2015-05-19,4545.0,2000.0,0,2009-11-23:10:12:13.2,2,27931,3993.5,10131.6,/Users/xavier/local/Python/specdb/specdb/data/test_privateDB/ESI//./SDSSJ172524.66+303803.9_F.fits,ESI,Keck-II,ECH
331.9929,12.9956,QSO,1.0,0.0,UNKNW,2008-06-04,4545.0,2000.0,1,2007-08-13:10:22:23.3,3,27926,3993.5,10129.9,/Users/xavier/local/Python/specdb/specdb/data/test_privateDB/ESI//./SDSSJ220758.30+125944.3_F.fits,ESI,Keck-II,ECH


----

## JSON files for meta table

In [32]:
parse_head = {'DATE-OBS':'DATE', 'TELESCOPE':'TELESCOP','INSTR':'INSTRUME', 'R': True}
mdict = dict(GRATING='ALL', R=8000.)

In [33]:
db_dict = dict(parse_head=parse_head, meta_dict=mdict, maxpix=60000)

In [34]:
jdict = ltu.jsonify(db_dict)

In [36]:
ltu.savejson('tst.json', jdict, easy_to_read=True, overwrite=True)