# Ingesting Private Datasets (v1.5)

In [34]:
# imports
import warnings
warnings.filterwarnings('ignore')

import h5py
import specdb
import glob

from astropy.table import Table
from linetools import utils as ltu

from specdb.build import privatedb as pbuild
from specdb.specdb import IgmSpec

## Test on Single Folder

In [2]:
tree = specdb.__path__[0]+'/build/tests/files'
#os.getenv('DROPBOX_DIR')+'/QSOPairs/data/MMT_redux/'

In [3]:
reload(pbuild)
flux_files = pbuild.grab_files(tree)
len(flux_files)

3

In [4]:
flux_files[:5]

([u'/Users/xavier/local/Python/specdb/specdb/build/tests/files//./SDSSJ001605.89+005654.3_b800_F.fits.gz',
  u'/Users/xavier/local/Python/specdb/specdb/build/tests/files//./SDSSJ001607.27+005653.1_b800_F.fits.gz'],
 None,
 None)

----

## Directory Tree

In [11]:
tree2 = specdb.__path__[0]+'/data/test_privateDB'

In [12]:
branches = glob.glob(tree2+'/*')
branches[0]

'/Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS'

### Files

In [13]:
reload(pbuild)
mflux_files, meta_file, _ = pbuild.grab_files(branches[0])
len(mflux_files)

2

In [14]:
mflux_files[:5]

[u'/Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS//./J095240.17+515250.03.fits.gz',
 u'/Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS//./J095243.05+515121.15.fits.gz']

In [15]:
meta_file

u'/Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS/COS_meta.json'

In [16]:
meta_dict = ltu.loadjson(meta_file)
meta_dict

{u'maxpix': 60000,
 u'meta_dict': {u'TELESCOPE': u'HST'},
 u'parse_head': {u'DATE-OBS': u'DATE',
  u'GRATING': u'OPT_ELEM',
  u'INSTR': u'INSTRUME',
  u'R': True}}

### ztbl  (read from file)

In [27]:
ztbl = Table.read(specdb.__path__[0]+'/data/test_privateDB/testDB_ztbl.fits')
ztbl

RA,DEC,ZEM,ZEM_SOURCE,SPEC_FILE
float64,float64,float64,str5,str35
331.992916667,12.9956388889,1.0,UNKNW,SDSSJ220758.30+125944.3_F.fits
261.35275,30.6344166667,1.1,UNKNW,SDSSJ172524.66+303803.9_F.fits
345.184833333,1.92825,1.2,UNKNW,SDSSJ230044.36+015541.7_r600_F.fits
345.184833333,1.92825,1.2,UNKNW,SDSSJ230044.36+015541.7_b400_F.fits
148.167375,51.8805638889,1.3,UNKNW,J095240.17+515250.03.fits.gz
148.179375,51.855875,1.4,UNKNW,J095243.05+515121.15.fits.gz


In [20]:
# Add to the table
new_tbl = ztbl[np.array([0,1,2,2,3,4])]
new_tbl

RA,DEC,ZEM,ZEM_SOURCE
float64,float64,float64,str5
331.992916667,12.9956388889,1.0,UNKNW
261.35275,30.6344166667,1.1,UNKNW
345.184833333,1.92825,1.2,UNKNW
345.184833333,1.92825,1.2,UNKNW
148.167375,51.8805638889,1.3,UNKNW
148.179375,51.855875,1.4,UNKNW


In [21]:
spec_files = ['SDSSJ220758.30+125944.3_F.fits','SDSSJ172524.66+303803.9_F.fits','SDSSJ230044.36+015541.7_r600_F.fits', 
              'SDSSJ230044.36+015541.7_b400_F.fits','J095240.17+515250.03.fits.gz', 'J095243.05+515121.15.fits.gz']

In [22]:
new_tbl['SPEC_FILE'] = spec_files

In [23]:
new_tbl.write('testDB_ztbl.fits', overwrite=True)

In [24]:
new_tbl

RA,DEC,ZEM,ZEM_SOURCE,SPEC_FILE
float64,float64,float64,str5,str35
331.992916667,12.9956388889,1.0,UNKNW,SDSSJ220758.30+125944.3_F.fits
261.35275,30.6344166667,1.1,UNKNW,SDSSJ172524.66+303803.9_F.fits
345.184833333,1.92825,1.2,UNKNW,SDSSJ230044.36+015541.7_r600_F.fits
345.184833333,1.92825,1.2,UNKNW,SDSSJ230044.36+015541.7_b400_F.fits
148.167375,51.8805638889,1.3,UNKNW,J095240.17+515250.03.fits.gz
148.179375,51.855875,1.4,UNKNW,J095243.05+515121.15.fits.gz


### Meta

In [25]:
reload(pbuild)
meta = pbuild.mk_meta(mflux_files, ztbl, fname=True, mdict=meta_dict['meta_dict'], parse_head=meta_dict['parse_head'])

In [26]:
meta[0:3]

sig_zem,flag_zem,flag_survey,RA,DEC,STYPE,zem,SPEC_FILE,DATE-OBS,GRATING,R,INSTR,TELESCOPE,EPOCH,SURVEY_ID
float64,str9,int64,float64,float64,str3,float64,unicode96,str10,str5,float64,str3,unicode3,float64,int64
0.0,UNKNW,1,148.167375,51.8805638889,QSO,1.3,/Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS//./J095240.17+515250.03.fits.gz,2015-05-31,G130M,17000.0,COS,HST,2000.0,0
0.0,UNKNW,1,148.179375,51.855875,QSO,1.4,/Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS//./J095243.05+515121.15.fits.gz,2015-12-08,G130M,17000.0,COS,HST,2000.0,1


#### Without fname=True
    Requires SPEC_FILE in ztbl

In [28]:
meta2 = pbuild.mk_meta(mflux_files, ztbl, fname=False, mdict=meta_dict['meta_dict'], parse_head=meta_dict['parse_head'])

In [29]:
meta2

sig_zem,flag_zem,flag_survey,RA,DEC,STYPE,zem,SPEC_FILE,DATE-OBS,GRATING,R,INSTR,TELESCOPE,EPOCH,SURVEY_ID
float64,str9,int64,float64,float64,str3,float64,unicode96,str10,str5,float64,str3,unicode3,float64,int64
0.0,UNKNW,1,148.167375,51.8805638889,QSO,1.3,/Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS//./J095240.17+515250.03.fits.gz,2015-05-31,G130M,17000.0,COS,HST,2000.0,0
0.0,UNKNW,1,148.179375,51.855875,QSO,1.4,/Users/xavier/local/Python/specdb/specdb/data/test_privateDB/COS//./J095243.05+515121.15.fits.gz,2015-12-08,G130M,17000.0,COS,HST,2000.0,1


### Spectra

In [26]:
hdf = h5py.File('tmp.hdf5','w')

In [28]:
reload(pbuild)
pbuild.ingest_spectra(hdf, 'test', meta, max_npix=meta_dict['maxpix'])

Adding test survey to DB


In [29]:
hdf.close()

## All in One

In [30]:
pbuild.mk_db(tree2, 'test', 'tmp.hdf5', ztbl, fname=True)

Wrote tmp.hdf5 DB file


In [33]:
# Without fname
pbuild.mk_db(tree2, 'test', 'tmp2.hdf5', ztbl, fname=False)

Wrote tmp2.hdf5 DB file


## Check ESI meta

In [35]:
igmsp = IgmSpec(db_file='tmp2.hdf5')

Using tmp2.hdf5 for the catalog file
Using tmp2.hdf5 for the DB file
Available surveys: []
Database is /Users/xavier/local/Python/specdb/specdb/data/test_privateDB
Created on 2016-Dec-29


----

## JSON files for meta table

In [8]:
parse_head = {'DATE-OBS':'DATE', 'TELESCOPE':'TELESCOP','INSTR':'INSTRUME', 'R': True}
mdict = dict(GRATING='ALL', R=8000.)

In [9]:
db_dict = dict(parse_head=parse_head, meta_dict=mdict, maxpix=60000)

In [10]:
jdict = ltu.jsonify(db_dict)

In [12]:
ltu.savejson('tst.json', jdict, easy_to_read=True)