# Querying a specdb source Catalog [v1.0]

In [1]:
# import
from astropy import units as u
from astropy.coordinates import SkyCoord

import specdb
from specdb.specdb import SpecDB
from specdb import specdb as spdb_spdb
from specdb.cat_utils import flags_to_groups

## Load

### Use test file

In [2]:
# Path to the debug database file shipped under the specdb package's tests/files directory
db_file = specdb.__path__[0]+'/tests/files/IGMspec_DB_v02_debug.hdf5'

In [3]:
# Open the database file. The resulting object provides the source catalog
# (sdb.cat), the data-group listings (sdb.groups, sdb.group_dict) and the
# catalog query interface (sdb.qcat) used throughout this notebook.
# No reload() here: it is not a builtin on Python 3 and is only useful while
# actively editing the specdb source.
sdb = SpecDB(db_file=db_file)

Database is igmspec
Created on 2017-Jan-02


### Show a few things

In [4]:
# Peek at the first five rows of the source catalog
sdb.cat[:5]

flag_group,sig_zem,flag_zem,RA,DEC,STYPE,zem,IGM_ID
int64,float64,str10,float64,float64,str3,float64,int64
1,-1.0,BOSS_PCA,0.0019,17.7737,QSO,2.308,0
1,0.002,BOSS_PCA,0.0028,14.9747,QSO,2.516,1
1,0.001,BOSS_PCA,0.0041,4.8298,QSO,1.629,2
1,0.0,BOSS_PCA,0.0053,-2.0333,QSO,1.362,3
1,0.001,BOSS_PCA,0.0057,-1.325,QSO,2.335,4


In [5]:
# Names of the data groups held in this database
sdb.groups

[u'BOSS_DR12', u'HD-LLS_DR1', u'GGG', u'SDSS_DR7']

In [6]:
# Flag value assigned to each group; these are the bits combined in the catalog's flag_group column
sdb.group_dict

{u'BOSS_DR12': 1, u'GGG': 16, u'HD-LLS_DR1': 8, u'SDSS_DR7': 2}

----

## Querying without Coordinates

### Generate a query dict

#### The following query dict restricts the search to sources that satisfy all of the following:

    redshift:  3<z<5
    STYPE == 'QSO'
    In any of the data groups with flag = 2, 4 or 32

In [7]:
qdict = {
    'zem': (3., 5.),                          # redshift range
    'flag_group-BITWISE-OR': [2, 4, 32],      # in any group carrying one of these flags
    'STYPE': 'QSO',                           # source type
}

### Query call
    Returns a bool array of rows that match, the subset of the catalog, and the ID values

In [8]:
# Returns the boolean match array, the matching sub-catalog, and the matching ID values
matches, sub_cat, IDs = sdb.qcat.query_dict(qdict)

### Output

In [9]:
# Boolean array: True for rows that satisfy the query
matches

array([False, False, False, False, False, False,  True, False,  True,
       False,  True,  True, False,  True,  True,  True,  True, False,  True], dtype=bool)

In [10]:
# First three rows of the matching sub-catalog
sub_cat[:3]

flag_group,sig_zem,flag_zem,RA,DEC,STYPE,zem,IGM_ID
int64,float64,str10,float64,float64,str3,float64,int64
3,0.001,BOSS_PCA,0.1035,24.9509,QSO,3.205,123
3,0.002,BOSS_PCA,0.1934,1.2391,QSO,3.738,225
8211,0.003,BOSS_PCA,2.8135,14.7672,QSO,4.986,3244


In [11]:
# IGM_ID values of the matched sources
IDs

array([  123,   225,  3244, 17656, 22001, 22730, 24295, 32720, 41217])

----

## Query around a Position

### Input position can take a few forms as str or tuple:

    'J124511+144523'
    '124511+144523'
    'J12:45:11+14:45:23'
    ('12:45:11','+14:45:23')
    ('12 45 11', '+14 45 23')
    (123.123, 12.1224) # Assumes ICRS deg
    
### Radius is an astropy Angle or Quantity
    10*u.arcsec

### An example or two

#### Searching for a single source

In [12]:
# Position given as a sexagesimal string, with a 10 arcsec search radius
matches, sub_cat, IDs = sdb.qcat.query_position('001115.24+144601.9', 10*u.arcsec)

Your search yielded 1 match[es] within radius=10 arcsec


In [13]:
# Show the matched source
sub_cat

flag_group,sig_zem,flag_zem,RA,DEC,STYPE,zem,IGM_ID
int64,float64,str10,float64,float64,str3,float64,int64
8211,0.003,BOSS_PCA,2.8135,14.7672,QSO,4.986,3244


#### Now a wide search

In [14]:
# Position given as an (RA, Dec) tuple in degrees, with a 20 degree search radius
matches, sub_cat, IDs = sdb.qcat.query_position((2.813500,14.767200), 20*u.deg)

Your search yielded 12 match[es] within radius=20 deg


In [15]:
sub_cat  # Rows are ordered by separation from the search position

flag_group,sig_zem,flag_zem,RA,DEC,STYPE,zem,IGM_ID
int64,float64,str10,float64,float64,str3,float64,int64
8211,0.003,BOSS_PCA,2.8135,14.7672,QSO,4.986,3244
1,0.002,BOSS_PCA,0.0028,14.9747,QSO,2.516,1
3,0.001,BOSS_PCA,0.0391,13.9385,QSO,2.257,46
1,-1.0,BOSS_PCA,0.0019,17.7737,QSO,2.308,0
1,0.001,BOSS_PCA,0.0041,4.8298,QSO,1.629,2
3,0.001,BOSS_PCA,0.1035,24.9509,QSO,3.205,123
3,0.002,BOSS_PCA,0.1934,1.2391,QSO,3.738,225
3,0.0,BOSS_PCA,0.1788,0.9276,QSO,0.953,207
1,0.001,BOSS_PCA,0.0057,-1.325,QSO,2.335,4
1,0.0,BOSS_PCA,0.0053,-2.0333,QSO,1.362,3


In [16]:
# Translate each flag_group value into the comma-separated names of the groups it encodes
flags_to_groups(sub_cat['flag_group'], sdb.group_dict)

array([u'BOSS_DR12,GGG,SDSS_DR7', u'BOSS_DR12', u'BOSS_DR12,SDSS_DR7',
       u'BOSS_DR12', u'BOSS_DR12', u'BOSS_DR12,SDSS_DR7',
       u'BOSS_DR12,SDSS_DR7', u'BOSS_DR12,SDSS_DR7', u'BOSS_DR12',
       u'BOSS_DR12', u'BOSS_DR12,HD-LLS_DR1,SDSS_DR7',
       u'BOSS_DR12,HD-LLS_DR1,GGG,SDSS_DR7'], 
      dtype='<U33')

#### Now a wide search restricted on groups (source needs to occur in at least one of the groups)

In [17]:
# Same wide search, restricted to sources belonging to the listed groups
matches, sub_cat, IDs = sdb.qcat.query_position((2.813500,14.767200), 20*u.deg, groups=['SDSS_DR7','GGG'])

Your search yielded 7 match[es] within radius=20 deg


In [18]:
sub_cat  # Also ordered by separation from the search position

flag_group,sig_zem,flag_zem,RA,DEC,STYPE,zem,IGM_ID
int64,float64,str10,float64,float64,str3,float64,int64
8211,0.003,BOSS_PCA,2.8135,14.7672,QSO,4.986,3244
3,0.001,BOSS_PCA,0.0391,13.9385,QSO,2.257,46
3,0.001,BOSS_PCA,0.1035,24.9509,QSO,3.205,123
3,0.002,BOSS_PCA,0.1934,1.2391,QSO,3.738,225
3,0.0,BOSS_PCA,0.1788,0.9276,QSO,0.953,207
15,0.001,BOSS_PCA,20.4835,14.8066,QSO,2.877,21499
27,0.001,BOSS_PCA,16.5802,0.8065,QSO,4.432,17656


#### Now a wide search restricted on groups (source needs to occur in all of the groups)

In [19]:
# in_all_groups=True: a source must belong to every listed group to match
matches, sub_cat, IDs = sdb.qcat.query_position(
    '001115.24+144601.9', 20*u.deg,
    groups=['SDSS_DR7', 'GGG'], in_all_groups=True)

Your search yielded 2 match[es] within radius=20 deg


#### Now a wide search restricted by a query_dict

In [20]:
# Additionally restrict the positional search to the redshift (zem) range 1 to 3
qdict = {'zem': (1.0, 3.)}
matches, sub_cat, IDs = sdb.qcat.query_position(
    '001115.24+144601.9', 20*u.deg, query_dict=qdict)
sub_cat

Your search yielded 7 match[es] within radius=20 deg


flag_group,sig_zem,flag_zem,RA,DEC,STYPE,zem,IGM_ID
int64,float64,str10,float64,float64,str3,float64,int64
1,0.002,BOSS_PCA,0.0028,14.9747,QSO,2.516,1
3,0.001,BOSS_PCA,0.0391,13.9385,QSO,2.257,46
1,-1.0,BOSS_PCA,0.0019,17.7737,QSO,2.308,0
1,0.001,BOSS_PCA,0.0041,4.8298,QSO,1.629,2
1,0.001,BOSS_PCA,0.0057,-1.325,QSO,2.335,4
1,0.0,BOSS_PCA,0.0053,-2.0333,QSO,1.362,3
15,0.001,BOSS_PCA,20.4835,14.8066,QSO,2.877,21499


----

## Query with a set of coordinates
    Input is an astropy.coordinates.SkyCoord object, presumably an array

### Test data

In [21]:
# Two positions corresponding to the first two sources listed in the catalog
coords = SkyCoord(ra=[0.0028,0.0019], dec=[14.9747,17.7737], unit='deg')

### Query (successful matches)

In [22]:
# Match every input coordinate against the catalog
matches, subcat, IDs = sdb.qcat.query_coords(coords)

Your search yielded 2 matches from 2 input coordinates


In [23]:
# Check output: one row per input coordinate
subcat

flag_group,sig_zem,flag_zem,RA,DEC,STYPE,zem,IGM_ID
int64,float64,str10,float64,float64,str3,float64,int64
1,0.00152614617889,BOSS_PCA,0.00275643009297,14.9746754937,QSO,2.5158747041,1
1,-1.0,BOSS_PCA,0.00189828518376,17.7737391299,QSO,2.30763868197,0


### Query (one fail)

In [24]:
# The first declination has its sign flipped, so that coordinate finds no match
coords = SkyCoord(
    ra=[0.0028, 0.0019],
    dec=[-14.9747, 17.7737],
    unit='deg')
matches, subcat, IDs = sdb.qcat.query_coords(coords)
subcat

Your search yielded 1 matches from 2 input coordinates


flag_group,sig_zem,flag_zem,RA,DEC,STYPE,zem,IGM_ID
int64,float64,str10,float64,float64,str3,float64,int64
0,0.0,,0.0,0.0,,0.0,-1
1,-1.0,BOSS_PCA,0.00189828518376,17.7737391299,QSO,2.30763868197,0


In [25]:
# False marks the input coordinate that found no catalog match
matches

array([False,  True], dtype=bool)

### Query with qdict

In [26]:
# Both coordinates have a catalog source, but the first one's zem (~2.516)
# falls outside the 1.0 to 2.5 range requested here
coords = SkyCoord(
    ra=[0.0028, 0.0019],
    dec=[14.9747, 17.7737],
    unit='deg')
qdict = {'zem': (1.0, 2.5)}
matches, subcat, IDs = sdb.qcat.query_coords(coords, query_dict=qdict)
subcat

Your search yielded 1 matches from 2 input coordinates


flag_group,sig_zem,flag_zem,RA,DEC,STYPE,zem,IGM_ID
int64,float64,str10,float64,float64,str3,float64,int64
0,0.0,,0.0,0.0,,0.0,-2
1,-1.0,BOSS_PCA,0.00189828518376,17.7737391299,QSO,2.30763868197,0


In [27]:
IDs  # -2 means there is a source at that coordinate, but it does not meet the other query criteria

array([-2,  0])