## This notebook contains various information and trial queries of the MAST database API, tailored towards the interests of the JWQL application.

#### Available `services`

- `Mast.Jwst.Filtered.Miri`
- `Mast.Jwst.Filtered.Nircam`
- `Mast.Jwst.Filtered.Niriss`
- `Mast.Jwst.Filtered.Nirspec`
- `Mast.Jwst.Filtered.Fgs`

In [1]:
from collections import defaultdict
import glob
import os

from astropy.io import fits
from astroquery.mast import Mast

#### A very basic example of how to submit a query

In [2]:
# Ask the MIRI service for all columns ("*") with no row filters applied.
service = "Mast.Jwst.Filtered.Miri"
params = {
    "columns": "*",
    "filters": [],
}
response = Mast.service_request_async(service, params)
# Only the first element of the returned list is used; parse its JSON body.
result = response[0].json()

#### `response` is a list containing one item of type `requests.models.Response`, which can easily be converted to `json`/`dict`

In [3]:
# Show the Python type that `.json()` produced for the parsed response
type(result)

dict

In [4]:
# List the top-level keys of the parsed response
result.keys()

dict_keys(['data', 'fields', 'msg', 'paging', 'status'])

#### The `fields` key lists the resulting column names and their data types

In [None]:
# Display the contents of the `fields` entry of the parsed response
result['fields']

#### The `data` key contains the values for each row returned from the query

In [None]:
# Display every row returned by the query
result['data']

In [7]:
# Number of rows returned by the query
len(result['data'])

496

#### Specific columns can be pulled out by iterating over `data`

In [None]:
# Collect the `filename` value of every returned row, then show the full list
filenames = [row['filename'] for row in result['data']]
print(filenames)

In [9]:
# Compare the total number of filenames with the number of distinct ones;
# equal counts mean there are no duplicates
print(len(filenames))
print(len(set(filenames)))

496
496


#### The `status` key appears to return the status of the query

In [10]:
# Display the `status` entry of the parsed response
result['status']

'COMPLETE'

#### The `paging` key describes the amount of data returned by the query

In [11]:
# Display the `paging` entry of the parsed response
result['paging']

{'page': 1,
 'pageSize': 50000,
 'pagesFiltered': 1,
 'rows': 496,
 'rowsFiltered': 496,
 'rowsTotal': 496}

#### I'm not sure what the `msg` key stores

In [12]:
# Display the `msg` entry of the parsed response
result['msg']

''

#### Testing the services for the other JWST instruments

In [13]:
# One filtered service per JWST instrument
services = [
    'Mast.Jwst.Filtered.Nircam',
    'Mast.Jwst.Filtered.Nirspec',
    'Mast.Jwst.Filtered.Niriss',
    'Mast.Jwst.Filtered.Miri',
    'Mast.Jwst.Filtered.Fgs',
]

# Send the same all-column, unfiltered request to each service and report
# how many rows came back
for service in services:
    params = {"columns": "*", "filters": []}
    response = Mast.service_request_async(service, params)
    result = response[0].json()
    row_count = len(result['data'])
    print('{}: {} rows'.format(service, row_count))

Mast.Jwst.Filtered.Nircam: 3027 rows
Mast.Jwst.Filtered.Nirspec: 858 rows
Mast.Jwst.Filtered.Niriss: 387 rows
Mast.Jwst.Filtered.Miri: 496 rows
Mast.Jwst.Filtered.Fgs: 152 rows


### Example Query: Find all MIRI data taken within a given time interval

In [14]:
# Restrict the MIRI query to rows whose `expstart` lies inside a min/max range.
# NOTE(review): the expstart values look like Modified Julian Dates - confirm.
service = "Mast.Jwst.Filtered.Miri"
expstart_range = {"min": 57404.04, "max": 57404.07}
params = {
    "columns": "filename, expstart",
    "filters": [
        {"paramName": "expstart", "values": [expstart_range]},
    ],
}
response = Mast.service_request_async(service, params)
result = response[0].json()

In [15]:
# Display the rows that matched the `expstart` range filter
result['data']

[{'expstart': 57404.04476711806,
  'filename': 'jw80600003001_02101_00001_mirimage_cal.fits'},
 {'expstart': 57404.04476711806,
  'filename': 'jw80600003001_02101_00001_mirimage_i2d.fits'},
 {'expstart': 57404.05132637732,
  'filename': 'jw80600004001_02101_00001_mirimage_cal.fits'},
 {'expstart': 57404.05132637732,
  'filename': 'jw80600004001_02101_00001_mirimage_i2d.fits'},
 {'expstart': 57404.04108451065,
  'filename': 'jw80600001001_02103_00001_mirimage_uncal.fits'},
 {'expstart': 57404.04476711806,
  'filename': 'jw80600003001_02101_00001_mirimage_rate.fits'},
 {'expstart': 57404.04476711806,
  'filename': 'jw80600003001_02101_00001_mirimage_rateints.fits'},
 {'expstart': 57404.05979692546,
  'filename': 'jw80600005001_02101_00001_mirimage_uncal.fits'},
 {'expstart': 57404.05132637732,
  'filename': 'jw80600004001_02101_00001_mirimage_rate.fits'}]

#### Note that this doesn't seem to be possible with `date_obs`

In [16]:
# Attempt the same kind of range filter, this time on `date_obs` using
# date strings for the bounds
service = "Mast.Jwst.Filtered.Miri"
date_obs_range = {"min": '01-17-2016', "max": '01-18-2016'}
params = {
    "columns": "filename, date_obs",
    "filters": [
        {"paramName": "date_obs", "values": [date_obs_range]},
    ],
}
response = Mast.service_request_async(service, params)
result = response[0].json()

In [17]:
# Number of rows returned when filtering on `date_obs`
len(result['data'])

496

In [None]:
# Display the rows returned by the `date_obs` query
result['data']

### Example Query: Which rootnames are currently proprietary?

In [19]:
# Fetch the restriction flag and public release date for every MIRI row
service = "Mast.Jwst.Filtered.Miri"
params = {
    "columns": "filename, expstart, isRestricted, publicReleaseDate",
    "filters": [],
}
response = Mast.service_request_async(service, params)
result = response[0].json()

In [None]:
# Display the rows returned by the proprietary-status query
result['data']

In [21]:
# Number of rows returned by the proprietary-status query
print(len(result['data']))

496


#### Again, the `/Date` datatype seems to be unparsable

#### What if we wanted to know this across all instruments?  It seems that five separate queries are needed

In [22]:
# Build one service name per instrument
instruments = ['Nircam', 'Niriss', 'Nirspec', 'Miri', 'Fgs']
services = ["Mast.Jwst.Filtered." + name for name in instruments]

# The same request is sent to every service
params = {
    "columns": "filename, expstart, isRestricted, publicReleaseDate",
    "filters": [],
}

# Pool the rows from all services into a single list
results = []
for service in services:
    response = Mast.service_request_async(service, params)
    result = response[0].json()
    results += result['data']

In [None]:
# Display the combined rows from all instrument services
results

In [24]:
# Total number of rows gathered across all instrument services
print(len(results))

4920


### Example Query: How many images exist for a given NIRCam filter?

In [25]:
# Tally how many NIRCam files exist for each filter.
# `defaultdict` is already imported in the notebook's first code cell, so it
# is not re-imported here.
service = "Mast.Jwst.Filtered.Nircam"
# No row filter is applied: the `date_obs` range filter that used to be here
# had no effect (the tallies printed below sum to all 3027 NIRCam rows, the
# same as the unfiltered row count), so it only suggested a date restriction
# that was never actually enforced.
params = {"columns": "filename, filter",
          "filters": []}
response = Mast.service_request_async(service, params)
result = response[0].json()
results_dict = defaultdict(int)
for item in result['data']:
    # Each distinct `filter` value (including None) gets its own counter
    results_dict[item['filter']] += 1

In [26]:
# Print one "filter: count" line per tallied filter value
for filter_name, file_count in results_dict.items():
    print('{}: {}'.format(filter_name, file_count))

F460M: 2
F210M: 25
F277W: 120
F187N: 176
F444W: 180
F430M: 13
F410M: 9
F200W: 84
F115W: 396
F322W2: 67
F070W: 318
F356W: 111
F212N: 493
F360M: 5
WLP4: 378
F335M: 33
F182M: 19
F150W: 60
UNKNOWN: 1
F140M: 44
F480M: 29
None: 130
F150W2: 112
F300M: 5
F090W: 217


### Determining which header keywords exist in file headers, but not the MAST database

In [27]:
# Path to the single local FITS file whose headers are compared against MAST.
# Note the full path is excluded for security reasons.
test_filename = '/our/test/file/base/directory/jw00329003001_02101_00001_nrca2_rate.fits'

In [28]:
# Gather the header keywords from extensions 0 (PRIMARY) and 1 (SCI) of the
# test file, lower-cased and with '-' swapped for '_'
header0_keys_fits = {
    key.lower().replace('-', '_')
    for key in fits.getheader(test_filename, 0).keys()
}
header1_keys_fits = {
    key.lower().replace('-', '_')
    for key in fits.getheader(test_filename, 1).keys()
}
# Union of the keywords found in either extension
header_keys_fits = header0_keys_fits.union(header1_keys_fits)

In [29]:
# Look up the test file in the NIRCam service by its base filename and take
# the column names of the first returned row as the set of MAST keywords
service = "Mast.Jwst.Filtered.Nircam"
filename_filter = {
    "paramName": "filename",
    "values": [os.path.basename(test_filename)],
}
params = {"columns": "*", "filters": [filename_filter]}
response = Mast.service_request_async(service, params)
result = response[0].json()
first_row = result['data'][0]
header_keys_db = set(first_row)

In [30]:
# MAST API keywords that have no counterpart in the FITS headers
header_keys_db.difference(header_keys_fits)

{'ArchiveFileID',
 'FileSetId',
 'FileTypeID',
 'asnpool',
 'asntable',
 'bkglevel',
 'bkgsub',
 'checksum',
 'dataURI',
 'fileSetName',
 'fileSize',
 'ingestCompletionDate',
 'ingestStartDate',
 'isItar',
 'isRestricted',
 'isStale',
 'nwfsest',
 'productLevel',
 'psfref',
 'publicReleaseDate',
 'pwfseet',
 'selfref',
 'srctype'}

In [31]:
# FITS header keywords that have no counterpart in the MAST API
header_keys_fits.difference(header_keys_db)

{'',
 'bitpix',
 'bunit',
 'cal_vcs',
 'cal_ver',
 'cdelt1',
 'cdelt2',
 'comment',
 'crds_ctx',
 'crds_ver',
 'crpix1',
 'crpix2',
 'crval1',
 'crval2',
 'ctype1',
 'ctype2',
 'cunit1',
 'cunit2',
 'datamodl',
 'date_end',
 'dec_ref',
 'dec_v1',
 'extend',
 'extname',
 'extver',
 'gcount',
 'naxis',
 'naxis1',
 'naxis2',
 'pa_aper',
 'pa_v3',
 'pc1_1',
 'pc1_2',
 'pc2_1',
 'pc2_2',
 'pcount',
 'r_dark',
 'r_gain',
 'r_linear',
 'r_mask',
 'r_persat',
 'r_readno',
 'r_satura',
 'r_superb',
 'r_trpden',
 'r_trppar',
 'ra_ref',
 'ra_v1',
 'radesys',
 'roll_ref',
 's_dark',
 's_dqinit',
 's_ganscl',
 's_grpscl',
 's_jump',
 's_linear',
 's_persis',
 's_ramp',
 's_refpix',
 's_satura',
 's_superb',
 'simple',
 'time_end',
 'time_obs',
 'v2_ref',
 'v3_ref',
 'v3i_yang',
 'va_scale',
 'vparity',
 'wcsaxes',
 'xtension'}

### Determining which header keywords exist in file headers, but not the MAST database, using set of all test files

In [32]:
# Collect every .fits file located one directory level below the test-file
# base directory
local_filenames = glob.glob('/our/test/file/base/directory/*/*.fits')

In [33]:
# Build the list of distinct filename suffixes (the text after the last '_'
# of the base filename), keeping the order in which they are first seen
suffixes = []
seen_suffixes = set()
for filename in local_filenames:
    suff = os.path.basename(filename).rsplit('_', 1)[-1]
    if suff in seen_suffixes:
        continue
    seen_suffixes.add(suff)
    suffixes.append(suff)

In [34]:
# Display the distinct filename suffixes that were found
suffixes

['uncal.fits',
 'rate.fits',
 'rateints.fits',
 'dark.fits',
 'trapsfilled.fits',
 'i2d.fits',
 'cal.fits',
 'x1d.fits',
 's2d.fits']

In [35]:
# Drop every path containing 'trapsfilled'; the substring test is applied to
# the whole path, not just the filename
all_filenames = [path for path in local_filenames if 'trapsfilled' not in path]

In [36]:
# Number of local files remaining after the 'trapsfilled' exclusion
len(all_filenames)

5532

In [37]:
# Accumulate the normalised header keywords of every local file
all_header_keys = set()
for infile in all_filenames:
    # Keywords from extensions 0 (PRIMARY) and 1 (SCI), lower-cased and with
    # '-' swapped for '_'
    header0_keys_fits = {
        key.lower().replace('-', '_')
        for key in fits.getheader(infile, 0).keys()
    }
    header1_keys_fits = {
        key.lower().replace('-', '_')
        for key in fits.getheader(infile, 1).keys()
    }
    header_keys_fits = header0_keys_fits.union(header1_keys_fits)
    # Fold this file's keywords into the running set
    all_header_keys.update(header_keys_fits)

In [38]:
# Number of distinct normalised header keywords found across all local files
len(all_header_keys)

363

In [39]:
# Gather the column names exposed by every instrument service, using the
# first returned row of an unfiltered, all-column query as the sample
services = [
    'Mast.Jwst.Filtered.Nircam',
    'Mast.Jwst.Filtered.Nirspec',
    'Mast.Jwst.Filtered.Niriss',
    'Mast.Jwst.Filtered.Miri',
    'Mast.Jwst.Filtered.Fgs',
]
header_keys_db = set()
for service in services:
    params = {"columns": "*", "filters": []}
    response = Mast.service_request_async(service, params)
    result = response[0].json()
    first_row = result['data'][0]
    header_keys_db.update(first_row.keys())

In [40]:
# MAST API keywords that appear in none of the local FITS headers
header_keys_db.difference(all_header_keys)

{'ArchiveFileID',
 'FileSetId',
 'FileTypeID',
 'checksum',
 'dataURI',
 'fileSetName',
 'fileSize',
 'ingestCompletionDate',
 'ingestStartDate',
 'isItar',
 'isRestricted',
 'isStale',
 'nwfsest',
 'productLevel',
 'publicReleaseDate',
 'pwfseet',
 'subpxpat'}

In [41]:
# Local FITS header keywords that are not exposed by the MAST API
all_header_keys.difference(header_keys_db)

{'',
 'bitpix',
 'bscale',
 'bunit',
 'bzero',
 'cal_vcs',
 'cal_ver',
 'cdelt1',
 'cdelt2',
 'comment',
 'conext',
 'crds_ctx',
 'crds_ver',
 'crpix1',
 'crpix2',
 'crval1',
 'crval2',
 'ctype1',
 'ctype2',
 'ctype3',
 'cunit1',
 'cunit2',
 'cunit3',
 'datamodl',
 'date_end',
 'dec_ref',
 'dec_v1',
 'dfval',
 'dkern',
 'doutun',
 'dpixfr',
 'dwtscl',
 'extend',
 'extname',
 'extver',
 'gcount',
 'naxis',
 'naxis1',
 'naxis2',
 'naxis3',
 'naxis4',
 'ndriz',
 'nrs_norm',
 'nrs_ref',
 'pa_aper',
 'pa_v3',
 'pc1_1',
 'pc1_2',
 'pc2_1',
 'pc2_2',
 'pcount',
 'photmjsr',
 'photuja2',
 'pixar_a2',
 'pixar_sr',
 'r_area',
 'r_barsha',
 'r_camera',
 'r_collim',
 'r_dark',
 'r_dflat',
 'r_disper',
 'r_distor',
 'r_drzpar',
 'r_extr1d',
 'r_fflat',
 'r_filoff',
 'r_flat',
 'r_fore',
 'r_fpa',
 'r_gain',
 'r_ifufor',
 'r_ifupos',
 'r_ifusli',
 'r_linear',
 'r_mask',
 'r_msa',
 'r_ote',
 'r_persat',
 'r_photom',
 'r_pthlos',
 'r_readno',
 'r_refpix',
 'r_region',
 'r_rscd',
 'r_satura',
 'r_sflat

In [42]:
# How many FITS header keywords are missing from the MAST API
len(all_header_keys.difference(header_keys_db))

172