In [2]:
import numpy as np
import pandas as pd
import glob
import os
import sys
import datetime
from collections import Counter

## Visualizing the STIS Data Archive

### Storing metadata in a pandas DataFrame
Currently, we can grab higher-level metadata from MAST. The code below was written by Sean Lockwood and constructs a pandas DataFrame of STIS metadata from all STIS science observations. It doesn't yet take advantage of astroquery's MAST API, but that is on the to-do list. Once lower-level metadata comes to astroquery (hopefully Fall 2019?), we will wrap that into this project.

In [20]:
def download_mast_metadata(datatype='S', instrument='STIS', output_csv=False,
                           csv_path='stis_archive.csv'):
    '''
    Downloads HST/{STIS,COS} observation metadata from MAST, one calendar
    year at a time (1997 through the current year), and returns it as a
    single pandas DataFrame.
    
    Parameters:
        datatype   -- which observations to request; one of:
            'S'         -- science observations (default)
            'C'         -- calibration observations
            '%' | 'ALL' -- both science and calibration observations
        instrument -- value sent as the 'sci_instrume' search field
                      (default 'STIS')
        output_csv -- if true, also write the resulting table to 'csv_path'
        csv_path   -- destination of the optional CSV file
                      (default 'stis_archive.csv')
    
    Returns:
        DataFrame with one row per returned record and a fresh 0..N-1 index.
        'Start Time' is converted to datetimes, 'Instrument Config' is
        stripped of surrounding whitespace, and an 'obstype' column
        ('Imaging' / 'Spectroscopic' / 'Coronagraphic') is added.
    
    Raises:
        AssertionError -- if 'datatype' is not one of the accepted values.
        ValueError     -- if no year returned any parsable records.
    
    Mast documentation:
        MAST GET Requests:      https://archive.stsci.edu/vo/mast_services.html#GET
        HST-specific keywords:  https://archive.stsci.edu/search_fields.php?mission=hst
    '''
    import io
    import urllib.request
    import urllib.parse
    
    # Determine if we want 'science', 'calibration', or 'all' datasets:
    datatype = datatype.upper()
    assert datatype in ['S', 'C', '%', 'ALL'], "'datatype' is not a valid selection."
    if datatype == 'ALL':
        datatype = '%'
    
    url = 'https://archive.stsci.edu/hst/search.php'
    
    # Per-request row cap sent to the service as 'max_records'.
    max_records = 25000
    
    # Output columns
    selectedColumnsCsv = ','.join([
        'sci_data_set_name',
        'sci_obset_id',
        'sci_targname',
        'sci_start_time',
        'sci_stop_time',
        'sci_actual_duration',
        'sci_instrume',
        'sci_instrument_config',
        'sci_operating_mode',
        'sci_aper_1234',
        'sci_spec_1234',
        'sci_central_wavelength',
        'sci_fgslock',
        'sci_mtflag',
        'sci_pep_id',
        'sci_aec',
        'sci_obs_type',
        'scp_scan_type',
    ])
    
    # Loop year-by-year to avoid data limits:
    all_years = []
    for year in range(1997, datetime.datetime.now().year + 1):
        print('Working on {}...'.format(year))
        data = [
            ('sci_instrume',       instrument),
            ('sci_aec',            datatype),
            ('sci_start_time',     'Jan 1 {} .. Jan 1 {}'.format(year, year + 1)),
            ('max_records',        str(max_records)),
            ('ordercolumn1',       'sci_start_time'),
            ('outputformat',       'JSON'),
            ('selectedColumnsCsv', selectedColumnsCsv),
            ('nonull',             'on'),
            ('action',             'Search'),
        ]
        
        full_url = url + '?' + urllib.parse.urlencode(data)
        with urllib.request.urlopen(full_url) as response:
            payload = response.read()
        
        try:
            # Wrap the text in a file-like object: passing a literal JSON
            # string to read_json is deprecated in recent pandas releases.
            year_table = pd.read_json(io.StringIO(payload.decode()))
        except ValueError:
            # Best effort, as before: a response that is not parsable JSON
            # is treated as a year with no data, but is now reported.
            print('  No parsable data returned for {}; skipping.'.format(year))
            continue
        
        if year_table.empty:
            continue
        if len(year_table) >= max_records:
            print('  Warning: {} hit the {}-record limit; results may be truncated.'
                  .format(year, max_records))
        all_years.append(year_table)
    
    if not all_years:
        raise ValueError("No metadata was returned for instrument {!r} and datatype {!r}."
                         .format(instrument, datatype))
    
    # Concatenate individual years together.  Each yearly table carries its own
    # 0..n index, so rebuild the index to keep row labels unique.
    mast = pd.concat(all_years, ignore_index=True)
    
    # Modify/add some columns:
    mast['Start Time'] = pd.to_datetime(mast['Start Time'], format='%Y-%m-%d %H:%M:%S')
    # Missing 'Filters/Gratings' values fall through to the default label
    # instead of raising.
    is_imaging = mast['Filters/Gratings'].str.contains('MIR', regex=False, na=False)
    mast['obstype'] = np.where(is_imaging, 'Imaging', 'Spectroscopic')
    mast.loc[mast['Apertures'] == '50CORON', 'obstype'] = 'Coronagraphic'
    mast['Instrument Config'] = mast['Instrument Config'].str.strip()
    
    if output_csv:
        mast.to_csv(csv_path)
    return mast

# Toggle: True re-queries MAST year by year and rewrites the CSV cache;
# False reuses the previously saved CSV.
generate = False
if not generate:
    # Load every column as text and drop the first CSV column
    # (presumably the index column written by to_csv -- confirm).
    mast = pd.read_csv("stis_archive.csv", dtype=str).iloc[:, 1:]
else:
    mast = download_mast_metadata(instrument='STIS', output_csv=True)  # Or analyze 'COS'
print('Number rows:  {}'.format(len(mast)))

Number rows:  108231
