# GSEG data validation

This notebook contains code that can be used to validate data created during GSEG3. It reads in the APT-derived xml and pointing files and constructs a dictionary of expected data properties. It then compares these properties to the information contained in the headers of the actual data to look for inconsistencies.

**Mirage and pysiaf are dependencies. Be sure they are installed in your environment.**

In [1]:
from astropy.io import fits
from astropy.table import Table
from collections import OrderedDict
from glob import glob
from mirage.yaml import yaml_generator
from mirage.apt import apt_inputs
from mirage.seed_image.catalog_seed_image import Catalog_seed
from mirage.utils.siaf_interface import sci_subarray_corners
from mirage.utils.utils import calc_frame_time
from mirage.yaml.generate_observationlist import get_observation_dict
import numpy as np
import os
import pkg_resources
import pysiaf

## User Inputs

In [2]:
# Header keywords to check against APT-derived dictionary
# It is assumed that these header keywords exist in both the uncal and rate images
KEYWORDS = ['SUBARRAY', 'DETECTOR', 'NINTS', 'NGROUPS', 'NAXIS', 'EFFEXPTM',
            'LONGFILTER', 'LONGPUPIL', 'SHORTFILTER', 'SHORTPUPIL', 'READPATT',
            'OBSLABEL', 'EXP_TYPE', 'TITLE', 'OBSERVTN', 'TEMPLATE',
            'EXPRIPAR', 'SUBSTRT1', 'SUBSTRT2', 'SUBSIZE1', 'SUBSIZE2',
            'FASTAXIS', 'SLOWAXIS', 'PATTTYPE']

In [3]:
# Corresponding keywords in the APT-derived dictionary
# These must correspond one-to-one with KEYWORDS above
TABLE_KEYWORDS = ['Subarray', None, 'Integrations', 'Groups', None, None,
                  'LongFilter', 'LongPupil', 'ShortFilter', 'ShortPupil', 'ReadoutPattern',
                  'ObservationName', 'Mode', 'Title', 'ObservationID', 'APTTemplate',
                  'ParallelInstrument', None, None, None, None,
                  None, None, 'PrimaryDitherType']

In [4]:
# Dictionary of header keywords and their expected values in the 
# PRIMARY and SCI extensions of the UNCAL and RATE files.
# These are useful when you know exactly what these keywords should be
# for every file (e.g. NAXIS should always be 2 in the RATE image SCI header).
# These will be checked in addition to the KEYWORDS checks above
UNCAL_PRIMARY_KEYWORDS = {}
UNCAL_SCI_KEYWORDS = {'BITPIX':16, 'NAXIS':4, 'BUNIT':'DN'}
RATE_PRIMARY_KEYWORDS = {}
RATE_SCI_KEYWORDS = {'BITPIX':-32, 'NAXIS':2, 'BUNIT':'DN/s'}

In [5]:
# The PRIMARY and SCI header keywords to store for each file in the output summary table
SUMMARY_TABLE_PRIMARY = ['FILENAME', 'DETECTOR', 'FILTER', 'PUPIL', 'EXP_TYPE', 'READPATT', 'NINTS', 'NGROUPS',
                         'NFRAMES', 'GROUPGAP', 'SUBARRAY', 'SUBSIZE1', 'SUBSIZE2', 'APERNAME']
SUMMARY_TABLE_SCI = ['BITPIX', 'NAXIS', 'NAXIS1', 'NAXIS2', 'NAXIS3', 'NAXIS4', 'BUNIT']

## Define some constants

In [6]:
INTEGER_KEYWORDS = ['Integrations', 'Groups']
FLOAT_KEYWORDS = ['EFFEXPTM']
FILTER_KEYWORDS = ['LONGFILTER', 'LONGPUPIL', 'SHORTFILTER', 'SHORTPUPIL']

In [7]:
# For FASTAXIS and SLOWAXIS
HORIZONTAL_FLIP = ['NRCA1', 'NRCA3', 'NRCALONG', 'NRCB2', 'NRCB4']
VERTICAL_FLIP = ['NRCA2', 'NRCA4', 'NRCB1', 'NRCB3', 'NRCBLONG']

In [47]:
# Expected detectors used for each module_subarray combo (not set up for coronography currently)
DETECTOR_DICT = {'ALL_FULL':['NRCA1', 'NRCA2', 'NRCA3', 'NRCA4', 'NRCALONG',
                             'NRCB1', 'NRCB2', 'NRCB3', 'NRCB4', 'NRCBLONG'],
                 'A_FULL':['NRCA1', 'NRCA2', 'NRCA3', 'NRCA4', 'NRCALONG'],
                 'A_SUBGRISM256':['NRCA1', 'NRCA3','NRCALONG'],
                 'A_SUBGRISM128':['NRCA1', 'NRCA3','NRCALONG'],
                 'A_SUBGRISM64':['NRCA1', 'NRCA3','NRCALONG'],
                 'A_SUB32TATSGRISM':['NRCALONG'],
                 'B_FULL':['NRCB1', 'NRCB2', 'NRCB3', 'NRCB4', 'NRCBLONG'],
                 'B_SUB640':['NRCB1', 'NRCB2', 'NRCB3', 'NRCB4', 'NRCBLONG'],
                 'B_SUB320':['NRCB1', 'NRCB2', 'NRCB3', 'NRCB4', 'NRCBLONG'],
                 'B_SUB160':['NRCB1', 'NRCB2', 'NRCB3', 'NRCB4', 'NRCBLONG'],
                 'B_SUB400P':['NRCB1' 'NRCBLONG'],
                 'B_SUB160P':['NRCB1' 'NRCBLONG'],
                 'B_SUB64P':['NRCB1' 'NRCBLONG'],
                 'B_SUB32TATS':['NRCBLONG']}

## Define functions

In [223]:
def add_entry(filename, summary_dict):
    """Adds an entry for filename to the output summary dict
    """
    for key in SUMMARY_TABLE_PRIMARY:
        try:
            val = str(fits.getheader(filename, 'PRIMARY')[key])
        except KeyError:
            val = ''
        summary_dict[key].append(val)
    
    for key in SUMMARY_TABLE_SCI:
        try:
            val = str(fits.getheader(filename, 'SCI')[key])
        except KeyError:
            val = ''
        summary_dict[key].append(val)
    
    return summary_dict

In [224]:
def adjust_exptype(value):
    """Modify the exposure type as listed in the exposure table
    to match one of the strings as used in the fits files.
    e.g. 'imaging' becomes 'NRC_IMAGE'
    Remember that currently, Mirage only knows imaging and wfss
    """
    if value == 'imaging':
        return 'NRC_IMAGE'
    elif value == 'wfss':
        return 'NRC_GRISM'

In [225]:
def calculate_total_files(exp_dict, index):
    """Calculate the total number of files expected for an
    observation based on the number of dithers and the module.
    ASSUME detectors are used as hard-coded below. This
    assumption will not be true for some WFSC apertures.
    """
    
    # Expected detectors used for each module/subarray combo
    
    
    module = exp_dict['Module'][index]
    subarray = exp_dict['Subarray'][index]
    subarray = subarray.replace('DHSPILA', '').replace('DHSPILB', '')
    number_of_dithers = exp_dict['number_of_dithers'][index]
    if module in ['A', 'B']:
        dets = 5
    else:
        dets = 10
    total = number_of_dithers * dets
    return total

In [226]:
def equalize_file_lists(uncal, rate):
    """Given lists of uncal and rate files corresponding to a single
    observation, adjust the lists to be the same length, adding in
    None for any files that are missing in a given list
    """
    udict = {}
    rdict = {}
    expanded_rate = []
    expanded_uncal = []

    # Loop through uncal files and look for matching rate files
    for ufile in uncal:
        dirname, filename = os.path.split(ufile)
        base = filename.strip('_uncal.fits')
        fullbase = os.path.join(dirname, base)
        found = False
        for rfile in rate:
            if fullbase in rfile:
                found = True
                break
        udict[base] = found

    # Loop through rate files and look for matching uncal files
    for rfile in rate:
        dirname, filename = os.path.split(rfile)
        base = filename.strip('_rate.fits')
        fullbase = os.path.join(dirname, base)
        found = False
        for ufile in uncal:
            if fullbase in ufile:
                found = True
                break
        rdict[base] = found

    # Fill in missing files, in either uncal or rate lists,
    # with None
    for ukey in udict:
        expanded_uncal.append(ukey + '_uncal.fits')
        if udict[key]:
            expanded_rate.append(ukey + '_rate.fits')
        else:
            expanded_rate.append(None)
    for rkey in rdict:
        if not rdict[key]:
            expanded_rate.append(rkey + '_rate.fits')
            expanded_uncal.append(None)
    return expanded_uncal, expanded_rate

In [227]:
def find_existing_files(uncal_files, rate_files):
    """Find the number of uncal and rate files that exist
    in the input file paths, and return any missing uncal/rate files.
    The expectation is that every uncal file has a corresponding 
    rate file.
    """
    uncal_files_exist = np.array([os.path.isfile(f) for f in uncal_files])
    rate_files_exist = np.array([os.path.isfile(f) for f in rate_files])
    
    n_uncal = len(uncal_files_exist[uncal_files_exist])
    n_rate = len(rate_files_exist[rate_files_exist])
    
    missing_uncal_files = np.array(uncal_files)[~uncal_files_exist]
    missing_rate_files = np.array(rate_files)[~rate_files_exist]
    
    return n_uncal, n_rate, missing_uncal_files, missing_rate_files

In [228]:
def find_fastaxis(detector):
    """Identify the values of FASTAXIS and SLOWAXIS based on the detector
    name
    """
    if detector in HORIZONTAL_FLIP:
        fast = -1
        slow = 2
    elif detector in VERTICAL_FLIP:
        fast = 1
        slow = -2
    return fast, slow

In [229]:
def get_data(filename):
    """Read in the given fits file and return the data and header
    """
    with fits.open(filename) as h:
        signals = h['SCI'].data
        header0 = h[0].header
        header1 = h[1].header
    return signals, header0, header1

In [230]:
def get_expected_shape(sub):
    """Returns the expected shape of the science data
    based on the input APT subarray.
    """
    siaf = pysiaf.Siaf('NIRCam')
    subarray = sub.replace('SUB', '').replace('DHSPILA', '').replace('DHSPILB', '')
    
    if 'FULL' in subarray:
        expected_shape = (2048, 2048)
    else:
        # needed to be careful here to remove cases where e.g. SUB64 was in SUB640
        similar_aps = [aper for aper in siaf.apernames if subarray in aper and subarray+'0' not in aper]
        if len(similar_aps) == 0:
            print('WARNING: Cannot find expected shape for subarray {}'.format(sub))
            expected_shape = (-99, -99)
        else:
            # just use first entry to get expected shape since they should all be the same
            similar_ap = similar_aps[0]
            expected_shape = (siaf[similar_ap].YSciSize, siaf[similar_ap].XSciSize)
    
    return expected_shape

In [231]:
def header_keywords(head):
    """Extract values for the desired keywords from the given header
    """
    file_info = {}
    for keyword in KEYWORDS:
        try:
            info = head[keyword]
        except KeyError:
            if 'FILTER' in keyword:
                info = head['FILTER']
            elif 'PUPIL' in keyword:
                info = head['PUPIL']
            else:
                info = None

        file_info[keyword] = info
    return file_info

In [232]:
def table_info(values, index):
    """Extract information from the exposure table that matches the
    header keyword values in KEYWORDS
    """
    values_dict = {}
    for table_keyword, file_keyword in zip(TABLE_KEYWORDS, KEYWORDS):
        if table_keyword is not None:
            if table_keyword in INTEGER_KEYWORDS:
                value = int(values[table_keyword][index])
            else:
                value = values[table_keyword][index]
            values_dict[file_keyword] = value
        else:
            values_dict[file_keyword] = None
    return values_dict

In [233]:
def verify_dimensions(filename, file_type, expected_shape):
    """Verify the header and data dimensions for each extension.
    """
    header = fits.getheader(filename, 'PRIMARY')
    if file_type == 'UNCAL':
        extensions = ['SCI']
        primary_header_shape = (header['NINTS'], header['NGROUPS'], header['SUBSIZE2'], header['SUBSIZE1'])
        for ext in extensions:
            try:
                header = fits.getheader(filename, ext)
                data_shape = fits.getdata(filename, ext).shape
                naxis_shape = (header['NAXIS4'], header['NAXIS3'], header['NAXIS2'], header['NAXIS1'])
                if ((primary_header_shape != data_shape) | (primary_header_shape != naxis_shape) | 
                    (primary_header_shape[-2:] != expected_shape)):
                    print('WARNING: Data dimensions incorrect')
                    print('Expected image shape: {}'.format(expected_shape))
                    print('PRIMARY header shape: {}'.format(primary_header_shape))
                    print('{} header shape: {}'.format(ext, naxis_shape))
                    print('{} data shape: {}'.format(ext, data_shape)) 
            except KeyError:
                print('Cannot verify shape for {} extension'.format(ext))
    elif file_type == 'RATE':
        extensions = 'SCI ERR DQ VAR_POISSON VAR_RNOISE'.split()
        primary_header_shape = (header['SUBSIZE2'], header['SUBSIZE1'])
        for ext in extensions:
            try:
                header = fits.getheader(filename, ext)
                data_shape = fits.getdata(filename, ext).shape
                naxis_shape = (header['NAXIS2'], header['NAXIS1'])
                if ((primary_header_shape != data_shape) | (primary_header_shape != naxis_shape) | 
                    (primary_header_shape != expected_shape)):
                    print('WARNING: Data dimensions incorrect')
                    print('Expected image shape: {}'.format(expected_shape))
                    print('PRIMARY header shape: {}'.format(primary_header_shape))
                    print('{} header shape: {}'.format(ext, naxis_shape))
                    print('{} data shape: {}'.format(ext, data_shape))
            except KeyError:
                print('Cannot verify shape for {} extension'.format(ext))
    else:
        print('File type {} not supported for dimension checks'.format(file_type))

In [234]:
def verify_extensions(filename, file_type):
    """Verify that the expected extensions exist
    """
    if file_type == 'UNCAL':
        extensions = 'PRIMARY SCI GROUP INT_TIMES ASDF'.split()
    elif file_type == 'RATE':
        extensions = 'PRIMARY SCI ERR DQ VAR_POISSON VAR_RNOISE ASDF'.split()
    else:
        print('File type {} not supported for ext verification'.format(file_type))
        
    for ext in extensions:
        try:
            header = fits.getheader(filename, ext)
        except KeyError:
            print('WARNING: {} extension does not exist'.format(ext))

## The main function

In [235]:
def validate(xml_file, output_dir, gseg_uncal_files):
    """MAIN FUNCTION"""
    
    read_pattern_def_file = os.path.join(pkg_resources.resource_filename('mirage', ''), 
                                         'config', 'nircam_read_pattern_definitions.list')
    
    # Make an empty dictionary to store output summary table info
    summary_dict = OrderedDict()
    cols = SUMMARY_TABLE_PRIMARY + SUMMARY_TABLE_SCI
    for col in cols:
        summary_dict[col] = []
    
    # Check that the number of existing uncal and rate files are equal and
    # that no files are missing.
    gseg_rate_files = [f.replace('uncal', 'rate') for f in gseg_uncal_files]
    n_uncal, n_rate, missing_uncal_files, missing_rate_files = find_existing_files(gseg_uncal_files, 
                                                                                   gseg_rate_files)
    if (n_uncal != n_rate) | (len(missing_uncal_files) != 0) | (len(missing_rate_files) != 0):
        print('WARNING: Some files do not exist')
        print('Number of existing uncal files: {}'.format(n_uncal))
        print('Number of existing rate files: {}'.format(n_rate))
        print('Missing uncal files: {}'.format(missing_uncal_files))
        print('Missing rate files: {}'.format(missing_rate_files))
    
    # Create apt-derived dictionary
    pointing_file = xml_file.replace('.xml', '.pointing')
    catalogs = {'nircam': {'sw': 'nothing.cat', 'lw': 'nothing.cat'}}

    observation_list_file = os.path.join(output_dir, 'observation_list.yaml')
    apt_xml_dict = get_observation_dict(xml_file, observation_list_file, catalogs,
                                        verbose=True)

    observation_list = set(apt_xml_dict['ObservationID'])
    int_obs = sorted([int(o) for o in observation_list])
    str_obs_list = [str(o).zfill(3) for o in int_obs]

    for observation_to_check in str_obs_list:
        print('')
        print('')
        print('OBSERVATION: {}'.format(observation_to_check))
        print('')

        good = np.where(np.array(apt_xml_dict['ObservationID']) == observation_to_check)

        try:
            total_expected_files = calculate_total_files(apt_xml_dict, good[0][0])
            print('Total number of expected files: {}'.format(total_expected_files))
        except IndexError:
            print("No files found.")
            continue

        # The complication here is that the table created by Mirage does not have a filename
        # attached to each entry. So we need a way to connect an actual filename
        # to each entry
        subdir_start = 'jw' + apt_xml_dict['ProposalID'][good[0][0]] + observation_to_check.zfill(3)
        matching_uncal_files = sorted([filename for filename in gseg_uncal_files if subdir_start in filename])
        matching_rate_files = sorted([filename for filename in gseg_rate_files if subdir_start in filename])
        print('Found uncal files:')
        for i in range(len(matching_uncal_files)):
            print(matching_uncal_files[i])
        print('')
        print('Found rate files:')
        for i in range(len(matching_rate_files)):
            print(matching_rate_files[i])
        print('')

        # Check to see if any files are missing
        if len(matching_uncal_files) != total_expected_files:
            print("WARNING: Missing uncal files for observation {}. Expected {} files, found {}.".format(observation_to_check, total_expected_files, len(matching_uncal_files)))
        if len(matching_rate_files) != total_expected_files:
            print("WARNING: Missing rate files for observation {}. Expected {} files, found {}.".format(observation_to_check, total_expected_files, len(matching_rate_files)))

        # Deal with the case of matching_uncal_files and matching_rate_files having
        # different lengths here. In order to loop over them they must have the same length
        if len(matching_uncal_files) != len(matching_rate_files):
            (matching_uncal_files, matching_rate_files) = equalize_file_lists(matching_uncal_files, matching_rate_files)
            print('Equalized file lists (should have a 1:1 correspondence):')
            for idx in range(len(matching_uncal_files)):
                print(matching_uncal_files[idx], matching_rate_files[idx])

        # Create siaf instance for later calculations
        siaf = pysiaf.Siaf('NIRCam')

        for file_pair in zip(matching_uncal_files, matching_rate_files):
            for f in file_pair: 
                # Only validate files that exist
                good_file = f != None
                if good_file:
                    if not os.path.isfile(f):
                        print('WARNING: File does not exist: {}'.format(f))
                        good_file = False
                
                if good_file:
                    print("Checking {}".format(os.path.split(f)[1]))
                    print('-----------------------------------------------')
                    file_type = f.split('.fits')[0].split('_')[-1].upper()
                else:
                    continue
                
                # Verify that all expected extensions exist for this file and add
                # file info to the output summary table
                verify_extensions(f, file_type)
                summary_dict = add_entry(f, summary_dict)
                
                # Get info from header to be compared
                data, header, sci_header = get_data(f)
                header_vals = header_keywords(header)

                # Get matching data from the exposure table
                table_vals = table_info(apt_xml_dict, good[0][0])
                
                # Verify that the header and data dimensions are correct in each extension
                expected_shape = get_expected_shape(table_vals['SUBARRAY'])
                verify_dimensions(f, file_type, expected_shape)
                
                # Check detector/aperture
                detector_from_filename = f.split('_')[-2].upper()
                header_detector = header['DETECTOR']
                aperture = header['APERNAME']  # could also try APERNAME, PPS_APER
                if 'LONG' in header_detector:
                    header_detector = header_detector.replace('LONG', '5')
                if header_detector not in aperture:
                    print(("WARNING: Detector name and aperture name in file header appear to be incompatible: {}, {}"
                          .format(header['DETECTOR'], aperture)))
                    print("Detector listed in filename: {}".format(detector_from_filename))
                    print('If the aperture is incorrect then the calculated subarray location from pysiaf will also be incorrect.')
                data_shape = data.shape
                
                # Compare NFRAME, GROUPGAP from header with expected values based on READPATT
                m = Catalog_seed()
                params = {'Readout': {'readpatt': header['READPATT']},
                          'Reffiles': {'readpattdefs': read_pattern_def_file}}
                m.params = params
                m.read_pattern_check()
                nframes = m.params['Readout']['nframe']
                groupgap = m.params['Readout']['nskip']
                if nframes != header['NFRAMES']:
                    print('WARNING: NFRAME mismatch between header ({}) and expected value ({}).'.format(
                          nframes, header['NFRAMES']))
                if groupgap != header['GROUPGAP']:
                    print('WARNING: GROUPGAP mismatch between header ({}) and expected value ({}).'.format(
                          groupgap, header['GROUPGAP']))

                # Make some adjustments to the exposure table info

                # Calucate the exposure time
                print('Aperture listed in header is: {}'.format(aperture))
                num_amps = 1
                frametime = calc_frame_time('NIRCam', aperture, data_shape[-1], data_shape[-2], num_amps)
                table_vals['EFFEXPTM'] = frametime * int(table_vals['NGROUPS'])

                # NAXIS
                table_vals['NAXIS'] = len(data.shape)
                header_vals['NAXIS'] = sci_header['NAXIS']

                # Use pysiaf to calculate subarray locations
                try:
                    xc, yc = sci_subarray_corners('NIRCam', aperture, siaf=siaf)
                    table_vals['SUBSTRT1'] = xc[0] + 1
                    table_vals['SUBSTRT2'] = yc[0] + 1
                    table_vals['SUBSIZE1'] = siaf[aperture].XSciSize
                    table_vals['SUBSIZE2'] = siaf[aperture].YSciSize
                except KeyError:
                    print("ERROR: Aperture {} is not a valid aperture in pysiaf.".format(aperture))
                    xc = [-2, -2]
                    yc = [-2, -2]
                    table_vals['SUBSTRT1'] = xc[0] + 1
                    table_vals['SUBSTRT2'] = yc[0] + 1
                    table_vals['SUBSIZE1'] = 9999
                    table_vals['SUBSIZE2'] = 9999

                # Create FASTAXIS and SLOWAXIS values based on the detector name
                fast, slow = find_fastaxis(header_vals['DETECTOR'])
                table_vals['FASTAXIS'] = fast
                table_vals['SLOWAXIS'] = slow

                # Remove whitespace from observing template in file
                header_vals['TEMPLATE'] = header_vals['TEMPLATE'].replace(' ', '').lower()
                table_vals['TEMPLATE'] = table_vals['TEMPLATE'].lower()

                # Adjust prime/parallel boolean from table to be a string
                if not table_vals['EXPRIPAR']:
                    table_vals['EXPRIPAR'] = 'PRIME'
                else:
                    table_vals['EXPRIPAR'] = 'PARALLEL'

                # Change exposure type from table to match up with
                # types of strings in the file
                table_vals['EXP_TYPE'] = adjust_exptype(table_vals['EXP_TYPE'])

                # Set the DETECTOR field to be identical. This info is not in the
                # exposure table, so we can't actually check it
                table_vals['DETECTOR'] = header_vals['DETECTOR']

                # Now compare the data in the dictionary from the file versus that
                # from the exposure table created from the APT file
                err = False
                for key in header_vals:
                    if header_vals[key] != table_vals[key]:
                        if key not in FLOAT_KEYWORDS and key not in FILTER_KEYWORDS:
                            err = True
                            print('MISMATCH: {}, in exp table: {}, in file: {}'.format(key, table_vals[key], header_vals[key]))
                        elif key in FLOAT_KEYWORDS:
                            if not np.isclose(header_vals[key], table_vals[key], rtol=0.01, atol=0.):
                                err = True
                                print('MISMATCH: {}, in exp table: {}, in file: {}'.format(key, table_vals[key], header_vals[key]))

                        if key in ['LONGFILTER', 'LONGPUPIL'] and 'LONG' in header_vals['DETECTOR']:
                            err = True
                            print('MISMATCH: {}, in exp table: {}, in file: {}'.format(key, table_vals[key], header_vals[key]))
                        if key in ['SHORTFILTER', 'SHORTPUPIL'] and 'LONG' not in header_vals['DETECTOR']:
                            err = True
                            print('MISMATCH: {}, in exp table: {}, in file: {}'.format(key, table_vals[key], header_vals[key]))
                
                # Perform direct comparison between header keywords and their expected values
                if file_type == 'UNCAL':
                    for key in UNCAL_PRIMARY_KEYWORDS:
                        if UNCAL_PRIMARY_KEYWORDS[key] != header[key]:
                            err = True
                            print('MISMATCH: {}, expected: {}, in file: {}'.format(
                                  key, UNCAL_PRIMARY_KEYWORDS[key], header[key]))
                    for key in UNCAL_SCI_KEYWORDS:
                        if UNCAL_SCI_KEYWORDS[key] != sci_header[key]:
                            err = True
                            print('MISMATCH: {}, expected: {}, in file: {}'.format(
                                  key, UNCAL_SCI_KEYWORDS[key], sci_header[key]))
                elif file_type == 'RATE':
                    for key in RATE_PRIMARY_KEYWORDS:
                        if RATE_PRIMARY_KEYWORDS[key] != header[key]:
                            err = True
                            print('MISMATCH: {}, expected: {}, in file: {}'.format(
                                  key, RATE_PRIMARY_KEYWORDS[key], header[key]))
                    for key in RATE_SCI_KEYWORDS:
                        if RATE_SCI_KEYWORDS[key] != sci_header[key]:
                            err = True
                            print('MISMATCH: {}, expected: {}, in file: {}'.format(
                                  key, RATE_SCI_KEYWORDS[key], sci_header[key]))
                else:
                    print('No direct header checks performed for {} file type.'.format(file_type))

                if not err:
                    print('No inconsistencies. File header info correct.')
                    
                print('')

            print('')
            print('')
    
    # Output the summary table
    summary_table = Table(summary_dict)
    summary_table.write(os.path.join(output_dir, 'summary_table.txt'), format='ascii.fixed_width_two_line')
    

## Run the tool

In [16]:
xml_file = '/path/to/proposal/xml/file/00617.xml'
output_dir = '/location/to/place/outputs/'
gseg_uncal_files = glob('/path/to/gseg/files/*uncal.fits')

In [None]:
validate(xml_file, output_dir, gseg_uncal_files)

In [32]:
xml_file = '/ifs/jwst/wit/witserv/data7/nrc/bsunnquist/gseg_validation_tests/gseg3ete_dataflow3/xml_files/617_retrievedfromAPT.xml'
output_dir = '/ifs/jwst/wit/witserv/data7/nrc/bsunnquist/gseg_validation_tests/misc/'

read_pattern_def_file = os.path.join(pkg_resources.resource_filename('mirage', ''), 
                                     'config', 'nircam_read_pattern_definitions.list')

# Create apt-derived dictionary
pointing_file = xml_file.replace('.xml', '.pointing')
catalogs = {'nircam': {'sw': 'nothing.cat', 'lw': 'nothing.cat'}}

observation_list_file = os.path.join(output_dir, 'observation_list.yaml')
apt_xml_dict = get_observation_dict(xml_file, observation_list_file, catalogs,
                                    verbose=True)

observation_list = set(apt_xml_dict['ObservationID'])
int_obs = sorted([int(o) for o in observation_list])
str_obs_list = [str(o).zfill(3) for o in int_obs]
    


++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Observation `001` labelled `NIRCam EO-1 GSEG3E1-OTB-20190122 ModB-SUB640` uses template `NircamEngineeringImaging`
APTObservationParams Dictionary holds 0 entries before reading template
Primary dither element PrimaryDithers not found, use default primary dithers value (1).
Number of dithers: 1 primary * 1 subpixel = 1
Dictionary read from template has 1 entries.
Found 1 tile(s) for observation 001 NIRCam EO-1 GSEG3E1-OTB-20190122 ModB-SUB640
Found 1 visits with numbers: [1]
APTObservationParams Dictionary holds 1 entries after reading template (+1 entries)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Observation `002` labelled `NIRCam EO-2 GSEG3E1-OTB-20190122 ModB-SUB320` uses template `NircamEngineeringImaging`
APTObservationParams Dictionary holds 1 entries before reading template
Primary dither element PrimaryDithers not found

Found 1 tile(s) for observation 093 NIRCam EO-13 GSEG3E3-OTB-20190429 ModA-SUB160
Found 1 visits with numbers: [1]
APTObservationParams Dictionary holds 93 entries after reading template (+1 entries)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Observation `094` labelled `NIRCam EO-14 GSEG3E2-OTB-20190318 ModA-SUB400P` uses template `NircamEngineeringImaging`
APTObservationParams Dictionary holds 93 entries before reading template
Primary dither element PrimaryDithers not found, use default primary dithers value (1).
Number of dithers: 1 primary * 1 subpixel = 1
Dictionary read from template has 1 entries.
Found 1 tile(s) for observation 094 NIRCam EO-14 GSEG3E2-OTB-20190318 ModA-SUB400P
Found 1 visits with numbers: [1]
APTObservationParams Dictionary holds 94 entries after reading template (+1 entries)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Observation `095` labelled `

In [48]:
apt_xml_dict['Mode']

['imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'imaging',
 'im

In [None]:
gseg3otb_dryrun2: imaging, grism=None, APTTemplate='NircamEngineeringImaging'
gseg3ete_dataflow3: imaging, grism=None

In [38]:
files = glob('/ifs/jwst/wit/witserv/data7/nrc/gseg3/MAST_2019-07-19T1724/JWST/*/*uncal.fits')
for f in files:
    print(os.path.basename(f), fits.getheader(f)['ASNPOOL'])

KeyError: "Keyword 'ASNPOOL' not found."