diff --git a/CHANGELOG.md b/CHANGELOG.md index 54009ddf..808679db 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,11 +2,16 @@ All notable changes to this project will be documented in this file. This project adheres to [Semantic Versioning](https://semver.org/). -## [0.1.0] - 2021-XX-XX +## [0.1.0] - 2022-XX-XX - Enhancements - Improved definitions of general and GNSS meta data - Removed unused logic in cleaning routines - Moved warning for no cleaning of JRO ISR data to preprocess + - Added a general Madrigal instrument for time-series data + - Added functions to specify all known Madrigal instrument codes and + file formats + - Adapted general listing functions to allow file formats with '*' wildcards + between '.' delimiters, required for some Madrigal file formats - Standardized the Instrument method kwarg defaults - Added 'site' tag to the GNSS TEC Instrument - Added support for varied use of `two_digit_year_break` to @@ -17,6 +22,7 @@ This project adheres to [Semantic Versioning](https://semver.org/). - Improved the docstring style - Testing - Added unit tests for general, JRO, and GNSS methods + - Added the packaging module to handle version logic - Bug - Fixed bugs in the coordinate conversion functions diff --git a/docs/supported_instruments.rst b/docs/supported_instruments.rst index afe5f1cd..ca45d415 100644 --- a/docs/supported_instruments.rst +++ b/docs/supported_instruments.rst @@ -22,7 +22,7 @@ TEC is provided by MIT Haystack. :members: JRO_ISR ----------------- +------- The incoherent scatter radar (ISR) at the `Jicamarca Radio Observatory `_ regularly @@ -31,3 +31,22 @@ magnetic equator over Peru. .. automodule:: pysatMadrigal.instruments.jro_isr :members: + + +Madrigal_Pandas +--------------- + +A general instrument for Madrigal time-series data. This +:py:class:`pysat.Instrument` uses Madrigal instrument codes and kindats to +support the use of any of the Madrigal time-series data sets. 
There are some +further constraints in that the data set's Madrigal naming convention must be +parsable by pysat. Currently nine Madrigal instrument codes are supported by +this :py:class:`pysat.Instrument`. When possible, using a specific instrument +module is recommended, since that instrument module will have additional +support (e.g., cleaning methods, experiment acknowledgements, and references). + +.. automodule:: pysatMadrigal.instruments.madrigal_pandas + :members: + + + diff --git a/pysatMadrigal/instruments/__init__.py b/pysatMadrigal/instruments/__init__.py index 0ad832e6..3a8698b7 100644 --- a/pysatMadrigal/instruments/__init__.py +++ b/pysatMadrigal/instruments/__init__.py @@ -1,8 +1,11 @@ # Import Madrigal instruments -from pysatMadrigal.instruments import dmsp_ivm, gnss_tec, jro_isr +from pysatMadrigal.instruments import dmsp_ivm +from pysatMadrigal.instruments import gnss_tec +from pysatMadrigal.instruments import jro_isr +from pysatMadrigal.instruments import madrigal_pandas # Import Madrigal methods from pysatMadrigal.instruments import methods # noqa F401 # Define variable name with all available instruments -__all__ = ['dmsp_ivm', 'gnss_tec', 'jro_isr'] +__all__ = ['dmsp_ivm', 'gnss_tec', 'jro_isr', 'madrigal_pandas'] diff --git a/pysatMadrigal/instruments/gnss_tec.py b/pysatMadrigal/instruments/gnss_tec.py index c97358b4..3e75b288 100644 --- a/pysatMadrigal/instruments/gnss_tec.py +++ b/pysatMadrigal/instruments/gnss_tec.py @@ -53,22 +53,21 @@ pandas_format = False +# Madrigal tags +madrigal_inst_code = 8000 +madrigal_tag = {'': {'vtec': '3500', 'site': '3506'}} +# TODO(#12): `, 'los': '3505'}}` + # Local attributes -dname = '{{month:02d}}{{day:02d}}' -vname = '.{{version:03d}}' -supported_tags = {ss: {'vtec': ''.join(['gps{{year:02d}}', dname, 'g', vname, - ".{file_type}"]), - 'site': ''.join(['site_{{year:04d}}', dname, vname, - ".{file_type}"])} +fname = general.madrigal_file_format_str(madrigal_inst_code, + verbose=False).split("*") 
+supported_tags = {ss: {'vtec': ''.join(['gps', fname[1], 'g', fname[2]]), + 'site': ''.join(['site_{{year:04d}}{{month:02d}}', + '{{day:02d}}', fname[2]])} for ss in inst_ids.keys()} remote_tags = {ss: {kk: supported_tags[ss][kk].format(file_type='hdf5') for kk in inst_ids[ss]} for ss in inst_ids.keys()} -# Madrigal tags -madrigal_inst_code = 8000 -madrigal_tag = {'': {'vtec': '3500', 'site': '3506'}} -# , 'los': '3505'}} <- Issue #12 - # ---------------------------------------------------------------------------- # Instrument test attributes diff --git a/pysatMadrigal/instruments/jro_isr.py b/pysatMadrigal/instruments/jro_isr.py index b9c5a02e..a104d6ec 100644 --- a/pysatMadrigal/instruments/jro_isr.py +++ b/pysatMadrigal/instruments/jro_isr.py @@ -1,13 +1,11 @@ +#!/usr/bin/env python +# Full license can be found in License.md +# Full author list can be found in .zenodo.json file +# DOI:10.5281/zenodo.3824979 +# ---------------------------------------------------------------------------- # -*- coding: utf-8 -*-. """Supports the Incoherent Scatter Radar at the Jicamarca Radio Observatory. -The Incoherent Scatter Radar (ISR) at the Jicamarca Radio Observatory (JRO) -observes ion drifts, line-of-sight neutral winds, electron density and -temperature, ion temperature, and ion composition through three overarching -experiments. - -Downloads data from the JRO Madrigal Database. - Properties ---------- platform @@ -30,7 +28,15 @@ Note ---- - Please provide name and email when downloading data with this routine. +The Incoherent Scatter Radar (ISR) at the Jicamarca Radio Observatory (JRO) +observes ion drifts, line-of-sight neutral winds, electron density and +temperature, ion temperature, and ion composition through three overarching +experiments. + +Downloads data from the JRO Madrigal Database. + +Please provide name (user) and email (password) when downloading data with this +routine. 
""" @@ -55,24 +61,23 @@ pandas_format = False -# Local attributes -jro_fname1 = 'jro{{year:4d}}{{month:02d}}{{day:02d}}' -jro_fname2 = '.{{version:03d}}.{file_type}' -supported_tags = {ss: {'drifts': jro_fname1 + "drifts" + jro_fname2, - 'drifts_ave': jro_fname1 + "drifts_avg" + jro_fname2, - 'oblique_stan': jro_fname1 + jro_fname2, - 'oblique_rand': jro_fname1 + "?" + jro_fname2, - 'oblique_long': jro_fname1 + "?" + jro_fname2} - for ss in inst_ids.keys()} -remote_tags = {ss: {kk: supported_tags[ss][kk].format(file_type='hdf5') - for kk in inst_ids[ss]} for ss in inst_ids.keys()} - # Madrigal tags madrigal_inst_code = 10 madrigal_tag = {'': {'drifts': "1910", 'drifts_ave': "1911", 'oblique_stan': "1800", 'oblique_rand': "1801", 'oblique_long': "1802"}, } +# Local attributes +jro_fname = general.madrigal_file_format_str(madrigal_inst_code, verbose=False) +supported_tags = {ss: {'drifts': jro_fname.replace("*", "drifts"), + 'drifts_ave': jro_fname.replace("*", "drifts_avg"), + 'oblique_stan': jro_fname.replace("*", ""), + 'oblique_rand': jro_fname.replace("*", "?"), + 'oblique_long': jro_fname.replace("*", "?")} + for ss in inst_ids.keys()} +remote_tags = {ss: {kk: supported_tags[ss][kk].format(file_type='hdf5') + for kk in inst_ids[ss]} for ss in inst_ids.keys()} + # ---------------------------------------------------------------------------- # Instrument test attributes @@ -202,7 +207,7 @@ def download(date_array, tag='', inst_id='', data_path=None, user=None, Notes ----- The user's names should be provided in field user. Ruby Payne-Scott should - be entered as Ruby+Payne-Scott + be entered as "Ruby Payne-Scott" The password field should be the user's email address. These parameters are passed to Madrigal when downloading. 
@@ -214,6 +219,7 @@ def download(date_array, tag='', inst_id='', data_path=None, user=None, general.download(date_array, inst_code=str(madrigal_inst_code), kindat=madrigal_tag[inst_id][tag], data_path=data_path, user=user, password=password, file_type=file_type) + return def load(fnames, tag='', inst_id=''): diff --git a/pysatMadrigal/instruments/madrigal_pandas.py b/pysatMadrigal/instruments/madrigal_pandas.py new file mode 100644 index 00000000..57fb6bde --- /dev/null +++ b/pysatMadrigal/instruments/madrigal_pandas.py @@ -0,0 +1,365 @@ +#!/usr/bin/env python +# Full license can be found in License.md +# Full author list can be found in .zenodo.json file +# DOI:10.5281/zenodo.3824979 +# ---------------------------------------------------------------------------- +# -*- coding: utf-8 -*-. +"""Supports generalized access to Madrigal Data. + +Properties +---------- +platform + 'madrigal' +name + 'pandas' +tag + Madrigal instrument code as a string +inst_id + Madrigal kindat as a string + +Note +---- +To use this routine, you need to know both the Madrigal Instrument code +as well as the data tag numbers that Madrigal uses to uniquely identify +data sets. You also need to know that the data is a simple time series (e.g., +satellite in situ observations). + +Multiple kindat values are supported, as long as they are separated by commas. + +Although you can use this instrument module for any time-series data set, we +highly recommend using the instrument-specific module if it exists. + +Please provide name (user) and email (password) when downloading data with this +routine. + +Warnings +-------- +All data downloaded under this general support is placed in the same directory, +pysat_data_dir/madrigal/pandas/. For technical reasons, the file search +algorithm for pysat's Madrigal support is set to permissive defaults. 
Thus, all +instrument files downloaded via this interface will be picked up by the madrigal +pandas pysat Instrument object unless the file_format keyword is used at +instantiation. + +Files can be safely downloaded without knowing the file_format keyword, +or equivalently, how Madrigal names the files. See `Examples` for more. + +Examples +-------- +:: + + + import datetime as dt + import pysat + import pysatMadrigal as py_mad + + # Download DMSP data from Madrigal + dmsp_abi = pysat.Instrument(inst_module=py_mad.instruments.madrigal_pandas, + tag='180', kindat='17110') + dmsp_abi.download(dt.datetime(2015, 1, 1), dt.datetime(2015, 1, 2), + user='Firstname Lastname', password='email@address.com') + dmsp_abi.load(date=dt.datetime(2015, 1, 1)) + +""" + +import datetime as dt + +from pysat import logger + +from pysatMadrigal.instruments.methods import general + +# ---------------------------------------------------------------------------- +# Instrument attributes + +platform = 'madrigal' +name = 'pandas' +tags = dict() +pandas_format = True +excluded_tags = ['8105'] # Pandas-style data that requires special support + +# Assign only tags with pysat-compatible file format strings +pandas_codes = general.known_madrigal_inst_codes(pandas_format=True) +for tag in pandas_codes.keys(): + try: + general.madrigal_file_format_str(tag, strict=True) + if tag not in excluded_tags: + tags[tag] = pandas_codes[tag] + except ValueError: + pass + +inst_ids = {'': list(tags.keys())} # There are too many kindat to track here + +# Local attributes +# +# Need a way to get the filename strings for a particular instrument unless +# wildcards start working +supported_tags = {ss: {tag: general.madrigal_file_format_str(tag) + for tag in inst_ids[ss]} for ss in inst_ids.keys()} + +# ---------------------------------------------------------------------------- +# Instrument test attributes + +# Need to sort out test day setting for unit testing, maybe through a remote +# function +tag_dates = 
{'120': dt.datetime(1963, 11, 27), '170': dt.datetime(1998, 7, 1), + '180': dt.datetime(2000, 1, 1), '210': dt.datetime(1950, 1, 1), + '211': dt.datetime(1978, 1, 1), '212': dt.datetime(1957, 1, 1), + '7800': dt.datetime(2009, 11, 10)} +_test_dates = {'': {tag: tag_dates[tag] if tag in tag_dates.keys() + else tag_dates['7800'] for tag in tags.keys()}} +_test_download = {'': {tag: True for tag in tags.keys()}} + +# ---------------------------------------------------------------------------- +# Instrument methods + + +def init(self, kindat=''): + """Initializes the Instrument object in support of Madrigal access + + Parameters + ---------- + kindat : str + Madrigal instrument experiment code(s). (default='') + + """ + + # Set the standard pysat attributes + self.acknowledgements = general.cedar_rules() + self.references = 'Please remember to cite the instrument articles.' + + # If the kindat (madrigal tag) is not known, advise user + self.kindat = kindat + if self.kindat == '': + logger.warning('`inst_id` did not supply KINDAT, all will be returned.') + + # Remind the user of the Rules of the Road + logger.info(self.acknowledgements) + return + + +def clean(self): + """Placeholder routine that would normally return cleaned data + + Note + ---- + Supports 'clean', 'dusty', 'dirty' in the sense that it prints + a message noting there is no cleaning. + 'None' is also supported as it signifies no cleaning. + + Routine is called by pysat, and not by the end user directly. 
+ + """ + + if self.clean_level in ['clean', 'dusty', 'dirty']: + logger.warning(''.join(["The generalized Madrigal data Instrument ", + "can't support instrument-specific cleaning."])) + + return + + +# ---------------------------------------------------------------------------- +# Instrument functions +# +# Use the default Madrigal and pysat methods + +# Set the load routine +load = general.load + + +# Set the list routine +def list_files(tag, inst_id, data_path, kindat='', format_str=None, + file_cadence=dt.timedelta(days=1), delimiter=None, + file_type=None): + """Create a Pandas Series of every file for chosen Instrument data. + + Parameters + ---------- + tag : str + Denotes type of file to load. Accepts strings corresponding to the + appropriate Madrigal Instrument `tags`. + inst_id : str + Specifies the instrument ID to load. Accepts strings corresponding to + the appropriate Madrigal Instrument `inst_ids`. + data_path : str + Path to data directory. + kindat : str + Madrigal KINDAT code, specifies an experiment for the specified + instrument. May be a single value, blank (all), or a comma-delimited + list. (default='') + format_str : str or NoneType + User specified file format. If None is specified, the default + formats associated with the supplied tags are used. (default=None) + supported_tags : dict or NoneType + Keys are inst_id, each containing a dict keyed by tag + where the values are file format template strings. (default=None) + file_cadence : dt.timedelta or pds.DateOffset + pysat assumes a daily file cadence, but some instrument data files + contain longer periods of time. This parameter allows the specification + of regular file cadences greater than or equal to a day (e.g., weekly, + monthly, or yearly). (default=dt.timedelta(days=1)) + delimiter : str or NoneType + Delimiter string upon which files will be split (e.g., '.'). If None, + filenames will be parsed presuming a fixed width format. 
(default=None) + file_type : str or NoneType + File format for Madrigal data. Load routines currently accept 'hdf5', + 'simple', and 'netCDF4', but any of the Madrigal options may be used + here. If None, will look for all known file types. (default=None) + + Returns + ------- + out : pds.Series + A pandas Series containing the verified available files + + """ + if kindat == '': + kindat = "*" + + # Get the local file format with the requested kindat + local_tags = {ss: {kk: supported_tags[ss][kk].replace("{kindat}", kindat) + for kk in inst_ids[ss]} for ss in inst_ids.keys()} + + # Determine the two-digit year break value + if local_tags[inst_id][tag].find("{year:04d}") >= 0: + two_digit_year_break = None + else: + two_digit_year_break = 50 + + # Determine if a delimiter is needed + if delimiter is None and local_tags[inst_id][tag].find('*') >= 0: + delimiter = '.' + + out = general.list_files(tag, inst_id, data_path, format_str=format_str, + supported_tags=local_tags, + file_cadence=file_cadence, + two_digit_year_break=two_digit_year_break, + delimiter=delimiter, file_type=file_type) + + return out + + +def download(date_array, tag='', inst_id='', data_path=None, user=None, + password=None, file_type='hdf5', kindat=''): + """Downloads data from Madrigal. + + Parameters + ---------- + date_array : array-like + list of datetimes to download data for. The sequence of dates need not + be contiguous. + tag : str + Madrigal Instrument code cast as a string. (default='') + inst_id : str + Satellite ID string identifier used for particular dataset. (default='') + data_path : str + Path to directory to download data to. (default=None) + user : str + User string input used for download. Provided by user and passed via + pysat. If an account is required for downloads this routine here must + error if user not supplied. (default=None) + password : str + Password for data download. (default=None) + file_type : str + File format for Madrigal data. 
(default='hdf5') + kindat : str + Madrigal KINDAT code, specifies an experiment for the specified + instrument. May be a single value, blank (all), or a comma-delimited + list. (default='') + + Notes + ----- + The user's names should be provided in field user. Maria Goeppert Mayer + should be entered as "Maria Goeppert Mayer" + + The password field should be the user's email address. These parameters + are passed to Madrigal when downloading. + + The affiliation field is set to pysat to enable tracking of pysat + downloads. + + """ + + general.download(date_array, inst_code=tag, kindat=kindat, + data_path=data_path, user=user, password=password, + file_type=file_type) + return + + +def list_remote_files(tag, inst_id, kindat='', user=None, password=None, + url="http://cedar.openmadrigal.org", + start=dt.datetime(1900, 1, 1), stop=dt.datetime.utcnow()): + """List files available from Madrigal. + + Parameters + ---------- + tag : str + Denotes type of file to load. Accepts strings corresponding to the + appropriate Madrigal Instrument `tags`. + inst_id : str + Specifies the instrument ID to load. Accepts strings corresponding to + the appropriate Madrigal Instrument `inst_ids`. + kindat : str + Madrigal KINDAT code, specifies an experiment for the specified + instrument. May be a single value, blank (all), or a comma-delimited + list. (default='') + data_path : str or NoneType + Path to directory to download data to. (default=None) + user : str or NoneType + User string input used for download. Provided by user and passed via + pysat. If an account is required for downloads this routine here must + error if user not supplied. (default=None) + password : str or NoneType + Password for data download. 
(default=None) + url : str + URL for Madrigal site (default='http://cedar.openmadrigal.org') + start : dt.datetime + Starting time for file list (default=dt.datetime(1900, 1, 1)) + stop : dt.datetime + Ending time for the file list (default=dt.datetime.utcnow()) + + Returns + ------- + remote_files : pds.Series + A series of filenames, see `pysat.utils.files.process_parsed_filenames` + for more information. + + Raises + ------ + ValueError + For missing kwarg input + KeyError + For dictionary input missing requested tag/inst_id + + Note + ---- + The password field should be the user's email address. These parameters + are passed to Madrigal when downloading. + + The affiliation field is set to pysat to enable tracking of pysat + downloads. + + """ + if kindat == '': + kindat = "*" + + # Get the remote file type format + remote_tags = {ss: {kk: supported_tags[ss][kk].format(file_type='hdf5', + kindat=kindat) + for kk in inst_ids[ss]} for ss in inst_ids.keys()} + + # Determine the two-digit year break value + if remote_tags[inst_id][tag].find("{year:04d}") >= 0: + two_digit_year_break = None + else: + two_digit_year_break = 50 + + # Set the kindat dictionary + kindats = {ss: {kk: kindat if kk == tag else '' for kk in inst_ids[ss]} + for ss in inst_ids.keys()} + + # Set the list_remote_files routine + remote_files = general.list_remote_files( + tag, inst_id, inst_code=int(tag), kindats=kindats, user=user, + password=password, supported_tags=remote_tags, url=url, + two_digit_year_break=two_digit_year_break, start=start, stop=stop) + + return remote_files diff --git a/pysatMadrigal/instruments/methods/general.py b/pysatMadrigal/instruments/methods/general.py index 5ed75e1f..bc26563a 100644 --- a/pysatMadrigal/instruments/methods/general.py +++ b/pysatMadrigal/instruments/methods/general.py @@ -1,3 +1,8 @@ +#!/usr/bin/env python +# Full license can be found in License.md +# Full author list can be found in .zenodo.json file +# DOI:10.5281/zenodo.3824979 +# 
---------------------------------------------------------------------------- # -*- coding: utf-8 -*-. """General routines for integrating CEDAR Madrigal instruments into pysat. @@ -34,6 +39,459 @@ def cedar_rules(): return ackn +def known_madrigal_inst_codes(pandas_format=None): + """Supplies known Madrigal instrument codes with a brief description. + + Parameters + ---------- + pandas_format : bool or NoneType + Separate instrument codes by time-series (True) or multi-dimensional + data types (False) if a boolean is supplied, or supply all if NoneType + (default=None) + + Returns + ------- + inst_codes : dict + Dictionary with string instrument code values as keys and a brief + description of the corresponding instrument as the value. + + """ + + time_series = {'120': 'Interplanetary Mag Field and Solar Wind', + '210': 'Geophysical Indicies', '211': 'AE Index', + '212': 'DST Index', '170': 'POES Spacecraft Particle Flux', + '180': 'DMSP-Auroral Boundary Index', + '8100': 'Defense Meteorological Satellite Program', + '8105': 'Van Allen Probes', '8400': 'Jason/Topex Ocean TEC', + '8250': 'Jicamarca Magnetometer', + '8255': 'Piura Magnetometer', + '8300': 'Sodankyla Magnetometer', + '7800': 'Green Bank Telescope'} + multi_dim = {'10': 'Jicamarca ISR', '20': 'Arecibo ISR Linefeed', + '21': 'Arecibo ISR Gregorian', + '22': 'Arecibo ISR Velocity Vector', + '25': 'MU ISR', '30': 'Millstone Hill ISR', + '31': 'Millstone Hill UHF Steerable Antenna', + '32': 'Millstone Hill UHF Zenith Antenna', + '40': 'St. Santin ISR', '41': 'St. Santin Nançay Receiver', + '42': 'St. Santin Mende Receiver', + '43': 'St. 
Santin Monpazier Receiver', + '45': 'Kharkov Ukraine ISR', '50': 'Chatanika ISR', + '53': 'ISTP Irkutsk Radar', '57': 'UK Malvern ISR', + '61': 'Poker Flat ISR', '70': 'EISCAT combined ISRs', + '71': 'EISCAT Kiruna UHF ISR', '72': 'EISCAT Tromsø UHF ISR', + '73': 'EISCAT Sodankylä UHF ISR', + '74': 'EISCAT Tromsø VHF ISR', '75': 'EISCAT Kiruna VHF ISR', + '76': 'EISCAT Sodankylä VHF ISR', '80': 'Sondrestrom ISR', + '85': 'ALTAIR ISR', '91': 'Resolute Bay North ISR', + '92': 'Resolute Bay Canada ISR', + '95': 'EISCAT Svalbard ISR Longyearbyen', + '100': 'QuJing ISR', '310': 'TGCM/TIGCM model', + '311': 'AMIE Model', '312': 'USU-TDIM Model', + '320': 'Solar sd Tides', '321': 'Lunar sd Tides', + '322': 'GSWM model', '820': 'Halley HF Radar', + '830': 'Syowa Station HF Radar', '845': 'Kapuskasing HF Radar', + '861': 'Saskatoon HF Radar', '870': 'Goose Bay HF Radar', + '900': 'Hankasalmi HF Radar', '910': 'Stokkseyri HF Radar', + '911': 'Pykkvibaer HF Radar', '1040': 'Arecibo MST Radar', + '1140': 'Poker Flat MST Radar', + '1180': 'SOUSY Svalbard MST Radar Longyearbyen', + '1210': 'Scott Base MF Radar', + '1215': 'Davis Antarctica MF radar', + '1220': 'Mawson MF Radar', '1221': 'Rothera MF radar', + '1230': 'Christchurch MF Radar', + '1240': 'Adelaide MF Radar', '1245': 'Rarotonga MF radar', + '1254': 'Tirunelveli MF radar', '1270': 'Kauai MF radar', + '1275': 'Yamagawa MF radar', '1285': 'Platteville MF radar', + '1310': 'Wakkanai MF radar', '1320': 'Collm LF Radar', + '1340': 'Saskatoon MF Radar', + '1375': 'The Poker Flat MF radar', '1390': 'Tromsø MF Radar', + '1395': 'Syowa MF Radar', '1400': 'Halley MF Radar', + '13': 'JASMET Jicamarca All-Sky Specular Meteor Radar', + '1539': 'Ascension Island Meteor Radar', + '1540': 'Rothera Meteor Radar', + '1560': 'Atlanta meteor Radar', '1620': 'Durham meteor Radar', + '1750': 'Obninsk meteor radar', '1775': 'Esrange meteor radar', + '1780': 'Wuhan meteor radar', '1781': 'Mohe meteor radar', + '1782': 'Beijing meteor radar', 
'1783': 'Sanya meteor radar', + '1784': 'South Pole meteor radar', + '1785': 'Southern Argentina Agile Meteor Radar', + '1786': 'Cachoeira Paulista Meteor Radar', + '1787': 'Buckland Park Meteor Radar', + '1788': 'Kingston Meteor Radar', '1790': 'Andes Meteor Radar', + '1791': 'Southern Cross Meteor Radar', + '1792': 'Las Campanas Meteor Radar', + '1793': 'CONDOR multi-static meteor radar system', + '2090': 'Christmas Island ST/MEDAC Radar', + '2200': 'Platteville ST/MEDAC Radar', + '2550': 'ULowell Digisonde MLH Radar', + '2890': 'Sondre Stromfjord Digisonde', + '2900': 'Sodankylä Ionosonde (SO166)', + '2930': 'Qaanaaq Digisonde ST/MEDAC Radars', + '2950': 'EISCAT Tromsø Dynasonde', + '2951': 'EISCAT Svalbard Dynasonde', + '2952': 'IRF Dynasonde at EISCAT site Kiruna', + '5000': 'South Pole Fabry-Perot', '5005': 'Palmer Fabry Perot', + '5015': 'Arrival Heights Fabry-Perot', + '5020': 'Halley Fabry-Perot', + '5060': 'Mount John Fabry-Perot', + '5140': 'Fabry-Perot Arequipa', + '5145': 'Fabry-Perot Jicamarca', '5150': 'Fabry-Perot Mobile', + '5160': 'Arecibo Fabry-Perot', + '5190': 'Kitt Peak H-alpha Fabry-Perot', + '5240': 'Fritz Peak Fabry-Perot', + '5292': 'Ann Arbor Fabry-Perot', + '5300': 'Peach Mountain Fabry-Perot', + '5340': 'Millstone Hill Fabry-Perot', + '5360': 'Millstone Hill High-Res Fabry-Perot', + '5370': 'Arecibo Imaging Doppler Fabry-Perot', + '5380': 'Culebra Fabry-Perot', + '5430': 'Watson Lake Fabry-Perot', + '5460': 'College Fabry-Perot', + '5465': 'Poker Flat all-sky scanning Fabry-Perot', + '5470': 'Fort Yukon Fabry-Perot', + '5475': 'Poker Flat Fabry-Perot', + '5480': 'Sondre Stromfjord Fabry-Perots', + '5510': 'Inuvik NWT Fabry-Perot', + '5535': 'Resolute Bay Fabry-Perot', + '5540': 'Thule Fabry-Perot', '5545': 'Cariri Brazil FPI', + '5546': 'Cajazeiras Brazil FPI', + '5547': 'Pisgah Astronomical Research FPI', + '5548': 'Urbana Atmospheric Observatory FPI', + '5549': 'Kirtland Airforce Base FPI', + '5550': 'Virginia Tech FPI', + '5551': 
'Peach Mountain (MiniME) FPI', + '5552': 'Merihill Peru FPI', '5553': 'Nazca Peru FPI', + '5554': 'Eastern Kentucky FPI', + '5600': 'Jang Bogo Station FPI', + '5700': 'South Pole Michelson Interferometer', + '5720': 'Daytona Beach Michelson Interferometer', + '5860': 'Stockholm IR Michelson', + '5900': 'Sondrestrom Michelson Interferometer', + '5950': 'Resolute Bay Michelson Interferometer', + '5980': 'Eureka Michelson Interferometer', + '6205': 'Arecibo Potassium [K] lidar', + '6206': 'Arecibo Sodium [Na] lidar', + '6300': 'CEDAR lidar', '6320': 'Colorado State sodium lidar', + '6330': 'Rayleigh lidar at the ALO - USU/CASS', + '6340': 'Andes Na T/W Lidar', '6350': 'ALOMAR Sodium Lidar', + '6360': 'CU STAR Sodium Lidar', '6370': 'USU Na lidar', + '6380': 'Poker Flat lidar', '7190': 'USU CCD Imager', + '7192': 'USU Advanced Mesospheric Temperature Mapper', + '7200': 'BU Millstone All-Sky Imager', + '7201': 'BU Arecibo All-Sky Imager', + '7202': 'BU Asiago All-Sky Imager', + '7203': 'BU El Leoncito All-Sky Imager', + '7204': 'BU McDonald All-Sky Imager', + '7205': 'BU Rio Grande All-Sky Imager', + '7206': 'BU Jicamarca All-Sky Imager', '7240': 'MIO', + '7580': 'All-sky cameras at Qaanaaq', + '11': 'Jicamarca Bistatic Radar', '840': 'JULIA', + '3000': 'ARL UT TBB Receiver', + '7600': 'Chelmsford HS Ozone Radiometer', + '7602': 'Lancaster UK Ozone Radiometer', + '7603': 'Bridgewater MA Ozone Radiometer', + '7604': 'Union College Ozone Radiometer', + '7605': 'UNC Greensboro Ozone Radiometer', + '7606': 'Lynnfield HS Ozone Radiometer', + '7607': 'Alaska Pacific Ozone Radiometer', + '7608': 'Hermanus SA Ozone Radiometer', + '7609': 'Sanae Antarctic Ozone Radiometer', + '7610': 'Sodankylä Ozone Radiometer', + '7611': 'Lancaster2 UK Ozone Radiometer', + '7612': 'Haystack Ridge Ozone Radiometer', + '7613': 'Haystack NUC3 8-channel Ozone Radiometer', + '7614': 'Fairbanks Ozone Radiometer', + '8001': 'South Pole Scintillation Receiver', + '8000': 'World-wide GNSS Receiver 
Network', + '8002': 'McMurdo Scintillation Receiver', + '8010': 'GNSS Scintillation Network', + '3010': 'Davis Czerny-Turner Scanning Spectrophotometer', + '3320': 'Wuppertal (DE) Czerny-Turner OH Grating Spectrometer', + '4470': 'Poker Flat 4 Channel Filter Photometer', + '4473': 'Fort Yukon 4 Channel Filter Photometer', + '4480': 'Arecibo red line photometer', + '4481': 'Arecibo green line photometer', + '7191': 'USU Mesospheric Temperature Mapper'} + + if pandas_format is None: + inst_codes = dict(**time_series, **multi_dim) + elif pandas_format: + inst_codes = time_series + else: + inst_codes = multi_dim + + return inst_codes + + +def madrigal_file_format_str(inst_code, strict=False, verbose=True): + """Supplies the known Madrigal file format string for an instrument code. + + Parameters + ---------- + inst_code : int or str + Madrigal instrument code as an integer or a string castable to int + strict : bool + If True, returns only file formats that will definitely not have a + problem being parsed by pysat. If False, will return any file format. + (default=False) + verbose : bool + If True raises logging warnings, if False does not log any warnings. + (default=True) + + Returns + ------- + fstr : str + File formatting string that may or may not be parsable by pysat + + Raises + ------ + ValueError + If file formats with problems would be returned and `strict` is True. + + Note + ---- + File strings that have multiple '*' wildcards typically have several + experiment types and require a full pysat Instrument to properly manage + these types. 
+ + """ + + if not isinstance(inst_code, int): + inst_code = int(inst_code) + + format_str = { + 120: 'imf{{year:02d}}{{month:02d}}{{day:02d}}g.{{version:03d}}.', + 210: 'geo{{year:02d}}{{month:02d}}{{day:02d}}g.{{version:03d}}.', + 211: 'aei{{year:02d}}{{month:02d}}{{day:02d}}g.{{version:03d}}.', + 212: 'dst{{year:02d}}{{month:02d}}{{day:02d}}g.{{version:03d}}.', + 170: 'pfx{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 180: 'dmp{{year:04d}}{{month:02d}}{{day:02d}}.{{version:03d}}.', + 8100: 'dms*_{{year:04d}}{{month:02d}}{{day:02d}}_*.{{version:03d}}.', + 8105: 'van_allen_{{year:04d}}_{{month:02d}}.{{version:03d}}.', + 8400: '???{{year:04d}}{{month:02d}}{{day:02d}}j*.{{version:03d}}.', + 8250: 'jic{{year:04d}}{{month:02d}}{{day:02d}}_mag.{{version:03d}}.', + 8255: 'pmt*.', + 8300: 'smt*.', + 7800: 'gbt{{year:04d}}{{month:02d}}{{day:02d}}.{{version:03d}}.', + 10: 'jro{{year:04d}}{{month:02d}}{{day:02d}}*.{{version:03d}}.', + 20: 'aro*{{year:02d}}{{month:02d}}{{day:02d}}a.{{version:03d}}.', + 21: 'aro*{{year:02d}}{{month:02d}}{{day:02d}}*g.{{version:03d}}.', + 22: 'ar?*{{year:02d}}{{month:02d}}{{day:02d}}*.{{version:03d}}.', + 25: 'mui{{year:02d}}{{month:02d}}{{day:02d}}?.{{version:03d}}.', + 30: 'mlh{{year:02d}}{{month:02d}}{{day:02d}}?.{{version:03d}}.', + 31: 'mlh{{year:02d}}{{month:02d}}{{day:02d}}?.{{version:03d}}.', + 32: 'mlh{{year:02d}}{{month:02d}}{{day:02d}}.{{version:03d}}.', + 40: 'sts{{year:02d}}{{month:02d}}{{day:02d}}g.{{version:03d}}.', + 41: 'sts{{year:02d}}{{month:02d}}{{day:02d}}g.{{version:03d}}.', + 42: 'sts{{year:02d}}{{month:02d}}{{day:02d}}g.{{version:03d}}.', + 43: 'sts{{year:02d}}{{month:02d}}{{day:02d}}g.{{version:03d}}.', + 45: 'kha{{year:02d}}{{month:02d}}{{day:02d}}g.{{version:03d}}.', + 50: 'cht{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 53: 'ist{{year:02d}}{{month:02d}}{{day:02d}}g.{{version:03d}}.', + 57: 'mlv{{year:02d}}{{month:02d}}{{day:02d}}g.{{version:03d}}.', + 61: 
'pfa{{year:02d}}{{month:02d}}{{day:02d}}g.{{version:03d}}.', + 70: 'MAD????_{year:04d}}-{{month:02d}}-{{day:02d}}_*.', + 71: 'MAD????_{year:04d}}-{{month:02d}}-{{day:02d}}_*@kir.', + 72: 'MAD????_{year:04d}}-{{month:02d}}-{{day:02d}}_*@uhf.', + 73: 'MAD????_{year:04d}}-{{month:02d}}-{{day:02d}}_*@sod.', + 74: 'MAD????_{year:04d}}-{{month:02d}}-{{day:02d}}_*@vhf.', + 75: 'MAD????_{year:04d}}-{{month:02d}}-{{day:02d}}_*@vkrv*.', + 76: 'MAD????_{year:04d}}-{{month:02d}}-{{day:02d}}_*@sdv*.', + 80: 'son{{year:02d}}{{month:02d}}{{day:02d}}.{{version:03d}}.', + 85: 'ALT{{year:02d}}{{month:02d}}{{day:02d}}_*.', + 91: 'ran{{year:02d}}{{month:02d}}{{day:02d}}.{{version:03d}}.', + 92: 'ras{{year:02d}}{{month:02d}}{{day:02d}}.{{version:03d}}.', + 95: 'MAD????_{year:04d}}-{{month:02d}}-{{day:02d}}_*@esr.', + 100: 'MAD????_{year:04d}}-{{month:02d}}-{{day:02d}}_*@quj.', + 310: 'gcm*.', + 311: 'ami*.', + 312: 'tdi*.', + 320: 'sdt*.', + 321: 'sdl*.', + 322: 'gsw*.', + 820: 'hhf*.', + 830: 'syf*.', + 845: 'khf*.', + 861: 'shf*.', + 870: 'gbf*.', + 900: 'fhf*.', + 910: 'whf*.', + 911: 'ehf*.', + 1040: 'arm{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 1140: 'pkr{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 1180: 'ssr*.', + 1210: 'sbf{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 1215: 'dav{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 1220: 'maf{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 1221: 'rth{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 1230: 'ccf{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 1240: 'adf{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 1245: 'rtg{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 1254: 'tyr{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 1270: 'kau{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 1275: 'yam{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 1285: 'plr{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 1310: 'wak{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 1320: 
'cof{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 1340: 'saf{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 1375: 'rpk{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 1390: 'trf{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 1395: 'sym_{{year:04d}}{{month:02d}}{{day:02d}}.{{version:03d}}.', + 1400: 'hmf_{{year:04d}}{{month:02d}}{{day:02d}}.{{version:03d}}.', + 13: 'D{{year:04d}}{{month:02d}}*.', + 1539: 'asc{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 1540: 'rmr_{{year:04d}}{{month:02d}}{{day:02d}}.{{version:03d}}.', + 1560: 'atm{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 1620: 'dum{{year:02d}}{{month:02d}}{{day:02d}}g.{{version:03d}}.', + 1750: 'obn{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 1775: 'emr{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 1780: 'wmr*.', + 1781: 'mmr*.', + 1782: 'bmr*.', + 1783: 'smr*.', + 1784: 'som{{year:02d}}{{month:02d}}{{day:02d}}.{{version:03d}}.', + 1785: 'amr{{year:04d}}{{month:02d}}{{day:02d}}.{{version:03d}}.', + 1786: 'cpr_{{year:04d}}{{month:02d}}{{day:02d}}.{{version:03d}}.', + 1787: 'bpr_{{year:04d}}{{month:02d}}{{day:02d}}.{{version:03d}}.', + 1788: 'kgr_{{year:04d}}{{month:02d}}{{day:02d}}.{{version:03d}}.', + 1790: 'ame*.', + 1791: 'sco*.', + 1792: 'lcm*.', + 1793: 'alo{{year:04d}}{{month:02d}}{{day:02d}}_{{version:03d}}.', + 2090: 'cia{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 2200: 'pla{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 2550: 'uld*.', + 2890: 'ssd{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 2900: 'sdi*.', + 2930: 'qad{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 2950: 'trd*.', + 2951: 'lrd*.', + 2952: 'krd*.', + 5000: 'spf{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 5005: 'pfi{{year:04d}}{{month:02d}}{{day:02d}}.', + 5015: 'ahf{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 5020: 'hfp{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 5060: 'mjf{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 5140: 
'aqf{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 5145: 'jfp{{year:04d}}{{month:02d}}{{day:02d}}_*.{{version:03d}}.', + 5150: 'mfp{{year:04d}}{{month:02d}}{{day:02d}}_*.{{version:03d}}.', + 5160: 'afp{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 5190: 'kha{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 5240: 'fpf{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 5292: 'aaf{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 5300: 'pfp{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 5340: 'mfp{{year:02d}}{{month:02d}}{{day:02d}}g.{{version:03d}}.?.', + 5360: 'kfp{{year:02d}}{{month:02d}}{{day:02d}}g*.', + 5370: 'aif{{year:02d}}{{month:02d}}{{day:02d}}g*.', + 5380: 'clf{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 5430: 'wfp{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 5460: 'cfp{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 5465: 'pkf{{year:02d}}{{month:02d}}{{day:02d}}*.', + 5470: 'FYU{{year:04d}}{{month:02d}}{{day:02d}}.', + 5475: 'PKZ{{year:04d}}{{month:02d}}{{day:02d}}.', + 5480: 'sfp{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 5510: 'ikf{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 5535: 'rfp{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 5540: 'tfp{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 5545: ''.join(['minime01_car_{{year:04d}}{{month:02d}}{{day:02d}}.', + 'cedar.{{version:03d}}.']), + 5546: ''.join(['minime02_caj_{{year:04d}}{{month:02d}}{{day:02d}}.', + 'cedar.{{version:03d}}.']), + 5547: ''.join(['minime06_par_{{year:04d}}{{month:02d}}{{day:02d}}.', + 'cedar.{{version:03d}}.']), + 5548: ''.join(['minime02_uao_{{year:04d}}{{month:02d}}{{day:02d}}.', + 'cedar.{{version:03d}}.']), + 5549: 'Kirtland Airforce Base FPI', + 5550: ''.join(['minime09_vti_{{year:04d}}{{month:02d}}{{day:02d}}.', + 'cedar.{{version:03d}}.']), + 5551: ''.join(['minime08_ann_{{year:04d}}{{month:02d}}{{day:02d}}.', + 'cedar.{{version:03d}}.']), + 5552: 'Merihill Peru FPI', + 5553: 'Nazca Peru FPI', + 5554: 
''.join(['minime07_euk_{{year:04d}}{{month:02d}}{{day:02d}}.', + 'cedar.{{version:03d}}.']), + 5600: 'jbs_{{year:04d}}{{month:02d}}{{day:02d}}.{{version:03d}}.', + 5700: 'spm{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 5720: 'dbm{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 5860: 'stm{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 5900: 'sfm{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 5950: 'rbm{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 5980: 'eum{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 6205: 'akl{{year:02d}}{{month:02d}}{{day:02d}}g.*.', + 6206: 'asl{{year:04d}}{{month:02d}}{{day:02d}}.{{version:03d}}.', + 6300: 'uil{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 6320: 'Colorado State sodium lidar', + 6330: 'usl{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 6340: 'alo*.', + 6350: 'nlo*.', + 6360: 'cul*.', + 6370: 'unl*.', + 6380: 'pfl{{year:04d}}{{month:02d}}{{day:02d}}_{{cycle:03d}}.', + 7190: 'usi*.', + 7192: 'amp{{year:02d}}{{month:02d}}{{day:02d}}?.{{version:03d}}.', + 7200: 'mhi{{year:04d}}{{month:02d}}{{day:02d}}.{kindat}.', + 7201: 'aai{{year:04d}}{{month:02d}}{{day:02d}}.{kindat}.', + 7202: 'abi{{year:04d}}{{month:02d}}{{day:02d}}.{kindat}.', + 7203: 'eai{{year:04d}}{{month:02d}}{{day:02d}}.{kindat}.', + 7204: 'mai{{year:04d}}{{month:02d}}{{day:02d}}.{kindat}.', + 7205: 'rai{{year:04d}}{{month:02d}}{{day:02d}}.{kindat}.', + 7206: 'jci{{year:04d}}{{month:02d}}{{day:02d}}.{kindat}.', + 7240: 'mhi*.', + 7580: 'qac*.', + 11: 'j??*{{year:02d}}{{month:02d}}{{day:02d}}g.{{version:03d}}.', + 840: 'jul{{year:04d}}{{month:02d}}{{day:02d}}_esf.{{version:03d}}.', + 3000: 'utx*.', + 8001: '????_?_??.gps_all.out.', + 8000: '*{{year:02d}}{{month:02d}}{{day:02d}}*.{{version:03d}}.', + 8002: '????_?_??.gps_all.out.', + 8010: 'scin_{{year:04d}}{{month:02d}}{{day:02d}}.{{version:03d}}.', + 3010: 'dvs{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 3320: 'wup{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 4470: 
'p4p{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 4473: 'y4p{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.', + 4480: 'arp{{year:04d}}{{month:02d}}{{day:02d}}.{{version:03d}}.', + 4481: 'agp{{year:04d}}{{month:02d}}{{day:02d}}.{{version:03d}}.', + 7191: 'mtm{{year:02d}}{{month:02d}}{{day:02d}}g.{kindat}.'} + + # Warn if file format not available + msg = "" + if inst_code not in format_str.keys(): + msg = "".join(["file format string not available for ", + "instrument code {:d}: ".format(inst_code)]) + fstr = "*." + else: + fstr = format_str[inst_code] + + # Warn if file format has multiple '*' wildcards + num_wc = len(fstr.split("*")) + if num_wc >= 3: + msg = "".join(["file format string has multiple '*' ", + "wildcards, may not be parsable by pysat"]) + elif fstr.find('{{year') < 0 and fstr != "*.": + msg = "".join(["file format string missing date info, ", + "may not be parsable by pysat"]) + elif num_wc > 1: + nspec_sec = 0 + for fsplit in fstr.split("*"): + if fsplit.find("}}") > 0 and fsplit.find("{{") >= 0: + nspec_sec += 1 + + if nspec_sec > 1: + msg = "".join(["file format string has '*' between formatting", + " constraints, may not be parsable by pysat"]) + + if len(msg) > 0: + if strict: + raise ValueError(msg) + elif verbose: + logger.warning(msg) + + fstr += "{file_type}" + + return fstr + + def load(fnames, tag='', inst_id='', xarray_coords=None): """Loads data from Madrigal into Pandas or XArray. @@ -459,7 +917,7 @@ def download(date_array, inst_code=None, kindat=None, data_path=None, return -def get_remote_filenames(inst_code=None, kindat=None, user=None, password=None, +def get_remote_filenames(inst_code=None, kindat='', user=None, password=None, web_data=None, url="http://cedar.openmadrigal.org", start=dt.datetime(1900, 1, 1), stop=dt.datetime.now(), date_array=None): @@ -470,10 +928,10 @@ def get_remote_filenames(inst_code=None, kindat=None, user=None, password=None, inst_code : str or NoneType Madrigal instrument code(s), cast as a string. 
If multiple are used, separate them with commas. (default=None) - kindat : str or NoneType + kindat : str Madrigal experiment code(s), cast as a string. If multiple are used, separate them with commas. If not supplied, all will be returned. - (default=None) + (default='') data_path : str or NoneType Path to directory to download data to. (default=None) user : str or NoneType @@ -487,10 +945,12 @@ def get_remote_filenames(inst_code=None, kindat=None, user=None, password=None, (default=None) url : str URL for Madrigal site (default='http://cedar.openmadrigal.org') - start : dt.datetime - Starting time for file list (defaults to 01-01-1900) - stop : dt.datetime - Ending time for the file list (defaults to time of run) + start : dt.datetime or NoneType + Starting time for file list, None reverts to default + (default=dt.datetime(1900, 1, 1)) + stop : dt.datetime or NoneType + Ending time for the file list, None reverts to the current UTC time + (default=dt.datetime.now()) date_array : dt.datetime or NoneType Array of datetimes to download data for. The sequence of dates need not be contiguous and will be used instead of start and stop if supplied.
@@ -522,7 +982,7 @@ def get_remote_filenames(inst_code=None, kindat=None, user=None, password=None, _check_madrigal_params(inst_code=inst_code, user=user, password=password) - if kindat is None: + if kindat in ['', '*']: kindat = [] else: kindat = [int(kk) for kk in kindat.split(",")] @@ -534,9 +994,18 @@ def get_remote_filenames(inst_code=None, kindat=None, user=None, password=None, date_array)) start = date_array.min() stop = date_array.max() + + # If NoneType was supplied for start or stop, set to defaults + if start is None: + start = dt.datetime(1900, 1, 1) + + if stop is None: + stop = dt.datetime.utcnow() + # If start and stop are identical, increment if start == stop: stop += dt.timedelta(days=1) + # Open connection to Madrigal if web_data is None: web_data = madrigalWeb.MadrigalData(url) @@ -550,11 +1019,12 @@ def get_remote_filenames(inst_code=None, kindat=None, user=None, password=None, # Iterate over experiments to grab files for each one files = list() - logger.info("Found {:d} Madrigal experiments".format(len(exp_list))) + istr = "Found {:d} Madrigal experiments between {:s} and {:s}".format( + len(exp_list), start.strftime('%d %B %Y'), stop.strftime('%d %B %Y')) + logger.info(istr) for exp in exp_list: if good_exp(exp, date_array=date_array): file_list = web_data.getExperimentFiles(exp.id) - if len(kindat) == 0: files.extend(file_list) else: @@ -718,8 +1188,12 @@ def list_remote_files(tag, inst_id, inst_code=None, kindats=None, user=None, # Parse these filenames to grab out the ones we want logger.info("Parsing filenames") - stored = pysat.utils.files.parse_fixed_width_filenames(filenames, - format_str) + if format_str.find('*') < 0: + stored = pysat.utils.files.parse_fixed_width_filenames(filenames, + format_str) + else: + stored = pysat.utils.files.parse_delimited_filenames(filenames, + format_str, '.') # Process the parsed filenames and return a properly formatted Series logger.info("Processing filenames") @@ -727,10 +1201,10 @@ def 
list_remote_files(tag, inst_id, inst_code=None, kindats=None, user=None, two_digit_year_break) -def list_files(tag, inst_id, data_path=None, format_str=None, +def list_files(tag, inst_id, data_path, format_str=None, supported_tags=None, file_cadence=dt.timedelta(days=1), two_digit_year_break=None, delimiter=None, file_type=None): - """Return a Pandas Series of every file for chosen Instrument data. + """Create a Pandas Series of every file for chosen Instrument data. Parameters ---------- @@ -740,9 +1214,8 @@ def list_files(tag, inst_id, data_path=None, format_str=None, inst_id : str Specifies the instrument ID to load. Accepts strings corresponding to the appropriate Madrigal Instrument `inst_ids`. - data_path : str or NoneType - Path to data directory. If None is specified, the value previously - set in Instrument.files.data_path is used. (default=None) + data_path : str + Path to data directory. format_str : str or NoneType User specified file format. If None is specified, the default formats associated with the supplied tags are used. (default=None) @@ -876,13 +1349,19 @@ def _check_madrigal_params(inst_code, user, password): """ - if inst_code is None: - raise ValueError("Must supply Madrigal instrument code") + inst_codes = known_madrigal_inst_codes(None) + + if str(inst_code) not in inst_codes.keys(): + raise ValueError(''.join(["Unknown Madrigal instrument code: ", + repr(inst_code), ". If this is a valid ", + "Madrigal instrument code, please update ", + "`pysatMadrigal.instruments.methods.general", + ".known_madrigal_inst_codes`."])) if not (isinstance(user, str) and isinstance(password, str)): raise ValueError(' '.join(("The madrigal database requries a username", "and password. 
Please input these as", - "user='firstname+lastname' and", + "user='firstname lastname' and", "password='myname@email.address' in this", "function."))) diff --git a/pysatMadrigal/instruments/templates/madrigal_pandas.py b/pysatMadrigal/instruments/templates/madrigal_pandas.py deleted file mode 100644 index a24e0a8f..00000000 --- a/pysatMadrigal/instruments/templates/madrigal_pandas.py +++ /dev/null @@ -1,181 +0,0 @@ -# -*- coding: utf-8 -*-. -"""Supports generalized access to Madrigal Data. - -To use this routine, you need to know both the Madrigal Instrument code -as well as the data tag numbers that Madrigal uses to uniquely identify -data sets. Using these codes, the methods.madrigal.py routines will -be used to support downloading and loading of data. - -Downloads data from the Madrigal Database. - -Warnings --------- -All data downloaded under this general support is placed in the same directory, -pysat_data_dir/madrigal/pandas/. For technical reasons, the file search -algorithm for pysat's Madrigal support is set to permissive defaults. Thus, all -instrument files downloaded via this interface will be picked up by the madrigal -pandas pysat Instrument object unless the file_format keyword is used at -instantiation. - -Files can be safely downloaded without knowing the file_format keyword, -or equivalently, how Madrigal names the files. See `Examples` for more. 
- -Properties ----------- -platform - 'madrigal' -name - 'pandas' -tag - madrigal instrument code as an integer -inst_id - madrigal kindat as a string - -Examples --------- -:: - - # For isolated use of a madrigal data set - import pysat - # Download DMSP data from Madrigal - dmsp = pysat.Instrument('madrigal', 'pandas', inst_code=8100, - kindat='10241') - dmsp.download(dt.datetime(2017, 12, 30), dt.datetime(2017, 12, 31), - user='Firstname+Lastname', password='email@address.com') - dmsp.load(2017, 363) - - # For users that plan on using multiple Madrigal datasets - # using this general interface then an additional parameter - # should be supplied upon instrument instantiation (file_format) - - # pysat needs information on how to parse filenames from Madrigal - # for the particular instrument under study. - # When starting from scratch (no files), this is a two step process. - # First, get atleast one file from Madrigal, using the steps above - # using the file downloaded. Using the filename, convert it to a template - # string - # and pass that to pysat when instantiating future Instruments. - - # For example, one of the files downloaded above is - # dms_ut_19980101_11.002.hdf5 - # pysat needs a template for how to pull out the year, month, day, and, - # if available, hour, minute, second, etc. - # the format/template string for this instrument is - # 'dms_ut_{year:4d}{month:02d}{day:02d}_12.002.hdf5', following - # python standards for string templates/Formatters - # https://docs.python.org/2/library/string.html - - # The complete instantiation for this instrument is - file_fmt = 'dms_ut_{year:4d}{month:02d}{day:02d}_11.002.hdf5' - dmsp = pysat.Instrument('madrigal', 'pandas', inst_code=8100, - kindat='10241', file_format=file_fmt) - -Note ----- -Please provide name and email when downloading data with this routine. 
- -""" - -import functools - -from pysat.instruments.methods import general as ps_gen -from pysat import logger - -from pysatMadrigal.instruments.methods import general - -# ---------------------------------------------------------------------------- -# Instrument attributes - -platform = 'madrigal' -name = 'pandas' -tags = {'': 'General pysat Madrigal data access.'} -inst_ids = {'': list(tags.keys())} - -pandas_format = True - -# Local attributes -# -# Need a way to get the filename strings for a particular instrument unless -# wildcards start working -fname = '*{year:4d}{month:02d}{day:02d}*.{version:03d}.hdf5' -supported_tags = {ss: {tt: fname for tt in inst_ids[ss]} - for ss in inst_ids.keys()} -remote_tags = {ss: {kk: supported_tags[ss][kk].format(file_type='hdf5') - for kk in inst_ids[ss]} for ss in inst_ids.keys()} - -# ---------------------------------------------------------------------------- -# Instrument test attributes - -# Need to sort out test day setting for unit testing, maybe through a remote -# function -# _test_dates = {'': {'': dt.datetime(2010, 1, 19)}} - -# ---------------------------------------------------------------------------- -# Instrument methods - - -def init(self): - """Initializes the Instrument object in support of Madrigal access - - Runs once upon instantiation. - - Parameters - ---------- - self : pysat.Instrument - This object - - """ - - logger.info(general.cedar_rules()) - self.acknowledgements = general.cedar_rules() - self.references = 'Please remember to cite the instrument articles.' - - self.inst_code = self.kwargs['inst_code'] - self.kindat = self.kwargs['kindat'] - - return - - -def clean(self): - """Placeholder routine that would normally return cleaned data - - Note - ---- - Supports 'clean', 'dusty', 'dirty' in the sense that it prints - a message noting there is no cleaning. - 'None' is also supported as it signifies no cleaning. - - Routine is called by pysat, and not by the end user directly. 
- - """ - - if self.clean_level in ['clean', 'dusty', 'dirty']: - logger.warning('Generalized Madrigal data support has no cleaning.') - - return - - -# ---------------------------------------------------------------------------- -# Instrument functions -# -# Use the default Madrigal and pysat methods - -# Set the list_remote_files routine -# Need to fix this -# list_remote_files = functools.partial(general.list_remote_files, -# inst_code=self.kwargs['inst_code'], -# kindats=self.kwargs['kindat'], -# supported_tags=remote_tags) - -# Set the load routine -load = general.load - -# Set the list routine -list_files = functools.partial(ps_gen.list_files, - supported_tags=supported_tags) - -# Set up the download routine -# Needs to be fixed -# download = functools.partial(general.download, -# inst_code=str(self.kwargs['inst_code']), -# kindat=self.kwargs['kindat']) diff --git a/pysatMadrigal/tests/test_methods_general.py b/pysatMadrigal/tests/test_methods_general.py index a14ddefc..16cf9cce 100644 --- a/pysatMadrigal/tests/test_methods_general.py +++ b/pysatMadrigal/tests/test_methods_general.py @@ -9,8 +9,9 @@ import gzip import logging import numpy as np -import tempfile import os +from packaging import version +import tempfile import netCDF4 as nc from madrigalWeb import madrigalWeb @@ -21,10 +22,6 @@ from pysatMadrigal.instruments.methods import general -# Get the pysat version for skipping tests that currently require the -# develop branch -pv_major, pv_minor, pv_bug = [int(ps) for ps in pysat.__version__.split(".")] - class TestLocal(object): """Unit tests for general methods that run locally.""" @@ -61,8 +58,8 @@ def test_empty_load(self, xarray_coords): assert self.out[1] == pysat.Meta() return - @pytest.mark.skipif(pv_major < 3 or (pv_major == 3 and pv_minor == 0 - and pv_bug <= 1), + @pytest.mark.skipif(version.Version(pysat.__version__) + < version.Version('3.1.0'), reason="requires newer pysat version.") @pytest.mark.parametrize("pad", [None, 
pds.DateOffset(days=2)]) def test_filter_data_single_date(self, pad): @@ -96,13 +93,139 @@ def test_filter_data_single_date(self, pad): return + @pytest.mark.parametrize("pandas_format", [None, True, False]) + def test_known_madrigal_inst_codes(self, pandas_format): + """Test the output that specifies known Madrigal instrument codes. + + Parameters + ---------- + pandas_format : bool or NoneType + Separate instrument codes by time-series (True) or multi-dimensional + data types (False) if a boolean is supplied, or supply all if + NoneType (default=None) + + """ + + self.out = general.known_madrigal_inst_codes(pandas_format) + + assert isinstance(self.out, dict) + + if pandas_format is not False: + assert '120' in self.out.keys() + + if pandas_format is not True: + assert '10' in self.out.keys() + + return + + @pytest.mark.parametrize("inst_code", [120, 120.0, "120"]) + def test_madrigal_file_format_str(self, inst_code): + """Test the file format string for known Madrigal instrument codes. + + Parameters + ---------- + inst_code : int, float, or str + Madrigal instrument code for a well-defined file format + + """ + + # Get the function output + self.out = general.madrigal_file_format_str(inst_code) + + # Test the formatted string + for req_str in ['year', 'month', 'day', 'file_type']: + assert self.out.find(req_str) > 0, "{:s} not in {:s}".format( + req_str, self.out) + return + + @pytest.mark.parametrize("inst_code, msg", [ + (8001, "file format string missing date info"), + (1, "file format string not available for instrument code"), + (8000, "file format string has multiple '*'"), + (8400, "file format string has '*' between formatting constraints")]) + def test_madrigal_file_format_str_with_warnings(self, inst_code, msg, + caplog): + """Test poorly constrained file formats for Madrigal instrument codes. 
+ + Parameters + ---------- + inst_code : int + Madrigal instrument code for a poorly constrained file format + msg : str + Logger warning message + + """ + + # Get the output and raise the logging warning + with caplog.at_level(logging.WARN, logger='pysat'): + self.out = general.madrigal_file_format_str(inst_code) + + # Test the formatted string + assert self.out.find("file_type") > 0, \ + "'file_type' missing from {:s}".format(self.out) + + # Test the logger warning + assert len(caplog.records) == 1, "unexpected number of warnings" + assert caplog.records[0].levelname == "WARNING" + assert caplog.records[0].message.find(msg) >= 0 + return + + @pytest.mark.parametrize("inst_code", [8001, 1, 8000, 8400]) + def test_madrigal_file_format_str_quiet_warnings(self, inst_code, caplog): + """Test quiet, poorly constrained file formats for Madrigal inst codes. + + Parameters + ---------- + inst_code : int + Madrigal instrument code for a poorly constrained file format + + """ + + # Get the output with the logging warning suppressed + with caplog.at_level(logging.WARN, logger='pysat'): + self.out = general.madrigal_file_format_str(inst_code, + verbose=False) + + # Test the formatted string + assert self.out.find("file_type") > 0, \ + "'file_type' missing from {:s}".format(self.out) + + # Test that no logger warning was raised + assert len(caplog.records) == 0, "unexpected number of warnings" + return + + @pytest.mark.parametrize("inst_code, msg", [ + (8001, "file format string missing date info"), + (1, "file format string not available for instrument code"), + (8000, "file format string has multiple '*'"), + (8400, "file format string has '*' between formatting constraints")]) + def test_madrigal_file_format_str_with_errors(self, inst_code, msg): + """Test poorly constrained file formats raise ValueErrors.
+ + Parameters + ---------- + inst_code : int + Madrigal instrument code for a poorly constrained file format + msg : str + Expected ValueError message + + """ + + # Call the function and capture the expected ValueError + with pytest.raises(ValueError) as verr: + general.madrigal_file_format_str(inst_code, strict=True) + + # Test the error message + assert str(verr).find(msg) >= 0 + return + class TestErrors(object): """Tests for errors raised by the general methods.""" def setup(self): """Create a clean testing setup.""" - self.kwargs = {'inst_code': 'inst_code', + self.kwargs = {'inst_code': '10', 'user': 'username', 'password': 'password', 'kindats': {'testing': {'tag': 1000}}, @@ -114,17 +237,25 @@ def teardown(self): del self.kwargs return - def test_check_madrigal_params_no_code(self): - """Test that an error is thrown if None is passed through.""" + @pytest.mark.parametrize("inst_code", [None, "-47"]) + def test_check_madrigal_params_no_code(self, inst_code): + """Test that an error is thrown for a bad instrument code.
+ + Parameters + ---------- + inst_code : str or NoneType + A bad Madrigal instrument code + + """ # Set up the kwargs for this test del self.kwargs['kindats'], self.kwargs['supported_tags'] - self.kwargs['inst_code'] = None + self.kwargs['inst_code'] = inst_code # Get the expected error message and evaluate it with pytest.raises(ValueError) as verr: general._check_madrigal_params(**self.kwargs) - assert str(verr).find("Must supply Madrigal instrument code") >= 0 + assert str(verr).find("Unknown Madrigal instrument code") >= 0 return @pytest.mark.parametrize("bad_val", [None, 17, False, 12.34]) diff --git a/requirements.txt b/requirements.txt index 27c30233..25441d7b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ h5py madrigalWeb numpy +packaging pandas -pysat>=3.0 +pysat>=3.0.3 xarray diff --git a/setup.cfg b/setup.cfg index 4c918210..ca766151 100644 --- a/setup.cfg +++ b/setup.cfg @@ -41,13 +41,13 @@ setup_requires = setuptools >= 38.6; pip >= 10 include_package_data = True zip_safe = False packages = find: -install_requires = - h5py - madrigalWeb - numpy - pandas - pysat - xarray +install_requires = h5py + madrigalWeb + numpy + packaging + pandas + pysat + xarray [coverage:report] omit = */instruments/templates/*