In [None]:
import os
from astropy.time import Time 
import numpy as np
from json import dump as _dump


In [None]:
# Directory that is searched for locally stored SuomiNet '.plot' data files.
# The path is relative, so it is resolved against the current working directory.
SUOMI_DIR = './suomi_data' # Location of SuomiNet data


In [None]:
def get_suomi_paths():
    """Group the SuomiNet data files found in SUOMI_DIR by year

    Every file in SUOMI_DIR whose name ends in '.plot' is assigned to a year,
    which is read from the four characters immediately preceding the '.plot'
    extension. Files with any other extension are ignored. The result has the
    form {year (int): [paths of data files for year (str)]}.

    Returns:
        suomi_data (dict): A dictionary of available SuomiNet data
    """

    suomi_data = {}

    for fname in os.listdir(SUOMI_DIR):
        if not fname.endswith('.plot'):
            continue

        # The year occupies the four characters just before '.plot'
        year = int(fname[-9:-5])
        suomi_data.setdefault(year, []).append(os.path.join(SUOMI_DIR, fname))

    return suomi_data


In [None]:
def read_data(path):
    """Load a SuomiNet data file into a structured numpy array

    Expects data files from http://www.suominet.ucar.edu/data.html
    under the "Specific station - All year hourly" row. Only five of the
    file's columns are kept, and they are exposed through the field names
    'date', 'pwv', 'pres', 'temp', and 'hum'.

    Args:
        path (str): File path to be read

    Returns:
        data (numpy.ndarray): Structured array of the unique records in the file
    """

    column_index = (1, 2, 7, 8, 9)
    column_names = ('date', 'pwv', 'pres', 'temp', 'hum')
    column_types = ((np.str_, 16), float, float, float, float)

    records = np.genfromtxt(path,
                            usecols=column_index,
                            names=column_names,
                            dtype=column_types)

    # SuomiNet sometimes records duplicate entries; np.unique drops the
    # repeated records (and returns the remaining ones sorted)
    return np.unique(records)


In [None]:
def get_date_list(data_arrays):
    """Collect every distinct datetime found in a collection of data arrays

    The 'date' fields of all the given arrays are merged, duplicates are
    dropped, and each remaining datetime is converted to MJD. Expects arrays
    returned by 'read_data'.

    Args:
        data_arrays (list): list of numpy arrays returned by 'read_data'

    Returns:
        mjd (list): Sorted list of unique datetimes expressed in MJD
    """

    # One array of datetime strings per input array, merged into a single array
    date_columns = [array['date'] for array in data_arrays]
    unique_datetimes = np.unique(np.concatenate(date_columns))

    mjd = [Time(stamp, format='isot').mjd for stamp in unique_datetimes]
    mjd.sort()

    return mjd


In [None]:
def pad_data(dates, data):
    """Pad and mask an array of PWV values to match a list of dates

    Given an array of PWV measurements and their corresponding datetimes, pad
    the array so that there is an entry for every datetime in a given list.
    An entry is masked (and holds the filler value 1) when the date has no
    measurement in `data`, when it has more than one measurement, or when its
    single measurement is not greater than zero.

    Args:
        dates (list): Datetimes in MJD format, as returned by 'get_date_list'
        data (numpy.ndarray): Structured array returned by 'read_data'

    Returns:
        padded_data (numpy.ma.MaskedArray): One PWV entry per value in `dates`
    """

    # Group the PWV measurements by datetime, expressed in MJD. The datetimes
    # are converted one at a time with the same call used by 'get_date_list'
    # so the resulting floats compare equal to the entries of `dates`.
    pwv_by_mjd = {}
    for stamp, pwv in zip(data['date'], data['pwv']):
        mjd = float(Time(stamp, format='isot').mjd)
        pwv_by_mjd.setdefault(mjd, []).append(pwv)

    mask, pwv_list = [], []
    for date in dates:
        # Dictionary lookup keeps this loop linear in the number of dates
        measurements = pwv_by_mjd.get(float(date), ())

        # Eliminate cases with multiple values and non-positive measurements
        if len(measurements) == 1 and measurements[0] > 0:
            pwv_list.append(float(measurements[0]))
            mask.append(0)

        else:
            pwv_list.append(1) # Filler value
            mask.append(1)

    padded_data = np.ma.masked_array(data=pwv_list, mask=mask)
    return padded_data


In [None]:
def get_padded_data():
    """Read all available SuomiNet data from file and pad the resulting arrays

    Every locally available SuomiNet file is read with `read_data`. Within
    each year the arrays are padded with `pad_data` so that all receivers
    share one common list of dates and therefore have the same length. The
    padded data is returned in a dictionary of the form
    {year (int): {gps receiver (str): PWV data (masked_array)}}.

    Returns:
        all_data (dict): A composite dictionary containing padded data arrays
    """

    all_data = {}

    for year, paths in get_suomi_paths().items():
        # Read each file, keyed by the upper-cased first four characters of
        # its file name (the receiver ID)
        site_arrays = {}
        for fpath in paths:
            site_id = os.path.basename(fpath)[:4].upper()
            site_arrays[site_id] = read_data(fpath)

        # Every datetime measured by at least one receiver during this year
        dates = get_date_list(list(site_arrays.values()))

        # Pad each receiver's data onto the shared list of dates
        padded = {}
        for site_id, array in site_arrays.items():
            padded[site_id] = pad_data(dates, array)

        all_data[year] = padded

    return all_data


In [None]:
# Run the full read-and-pad pipeline. As the last expression of the cell, the
# returned {year: {receiver: masked_array}} dictionary is shown as the output.
get_padded_data()

In [None]:
class gen_fit_funcs():
    """Generate fit functions relating nearby receivers to Kitt Peak

    This class is used to generate fit functions relating the PWV readout
    of receivers near Kitt Peak to the PWV on Kitt Peak. It is also intended
    to be used to create a model of the PWV on Kitt Peak over time.
    """

    def __init__(self):
        """Parse all locally available SuomiNet files and assign the data to
        the self.all_data attribute as a composite dictionary of the form
        {year (int): {gps receiver (str): PWV data (masked_array)}}.

        Raises:
            Exception: If no SuomiNet data is found
        """

        self.all_data = get_padded_data() # This may take several minutes
        if not self.all_data:
            raise Exception('No SuomiNet Data Found')

    def create_linear_fits(self, path):
        """Fit a line relating each receiver's PWV to the PWV at Kitt Peak

        For every receiver other than 'KITT', the data from all years in which
        both the receiver and 'KITT' are present is combined. A first order
        polynomial is then fit to the entries that are unmasked for both
        sites, giving the Kitt Peak PWV as a function of the receiver's PWV.
        The parameters are written to a JSON file of the form
        {receiver (str): [slope (float), intercept (float)]}.

        Receivers that share fewer than two valid measurements with 'KITT'
        cannot be fit. They are skipped with a warning and left out of the
        output file.

        Args:
            path (str): Output file path. '.json' is appended if it is missing
        """

        import warnings # Only needed to report receivers that are skipped

        pwv_fits = dict()

        # Create a fit for each receiver. Sorting keeps the output order stable
        sites = set(key for subdict in self.all_data.values() for key in subdict)
        for receiver in sorted(sites - {'KITT'}):
            # Years for which both Kitt Peak and the receiver have data
            shared_years = [subdict for subdict in self.all_data.values()
                            if 'KITT' in subdict and receiver in subdict]

            if not shared_years:
                warnings.warn(
                    'No overlapping KITT data for {}; skipping fit'.format(receiver))
                continue

            kdata = np.ma.concatenate([subdict['KITT'] for subdict in shared_years])
            odata = np.ma.concatenate([subdict[receiver] for subdict in shared_years])

            # Entries that are unmasked in BOTH kdata and odata. An entry is
            # unusable if either site is masked, since masked entries hold
            # filler values. getmaskarray always returns a full boolean array,
            # even when nothing is masked and `.mask` is the scalar `nomask`.
            valid = ~(np.ma.getmaskarray(kdata) | np.ma.getmaskarray(odata))
            if np.count_nonzero(valid) < 2:
                warnings.warn(
                    'Fewer than two shared measurements for {}; skipping fit'.format(receiver))
                continue

            # Fit a first order polynomial relating odata to kdata
            kitt_pwv = np.ma.getdata(kdata)[valid]
            other_pwv = np.ma.getdata(odata)[valid]
            pwv_fits[receiver] = np.polyfit(other_pwv, kitt_pwv, 1).tolist()

        # Save parameters to file
        if not path.endswith('.json'):
            path += '.json'

        with open(path, 'w') as f:
            _dump(pwv_fits, f)


In [None]:
# Parse all local SuomiNet data, then fit each receiver against Kitt Peak.
# The path has no '.json' extension, so the fits are saved to 'test.json'.
test = gen_fit_funcs()
test.create_linear_fits('test')

To do:
   * Create PWV models from the individual fits
   * Mask values whose pressure is equal to the cutoff value