# Merge CUES and GOES datasets

Read in our CUES and GOES datasets, set up the time bounds we want to look at, and resample everything to 5-minute intervals.

Then merge the GOES brightness temperature observations with the CUES dataset, and output the merged dataset for analysis elsewhere.

In [1]:
import pandas as pd
import numpy as np
import os
import datetime as dt
import xarray as xr

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
def getListOfFiles(dirName):
    """Recursively collect the paths of all files beneath ``dirName``.

    Entries are visited in the order ``os.listdir`` reports them, and each
    sub-directory is expanded in place, so its files appear at the position
    where the directory itself was listed.

    Adapted from:
    https://thispointer.com/python-how-to-get-list-of-files-in-directory-and-sub-directories/

    Parameters
    ----------
    dirName : str
        Directory to search.

    Returns
    -------
    list of str
        One path (prefixed with ``dirName``) for every non-directory entry
        found in ``dirName`` or any of its sub-directories.
    """
    collected = []
    for name in os.listdir(dirName):
        candidate = os.path.join(dirName, name)
        if not os.path.isdir(candidate):
            # Plain entry: record it and move on
            collected.append(candidate)
            continue
        # Directory: descend and splice its files into the running list
        collected.extend(getListOfFiles(candidate))
    return collected

---
#### Open CUES data file:

In [3]:
# Load the pickled CUES data and convert it straight to an xarray object
cues_ds = pd.read_pickle('CUES_L1_Temp_Rad_2017-2019_lw.pkl').to_xarray()

#### Open GOES observations for the CUES site:

In [4]:
# Find all our GOES files (sub-directories are searched too)
directory = r'\\j-lundquist-3.ce.washington.edu\storage\GOES\pkl\CUES'
goes_files = getListOfFiles(directory)
# Read in every file with pandas and stack them into a single dataframe
goes = pd.concat([pd.read_pickle(path) for path in goes_files])
# Change GOES time values from UTC to UTC-7 for CUES site.
# NOTE(review): this is a fixed 7-hour offset with no daylight-saving handling;
# confirm it matches how the CUES timestamps are recorded.
goes['datetime'] = goes['time'] - pd.Timedelta(hours=7)
goes = goes.drop(columns=['time'])
# Make our local-time datetimes the index, in chronological order
_goes = goes.set_index('datetime').sort_index()
# There are duplicate timestamps somewhere in the GOES data; keep the first of each
_goes = _goes.loc[~_goes.index.duplicated(keep='first')]
# There are also some values from before 2017-02-28; remove them.
# (pd.Timestamp replaces the deprecated pd.datetime alias, removed in pandas 2.0.)
_goes = _goes.loc[_goes.index >= pd.Timestamp('2017-02-28')]
# Convert the pandas dataframe to an xarray dataset
goes_ds = _goes.to_xarray()

  app.launch_new_instance()


---
#### Set time bounds and resample

Select only the overlapping time period we want, and resample all of these to 5-minute means, ignoring any NaN values.

(CUES provides ~1-minute observations, GOES is 5- or 10-minute)

In [5]:
# Bounds of the time period we want to look at
startdate = '2017-03-01'
enddate = '2019-12-31'

# Cut both datasets down to the same window of datetimes
_period = slice(startdate, enddate)
_cues_window = cues_ds.sel(datetime=_period)
_goes_window = goes_ds.sel(datetime=_period)

# Reduce each 5-minute bin with np.nanmean, so NaN values inside a bin are skipped
cues_ds_5min = _cues_window.resample(datetime='5min').reduce(np.nanmean)
goes_ds_5min = _goes_window.resample(datetime='5min').reduce(np.nanmean)

  data = func(input_data, axis=axis, **kwargs)


#### Merge the datasets

Merge GOES brightness temperature into the CUES dataset

In [6]:
# Put the 5-minute GOES data onto the CUES 5-minute timestamps (exact label match).
# reindex leaves NaN wherever GOES has no entry for a CUES timestamp, whereas an
# exact-match .sel() raises a KeyError if even one timestamp is missing.
_goes_ds = goes_ds_5min.reindex(datetime=cues_ds_5min.datetime)
# Pass the raw array (.data) rather than the DataArray itself: xarray deprecates
# using a DataArray as the data in a (dims, data) tuple.
cues_ds_5min['goes_tb_c'] = (['datetime'], _goes_ds.tb.data)

#### Export this merged dataset

In [7]:
# Convert the merged dataset back to a pandas dataframe, then pickle it to disk
_merged_df = cues_ds_5min.to_dataframe()
_merged_df.to_pickle('goes-cues_2017-2019.pkl')