# Merge CUES observations into a single file

Read the directories of CUES Level 1 CSV files for temperature, radiation, and snow depth, then align and merge them while doing some data cleanup. Save the result as a pandas dataframe in a pickle file. (**Note**: CUES data are in UTC-8)

---

In [1]:
import pandas as pd
import numpy as np
import os
#import datetime as dt
import pytz
import xarray as xr

In [2]:
def getListOfFiles(dirName):
    """Recursively collect the paths of all files beneath ``dirName``.

    Entries are visited in the order ``os.listdir`` reports them; when an
    entry is itself a directory, the files found inside it are spliced into
    the result at that position.

    Parameters
    ----------
    dirName : str
        Directory to search.

    Returns
    -------
    list of str
        One path per non-directory entry, each formed by joining the entry
        name onto the directory that contains it.
    """
    # Adapted from:
    # https://thispointer.com/python-how-to-get-list-of-files-in-directory-and-sub-directories/
    collected = []
    for name in os.listdir(dirName):
        candidate = os.path.join(dirName, name)
        if not os.path.isdir(candidate):
            # Plain file: record its full path and move on
            collected.append(candidate)
            continue
        # Sub-directory: descend and take everything found inside it
        collected.extend(getListOfFiles(candidate))

    return collected

---
## Open data files

Specify the directories containing the CUES Level 1 CSV files for temperature, radiation, and snow depth data:

In [3]:
# UNC path to the root of the CUES Level 1 database; each kind of
# observation lives in its own sub-directory beneath it.
cues_level1_root = r'\\j-lundquist-3.ce.washington.edu\storage\CUES\Level 1 - Database'

temp_directory = cues_level1_root + r'\temperature'
rad_directory = cues_level1_root + r'\radiation'
snowdepth_directory = cues_level1_root + r'\snowdepth'

Open the CUES temperature data files, and concatenate them together

In [4]:
# Collect the path of every file found under the temperature directory
cues_temp_files = getListOfFiles(temp_directory)

# Read each file and stack them all into a single pandas dataframe
cues_temp_data = pd.concat([pd.read_csv(path) for path in cues_temp_files])

# Parse the 'MeasDateTime' text strings into pandas datetimes (this is UTC-8)
cues_temp_data['datetime'] = pd.to_datetime(cues_temp_data['MeasDateTime'])

# Index the rows by datetime, put them in time order, then convert the
# pandas dataframe to an xarray dataset
_cues_temp_data = cues_temp_data.set_index('datetime').sort_index()
cues_ds = _cues_temp_data.to_xarray()

# The original timestamp strings are redundant now that 'datetime' is the index
cues_ds = cues_ds.drop_vars(names='MeasDateTime')

# Keep only the first record for each distinct datetime
# (open question: why are there duplicates, and where do they come from?)
_, index = np.unique(cues_ds['datetime'], return_index=True)
cues_ds = cues_ds.isel(datetime=index)

Open the CUES radiation data files, and concatenate them together

In [5]:
# Collect the path of every file found under the radiation directory
cues_rad_files = getListOfFiles(rad_directory)

# Read each file and stack them all into a single pandas dataframe
cues_rad_data = pd.concat([pd.read_csv(path) for path in cues_rad_files])

# Parse the 'MeasDateTime' text strings into pandas datetimes (this is UTC-8)
cues_rad_data['datetime'] = pd.to_datetime(cues_rad_data['MeasDateTime'])

# Index the rows by datetime, put them in time order, then convert the
# pandas dataframe to an xarray dataset
_cues_rad_data = cues_rad_data.set_index('datetime').sort_index()
cues_rad = _cues_rad_data.to_xarray()

# The original timestamp strings are redundant now that 'datetime' is the index
cues_rad = cues_rad.drop_vars(names='MeasDateTime')

# Keep only the first record for each distinct datetime, exactly as is done
# for the temperature data. xarray cannot align datasets along an index that
# contains duplicate values, so a repeated timestamp here would make the
# later merge fail. This is a no-op when every timestamp is already unique.
_, rad_unique_index = np.unique(cues_rad['datetime'], return_index=True)
cues_rad = cues_rad.isel(datetime=rad_unique_index)

Open the CUES snow depth data files, and concatenate them together

In [6]:
# Collect the path of every file found under the snow depth directory
cues_snowdepth_files = getListOfFiles(snowdepth_directory)

# Read each file and stack them all into a single pandas dataframe
cues_snowdepth_data = pd.concat([pd.read_csv(path) for path in cues_snowdepth_files])

# Parse the 'MeasDateTime' text strings into pandas datetimes (this is UTC-8)
cues_snowdepth_data['datetime'] = pd.to_datetime(cues_snowdepth_data['MeasDateTime'])

# Index the rows by datetime, put them in time order, then convert the
# pandas dataframe to an xarray dataset
_cues_snowdepth_data = cues_snowdepth_data.set_index('datetime').sort_index()
cues_snowdepth = _cues_snowdepth_data.to_xarray()

# The original timestamp strings are redundant now that 'datetime' is the index
cues_snowdepth = cues_snowdepth.drop_vars(names='MeasDateTime')

# Keep only the first record for each distinct datetime, exactly as is done
# for the temperature data. xarray cannot align datasets along an index that
# contains duplicate values, so a repeated timestamp here would make the
# later merge fail. This is a no-op when every timestamp is already unique.
_, snowdepth_unique_index = np.unique(cues_snowdepth['datetime'], return_index=True)
cues_snowdepth = cues_snowdepth.isel(datetime=snowdepth_unique_index)

---
# Merge datasets

Merge the temperature, radiation, and snow depth datasets together, then save the result out to a file.

In [7]:
# Combine the temperature, radiation, and snow depth datasets into one,
# aligned on their shared 'datetime' index. The join and compat modes are
# spelled out (they match xarray's long-standing defaults) so that the union
# of all timestamps is kept, with gaps filled by NaN, even if the installed
# xarray version changes its default merge behaviour.
cues_ds = xr.merge(
    [cues_ds, cues_rad, cues_snowdepth],
    join='outer',
    compat='no_conflicts',
)

In [8]:
# Flatten the merged xarray dataset back into a pandas dataframe ...
cues_df = cues_ds.to_dataframe()

# ... and serialize it to a pickle file (relative path, so it is written to
# the current working directory)
cues_df.to_pickle('CUES_L1_Temp_Rad_Snowdepth_2017-2020.pkl')