# Merge CUES observations into a single file

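Starting from directories of CUES Level 1 CSV files (temperature, radiation, and wind, plus 3D sonic anemometer winds), align and merge the observations while doing some data cleanup, then save the result as a pandas DataFrame in a pickle file. Daily precipitation is loaded and saved to its own pickle file.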

---

In [1]:
import pandas as pd
import numpy as np
import os
import datetime as dt
import xarray as xr

In [2]:
def getListOfFiles(dirName):
    """Recursively collect the paths of all files beneath ``dirName``.

    Parameters
    ----------
    dirName : str
        Directory to search.

    Returns
    -------
    list of str
        One path per non-directory entry, built by joining ``dirName`` with
        the entry names leading to it. Entries follow the order in which
        ``os.listdir`` yields them, with the files of a sub-directory placed
        at the position of that sub-directory.
    """
    # Adapted from
    # https://thispointer.com/python-how-to-get-list-of-files-in-directory-and-sub-directories/
    collected = []
    for name in os.listdir(dirName):
        candidate = os.path.join(dirName, name)
        if not os.path.isdir(candidate):
            # Plain entry: record its full path
            collected.append(candidate)
            continue
        # Sub-directory: descend and splice its files in at this position
        collected.extend(getListOfFiles(candidate))
    return collected

def getCUESdata(dirName):
    """Load every file under ``dirName`` into one datetime-indexed xarray dataset.

    Each path returned by ``getListOfFiles(dirName)`` is read with
    ``pd.read_csv`` and the resulting frames are concatenated. The
    'MeasDateTime' column is parsed into a new 'datetime' column, which becomes
    the sorted index before conversion to xarray.

    Parameters
    ----------
    dirName : str
        Directory (searched recursively) holding the csv files.

    Returns
    -------
    xarray.Dataset
        The concatenated data with 'datetime' as its dimension and the original
        'MeasDateTime' variable dropped.
    """
    # Read every file found below the directory and stack them into one frame
    frames = [pd.read_csv(path) for path in getListOfFiles(dirName)]
    combined = pd.concat(frames)

    # Parse the 'MeasDateTime' text strings into pandas datetime objects
    combined['datetime'] = pd.to_datetime(combined['MeasDateTime'])

    # Index by datetime, order the rows by that index, and convert to xarray
    indexed = combined.set_index('datetime').sort_index()
    dataset = indexed.to_xarray()

    # The text timestamp is redundant now that 'datetime' is the index
    return dataset.drop_vars(names='MeasDateTime')

---
## Open data files

Specify the directories containing the CUES Level 1 CSV files for temperature, radiation, wind, and precipitation data:

In [3]:
# Build the data directory paths with os.path.join so they resolve on any
# operating system: a hard-coded backslash is a path separator only on Windows,
# elsewhere it would be treated as part of a single file name.
temp_directory = os.path.join('data', 'temperature')
rad_directory = os.path.join('data', 'radiation')
wind_directory = os.path.join('data', 'wind')
precip_directory = os.path.join('data', 'precipitation')

Open the CUES data files, and concatenate them together

In [4]:
# Load each variable's Level 1 files into its own xarray dataset
cues_temp = getCUESdata(temp_directory)
cues_rad = getCUESdata(rad_directory)
cues_wind = getCUESdata(wind_directory)

# The temperature record contains repeated datetime values (their cause has not
# been tracked down yet). np.unique reports the position of the first occurrence
# of each distinct datetime; keep only those positions.
first_positions = np.unique(cues_temp['datetime'], return_index=True)[1]
cues_temp = cues_temp.isel(datetime=first_positions)

Open the 3D wind vector data from the sonic anemometer

In [5]:
# Read the sonic anemometer csv. os.path.join keeps the path valid on any
# operating system (a hard-coded backslash is a separator only on Windows).
_cues_3d_wind = pd.read_csv(os.path.join('data', 'CUESWinds201703through201704.csv'))
# Convert the 'TIMESTAMP' text strings to pandas datetime objects
_cues_3d_wind['datetime'] = pd.to_datetime(_cues_3d_wind['TIMESTAMP'])
# Make the datetimes the index and sort by them (returns a new frame rather
# than mutating in place)
_cues_3d_wind = _cues_3d_wind.set_index('datetime').sort_index()
# Convert the pandas dataframe to an xarray dataset; the name cues_3d_wind is
# only ever bound to the dataset, never to the intermediate dataframe
cues_3d_wind = _cues_3d_wind.to_xarray()

Merge the temperature, radiation, wind, and 3D wind datasets together, then save the result to a pickle file.

In [6]:
# Combine the per-variable datasets into a single xarray dataset
datasets_to_merge = [cues_temp, cues_rad, cues_wind, cues_3d_wind]
cues_ds = xr.merge(datasets_to_merge)

# Go back to a pandas dataframe and write it out as a pickle file
cues_df = cues_ds.to_dataframe()
cues_df.to_pickle('data/CUES_L1_Temp_Rad_Wind_Mar-Apr2017.pkl')

---
Open the CUES precip data (these are daily values, so I won't merge them with the rest of the data)

In [7]:
# Load the precipitation files and pickle them on their own, separate from the
# merged temperature/radiation/wind file
cues_precip = getCUESdata(precip_directory)
precip_df = cues_precip.to_dataframe()
precip_df.to_pickle('data/CUES_L1_Precip_Mar-Apr2017.pkl')