In [7]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import math
import glob
import xarray as xr
from numpy.lib.stride_tricks import as_strided as strided

In [8]:
# read every .sta file in a single folder and stack them into one dataframe

path = '/projects/sebu3651/ProjectResearch/Wasco/WascoLidar/CU'
# sorted() makes the concatenation order deterministic (alphabetical by file name)
all_files = sorted(glob.glob(path + "/*.sta"))

# fail early with a clear message: pd.concat raises an opaque
# "No objects to concatenate" ValueError when handed an empty list
if not all_files:
    raise FileNotFoundError("No .sta files found in " + path)

# each file is tab-separated and ISO-8859-1 encoded; header=56 takes the column
# names from row 56 (0-based) and ignores everything above it
li = [
    pd.read_csv(filename, sep='\t', header=56, encoding='iso-8859-1')
    for filename in all_files
]

# stack the per-file frames row-wise, then index the result by the 'Date' column
WasWSdf = pd.concat(li, axis=0, ignore_index=True).set_index('Date')

In [9]:
# renaming columns to be more intuitive:
# 'Vhm1' ... 'Vhm10' become '40m' ... '220m' (20 m steps)
WasWSdf = WasWSdf.rename(
    columns={'Vhm%d' % level: '%dm' % (20 * level + 20) for level in range(1, 11)}
)

In [10]:
# defining a function that flags runs of x or more consecutive NaN values, so that long gaps can be excluded from the linear interpolation

def mask_knans(a, x):
    """Build a keep-mask that is False inside long runs of NaN.

    Parameters
    ----------
    a : array-like of float
        Values to inspect. The input is flattened to 1-D.
    x : int
        Minimum run length. Every element that belongs to a run of ``x`` or
        more consecutive NaN values is marked False.

    Returns
    -------
    numpy.ndarray of bool, length ``a.size``
        True where the element is not part of a long NaN run, False where it
        lies inside a run of at least ``x`` consecutive NaNs. Inputs shorter
        than ``x`` (including empty input) cannot contain such a run, so the
        mask is all True.
    """
    a = np.asarray(a, dtype=float).ravel()
    k = a.size
    m = np.ones(k, dtype=bool)

    # no run of length x can exist: nothing to mask
    if x < 1 or k < x:
        return m

    # pad with a non-NaN flag on both sides so every NaN run has a rising and
    # a falling edge, then locate the runs from the differences
    n = np.isnan(a)
    edges = np.diff(np.concatenate(([0], n.astype(np.int8), [0])))
    starts = np.flatnonzero(edges == 1)    # index of first NaN in each run
    stops = np.flatnonzero(edges == -1)    # index one past the last NaN

    long_run = (stops - starts) >= x

    # +1 at the start and -1 at the end of each long run; the running sum is
    # then 1 inside those runs and 0 everywhere else
    delta = np.zeros(k + 1, dtype=np.int64)
    delta[starts[long_run]] = 1
    delta[stops[long_run]] = -1
    m = np.cumsum(delta[:-1]) == 0

    return m

In [11]:
# creating a copy of the dataframe and applying linear interpolation function at each height

WasWSdfIntp=WasWSdf

# 15 is used as the x value for the function because interpolation occurs if there is a usable point within 15x 2-minute intervals (within 30 min)
WasWSdfIntp['40m']=WasWSdf['40m'].interpolate().where(mask_knans(WasWSdf['40m'].tolist(),15))
WasWSdfIntp['60m']=WasWSdf['60m'].interpolate().where(mask_knans(WasWSdf['60m'].tolist(),15))
WasWSdfIntp['80m']=WasWSdf['80m'].interpolate().where(mask_knans(WasWSdf['80m'].tolist(),15))
WasWSdfIntp['100m']=WasWSdf['100m'].interpolate().where(mask_knans(WasWSdf['100m'].tolist(),15))
WasWSdfIntp['120m']=WasWSdf['120m'].interpolate().where(mask_knans(WasWSdf['120m'].tolist(),15))
WasWSdfIntp['140m']=WasWSdf['140m'].interpolate().where(mask_knans(WasWSdf['140m'].tolist(),15))
WasWSdfIntp['160m']=WasWSdf['160m'].interpolate().where(mask_knans(WasWSdf['160m'].tolist(),15))
WasWSdfIntp['180m']=WasWSdf['180m'].interpolate().where(mask_knans(WasWSdf['180m'].tolist(),15))
WasWSdfIntp['200m']=WasWSdf['200m'].interpolate().where(mask_knans(WasWSdf['200m'].tolist(),15))

In [12]:
# creating copy of interpolated dataframe and resampling 2-minute outputs hourly

WasWSdfIntpHr=WasWSdf
WasWSdfIntpHr.index=pd.to_datetime(WasWSdfIntpHr.index)

# taking hourly mean at each height
WasWS40IntpHr=WasWSdfIntpHr['40m'].resample('1H').mean()
WasWS60IntpHr=WasWSdfIntpHr['60m'].resample('1H').mean()
WasWS80IntpHr=WasWSdfIntpHr['80m'].resample('1H').mean()
WasWS100IntpHr=WasWSdfIntpHr['100m'].resample('1H').mean()
WasWS120IntpHr=WasWSdfIntpHr['120m'].resample('1H').mean()
WasWS140IntpHr=WasWSdfIntpHr['140m'].resample('1H').mean()
WasWS160IntpHr=WasWSdfIntpHr['160m'].resample('1H').mean()
WasWS180IntpHr=WasWSdfIntpHr['180m'].resample('1H').mean()
WasWS200IntpHr=WasWSdfIntpHr['200m'].resample('1H').mean()

# concatenating each height series into a single dataframe that displays hourly data
WasWSdfIntpHr=pd.concat((WasWS40IntpHr,WasWS60IntpHr,WasWS80IntpHr,WasWS100IntpHr,WasWS120IntpHr,WasWS140IntpHr,WasWS160IntpHr,WasWS180IntpHr,WasWS200IntpHr),axis=1)

In [13]:
# write each frame to its own file in the current working directory
for frame, out_name in (
    (WasWSdf, 'WascoLidar2min'),
    (WasWSdfIntp, 'WascoLidarInterpolated2min'),
    (WasWSdfIntpHr, 'WascoLidarInterpolatedHourly'),
):
    frame.to_csv(out_name)