In [5]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import math
import xarray as xr
from numpy.lib.stride_tricks import as_strided as strided

In [6]:
# reading in all Vansycle lidar files
# `concat_dim` only applies to combine='nested'; recent xarray releases raise a ValueError
# when it is passed together with combine='by_coords', so it is omitted here and the files
# are ordered by their coordinate values.
VanWS = xr.open_mfdataset('/projects/sebu3651/ProjectResearch/Vansycle/VansycleLidar/lidar.z08.b0.*.nc', combine='by_coords')
VanWSdf = VanWS.to_dataframe()

# setting time as index (every other index level becomes an ordinary column)
VanWSdf = VanWSdf.reset_index().set_index('time')

# deleting irrelevant columns
# NOTE(review): 'bounds' exists as a column after reset_index; if it was an index level,
# each (time, height) pair presumably appears once per bounds value -- confirm that the
# rows are not duplicated after this drop.
VanWSdf = VanWSdf.drop(columns=[
    'time_bounds',
    'bounds',
    'latitude',
    'longitude',
    'altitude',
    'wind_speed_std',
    'wind_direction',
    'vertical_air_velocity',
    'vertical_air_velocity_std',
])

# setting values to NaN instead of default large number
# (assigning the column back avoids writing through `.values`, which is read-only or a
# detached copy when pandas Copy-on-Write is active)
VanWSdf['wind_speed'] = VanWSdf['wind_speed'].mask(VanWSdf['wind_speed'] > 1000.0)

In [7]:
# defining a function that builds a boolean mask marking runs of consecutive NaN values that are too long to be filled by linear interpolation

def mask_knans(a, x):
    """Build a boolean mask that rejects NaN gaps of ``x`` or more samples.

    Parameters
    ----------
    a : array-like of float
        Values to inspect; flattened to one dimension before processing.
    x : int
        Minimum number of consecutive NaNs for a gap to be rejected.
        Values below 1 reject nothing.

    Returns
    -------
    numpy.ndarray of bool, shape ``(a.size,)``
        ``False`` at every position that lies inside a run of at least ``x``
        consecutive NaNs, ``True`` everywhere else (valid samples and NaNs
        belonging to shorter runs).
    """
    values = np.asarray(a).ravel()
    keep = np.ones(values.size, dtype=bool)
    if x < 1:
        return keep

    nan_flags = np.isnan(values)

    # Pad with False on both sides so every NaN run, including one touching
    # either end of the array, has a rising edge (+1) and a falling edge (-1).
    padded = np.concatenate(([False], nan_flags, [False]))
    edges = np.diff(padded.astype(np.int8))
    run_starts = np.flatnonzero(edges == 1)
    run_stops = np.flatnonzero(edges == -1)  # exclusive end of each run
    run_lengths = run_stops - run_starts

    # Expand the per-run verdict to one flag per NaN element; the NaN elements
    # appear in the same order as the runs they belong to.
    keep[nan_flags] = ~np.repeat(run_lengths >= x, run_lengths)

    return keep

In [8]:
# creating an independent copy of the dataframe and applying the linear interpolation for each height
VanWSdfIntp = VanWSdf.copy()


def _interpolate_height(frame, height, max_gap=3):
    """Interpolate the wind speed of one measurement height.

    Parameters
    ----------
    frame : pandas.DataFrame
        Table with 'height' and 'wind_speed' columns.
    height : int
        Height value whose rows are selected.
    max_gap : int, default 3
        Passed to ``mask_knans``: NaN runs of at least this many samples are
        set back to NaN after interpolation.

    Returns
    -------
    (pandas.Series, pandas.DataFrame)
        The interpolated 'wind_speed' series, and the same data as a frame
        with an added constant 'height' column.
    """
    raw = frame.loc[frame['height'] == height, 'wind_speed']
    filled = raw.interpolate().where(mask_knans(raw, max_gap))
    return filled, filled.to_frame().assign(height=height)


# 3 is used as the x value for the function because interpolation occurs if there is a usable point within 3x 10-minute intervals (within 30 min)
VanWS40Int, VanWS40Intdf = _interpolate_height(VanWSdfIntp, 40)
VanWS60Int, VanWS60Intdf = _interpolate_height(VanWSdfIntp, 60)
VanWS80Int, VanWS80Intdf = _interpolate_height(VanWSdfIntp, 80)
VanWS100Int, VanWS100Intdf = _interpolate_height(VanWSdfIntp, 100)
VanWS120Int, VanWS120Intdf = _interpolate_height(VanWSdfIntp, 120)
VanWS140Int, VanWS140Intdf = _interpolate_height(VanWSdfIntp, 140)
VanWS160Int, VanWS160Intdf = _interpolate_height(VanWSdfIntp, 160)
VanWS180Int, VanWS180Intdf = _interpolate_height(VanWSdfIntp, 180)
VanWS200Int, VanWS200Intdf = _interpolate_height(VanWSdfIntp, 200)

# stacking the per-height frames into a single long-format dataframe (10-minute data)
VanWSdfInt = pd.concat((VanWS40Intdf, VanWS60Intdf, VanWS80Intdf, VanWS100Intdf, VanWS120Intdf, VanWS140Intdf, VanWS160Intdf, VanWS180Intdf, VanWS200Intdf))

In [9]:
# taking hourly mean at each height


def _hourly_mean(frame, height):
    """Return the hourly mean wind speed of one measurement height.

    Only 'wind_speed' is averaged; the 'height' column is re-attached as a
    constant afterwards. Averaging 'height' together with the data would leave
    it NaN for any hour that contains no rows, and those rows would then be
    unlabeled in the concatenated table.

    Parameters
    ----------
    frame : pandas.DataFrame
        Time-indexed table with 'height' and 'wind_speed' columns.
    height : int
        Height value whose rows are selected.

    Returns
    -------
    pandas.DataFrame
        Hourly table with 'wind_speed' and a float 'height' column.
    """
    # '1h' replaces the '1H' alias, which pandas deprecated in 2.2.
    hourly = frame.loc[frame['height'] == height, ['wind_speed']].resample('1h').mean()
    # float keeps the dtype that averaging the height column used to produce
    return hourly.assign(height=float(height))


VanWS40IntHr = _hourly_mean(VanWSdfInt, 40)
VanWS60IntHr = _hourly_mean(VanWSdfInt, 60)
VanWS80IntHr = _hourly_mean(VanWSdfInt, 80)
VanWS100IntHr = _hourly_mean(VanWSdfInt, 100)
VanWS120IntHr = _hourly_mean(VanWSdfInt, 120)
VanWS140IntHr = _hourly_mean(VanWSdfInt, 140)
VanWS160IntHr = _hourly_mean(VanWSdfInt, 160)
VanWS180IntHr = _hourly_mean(VanWSdfInt, 180)
VanWS200IntHr = _hourly_mean(VanWSdfInt, 200)

# concatenating each height series into a single dataframe that displays hourly data
VanWSdfIntHrdf = pd.concat((VanWS40IntHr, VanWS60IntHr, VanWS80IntHr, VanWS100IntHr, VanWS120IntHr, VanWS140IntHr, VanWS160IntHr, VanWS180IntHr, VanWS200IntHr))

In [12]:
# writing the cleaned 10-minute, interpolated 10-minute and interpolated hourly tables to CSV
csv_targets = (
    (VanWSdf, 'VansycleLidar10min'),
    (VanWSdfInt, 'VansycleLidarInterpolated10min'),
    (VanWSdfIntHrdf, 'VansycleLidarInterpolatedHourly'),
)
for table, filename in csv_targets:
    table.to_csv(filename)