# Flux data

In [1]:
# when the notebook is started from a path containing "docs", step up one
# directory so that the relative data paths used below resolve
if (grepl(pattern = "docs", x = getwd(), fixed = TRUE)) {
  setwd(dir = "..")
}

In [2]:
library(REddyProc)
library(dplyr)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [6]:
# load the flux records from the CSV file; the first line is the header
data <- read.csv(file = "data/CSV/TOA5_3989.flux.csv", header = TRUE)

In [7]:
# parse the TIMESTAMP strings into POSIXct date-times
# GMT is given as the time zone so that no DST adjustments are applied
data[["TIMESTAMP"]] <- as.POSIXct(
  x = data[["TIMESTAMP"]],
  tz = "GMT",
  format = "%Y-%m-%d %H:%M:%S"
)

In [5]:
# show the first six records
head(data, n = 6L)

Unnamed: 0_level_0,TIMESTAMP,RECORD,Fc_wpl,LE_wpl,Hs,Hc,tau,u_star,stdev_Ts,cov_Ts_Ux,⋯,agc_Avg,Fc_irga,LE_irga,co2_wpl_LE,co2_wpl_H,h2o_wpl_LE,h2o_wpl_H,h2o_hmp_mean,t_hmp_mean,rh_hmp_mean
Unnamed: 0_level_1,<dttm>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,2014-11-19 12:30:00,29287,-0.02738776,41.11178,27.61276,24.76925,0.2095944,0.4146389,0.3397227,-0.01496416,⋯,56.0,-0.08351655,39.15887,0.01285764,0.04327115,0.410987,1.541922,7.890273,11.29643,77.49195
2,2014-11-19 13:00:00,29288,-0.03498442,46.15891,28.56569,25.41201,0.1617521,0.3645584,0.255728,0.008030742,⋯,56.0,-0.09378111,44.07496,0.01447388,0.04432281,0.4691145,1.614831,7.987618,11.80993,75.96396
3,2014-11-19 13:30:00,29289,0.1808234,-6.167122,-24.50253,-23.7634,0.1895548,0.3944006,0.8363222,-1.465492,⋯,56.0,0.2240997,-4.744839,-0.00156682,-0.04170947,-0.04999511,-1.372288,7.917885,11.58026,76.34363
4,2014-11-19 14:00:00,29290,2.181447,-36.54642,-8.166809,-5.908159,0.07494499,0.246819,0.8353181,-0.8943371,⋯,59.64111,2.224159,-35.73215,-0.02265313,-0.02005932,-0.3661243,-0.4481432,7.774566,9.416086,85.95309
5,2014-11-19 14:30:00,29291,-0.07790546,37.35801,-5.19113,-7.278997,0.03453151,0.1677954,0.3394781,-0.03034091,⋯,56.0,-0.07737738,37.25424,0.01248777,-0.01301585,0.4042646,-0.3004996,8.205495,10.59023,84.24697
6,2014-11-19 15:00:00,29292,-0.01854837,29.91426,-19.60791,-21.03238,0.08993658,0.2712201,0.2265035,-0.1086466,⋯,56.0,0.008064821,30.7132,0.01006716,-0.03668035,0.3333376,-1.13228,8.181075,11.32896,80.18222


In [6]:
# number of records read from the file
dim(data)[1L]

## Rows with NA data

In [7]:
# show the first records that have a missing value in at least one column
head(data[which(!complete.cases(data)), ])

Unnamed: 0_level_0,TIMESTAMP,RECORD,Fc_wpl,LE_wpl,Hs,Hc,tau,u_star,stdev_Ts,cov_Ts_Ux,⋯,agc_Avg,Fc_irga,LE_irga,co2_wpl_LE,co2_wpl_H,h2o_wpl_LE,h2o_wpl_H,h2o_hmp_mean,t_hmp_mean,rh_hmp_mean
Unnamed: 0_level_1,<dttm>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
82,2014-11-21 05:00:00,29368,,,,,,,,,⋯,65.89661,,,,,,,8.502651,10.60244,87.23547
84,2014-11-21 06:00:00,29370,,,,,,,,,⋯,77.21928,,,,,,,8.442175,10.50854,87.13308
251,2014-11-24 17:30:00,29537,,,,,,,,,⋯,91.89167,,,,,,,8.087642,8.5086,94.81455
252,2014-11-24 18:00:00,29538,,,,,,,,,⋯,93.99828,,,,,,,7.994737,8.28096,95.10827
253,2014-11-24 18:30:00,29539,,,,,,,,,⋯,94.0,,,,,,,8.010795,8.294072,95.21899
254,2014-11-24 19:00:00,29540,,,,,,,,,⋯,94.0,,,,,,,8.000172,8.23893,95.43085


In [8]:
# number of records that have a missing value in at least one column
sum(!complete.cases(data))

In [9]:
# total number of missing cells across all columns
length(which(is.na(data)))

In [10]:
# number of missing cells in each column
vapply(data, function(column) sum(is.na(column)), numeric(1))

## Add missing time series rows

In [8]:
# build a gap-free half-hourly time axis covering whole calendar years
# start: 00:30 on 1 January of the year of the first record
# end:   00:00 on 1 January of the year after the year of the last record
# GMT is given as the time zone so that no DST adjustments are applied
ts <- local({
  stamp_format <- "%Y-%m-%d %H:%M:%S"
  first_year <- format(data$TIMESTAMP[[1]], "%Y")
  year_after_last <- as.numeric(format(data$TIMESTAMP[[nrow(data)]], "%Y")) + 1
  seq(
    from = as.POSIXct(
      paste0(first_year, "-01-01 00:30:00"), format = stamp_format, tz = "GMT"
    ),
    to = as.POSIXct(
      paste0(year_after_last, "-01-01 00:00:00"),
      format = stamp_format, tz = "GMT"
    ),
    by = "30 min"
  )
})

In [9]:
# full outer join of the complete time axis with the records on TIMESTAMP;
# time steps without a record get NA in every data column
ts <- merge(
  x = data.frame(TIMESTAMP = ts),
  y = data,
  by = "TIMESTAMP",
  all = TRUE
)

In [13]:
# first six rows of the merged time series
head(ts, n = 6L)

Unnamed: 0_level_0,TIMESTAMP,RECORD,Fc_wpl,LE_wpl,Hs,Hc,tau,u_star,stdev_Ts,cov_Ts_Ux,⋯,agc_Avg,Fc_irga,LE_irga,co2_wpl_LE,co2_wpl_H,h2o_wpl_LE,h2o_wpl_H,h2o_hmp_mean,t_hmp_mean,rh_hmp_mean
Unnamed: 0_level_1,<dttm>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,2014-01-01 00:30:00,,,,,,,,,,⋯,,,,,,,,,,
2,2014-01-01 01:00:00,,,,,,,,,,⋯,,,,,,,,,,
3,2014-01-01 01:30:00,,,,,,,,,,⋯,,,,,,,,,,
4,2014-01-01 02:00:00,,,,,,,,,,⋯,,,,,,,,,,
5,2014-01-01 02:30:00,,,,,,,,,,⋯,,,,,,,,,,
6,2014-01-01 03:00:00,,,,,,,,,,⋯,,,,,,,,,,


In [14]:
# last six rows of the merged time series
tail(ts, n = 6L)

Unnamed: 0_level_0,TIMESTAMP,RECORD,Fc_wpl,LE_wpl,Hs,Hc,tau,u_star,stdev_Ts,cov_Ts_Ux,⋯,agc_Avg,Fc_irga,LE_irga,co2_wpl_LE,co2_wpl_H,h2o_wpl_LE,h2o_wpl_H,h2o_hmp_mean,t_hmp_mean,rh_hmp_mean
Unnamed: 0_level_1,<dttm>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
52603,2016-12-31 21:30:00,,,,,,,,,,⋯,,,,,,,,,,
52604,2016-12-31 22:00:00,,,,,,,,,,⋯,,,,,,,,,,
52605,2016-12-31 22:30:00,,,,,,,,,,⋯,,,,,,,,,,
52606,2016-12-31 23:00:00,,,,,,,,,,⋯,,,,,,,,,,
52607,2016-12-31 23:30:00,,,,,,,,,,⋯,,,,,,,,,,
52608,2017-01-01 00:00:00,,,,,,,,,,⋯,,,,,,,,,,


In [15]:
# number of rows in the merged time series
dim(ts)[1L]

## Unit conversions

## REddyProc

<https://github.com/bgctw/REddyProc>

- The number of steps per day can be 24 (hourly) or 48 (half-hourly).
- The time stamp needs to be provided in POSIX time format, equidistant half-hours, and stamped on the half hour.
- The sEddyProc procedures require at least three months of data.
- Full days of data are preferred: the total number of data rows should be a multiple of the daily time step and, in accordance with FLUXNET standards, the dataset should span from the end of the first (half-)hour (0:30 or 1:00, respectively) to midnight (0:00).

Source: REddyProc manual

In [57]:
# validate the time stamps of the merged series against a half-hourly
# resolution, i.e. 48 time steps per day
fCheckHHTimeSeries(
  Time = ts[["TIMESTAMP"]],
  DTS = 48
)

In [58]:
# work on a copy of the merged time series; the REddyProc steps below add to
# and filter EddyData, while ts itself is what gets saved at the end
EddyData <- ts

In [59]:
# vapour pressure deficit is not among the columns used here, so derive it
# from relative humidity (rh_hmp_mean) and air temperature (t_hmp_mean)
EddyData[["VPD"]] <- with(
  EddyData,
  fCalcVPDfromRHandTair(rH = rh_hmp_mean, Tair = t_hmp_mean)
)

In [18]:
# set long runs of identical net ecosystem exchange (Fc_wpl) values to NA
EddyData <- EddyData %>%
  filterLongRuns("Fc_wpl")

In [19]:
# initialise the R5 reference class sEddyProc for post-processing of eddy data
# with the variables needed for post-processing later
# colnames are NEE, air temperature, VPD, and ustar
# every entry of ColNames must be the name of a column of EddyData
# TODO: add the global radiation column here once it has been identified in
# the dataset; without it the gap filling below cannot use meteo conditions
EProc <- sEddyProc$new(
  ID = "IE-Kil",
  Data = EddyData,
  ColNames = c("Fc_wpl", "t_hmp_mean", "VPD", "u_star"),
  ColPOSIXTime = "TIMESTAMP"
)

New sEddyProc class for site 'IE-Kil'



In [32]:
# register the coordinates (decimal degrees) and time zone offset of IE-Kil
EProc$sSetLocationInfo(
  LatDeg = 51.9683611,
  LongDeg = -9.9003056,
  TimeZoneHour = 0
)

In [21]:
# gap-fill NEE (Fc_wpl) with the MDS algorithm; no ustar filtering is done
# beforehand
EProc$sMDSGapFill(
  Var = "Fc_wpl",
  FillAll = FALSE
)

“sMDSGapFill::: The long gap between position 23934 and 42736 will not be filled!”
“sMDSGapFill::: The long gap between position 1 and 15480 will not be filled!”
“sMDSGapFill::: The long gap between position 44092 and 52608 will not be filled!”
“sMDSGapFill::: The long gap between position 18655 and 22580 will not be filled!”
Initialized variable 'Fc_wpl' with 49669 real gaps for gap filling.

Restriced MDS algorithm for gap filling of 'Fc_wpl' with no meteo conditions and hence only MDC.

“sMDSGapFill::: No meteo available for MDS gap filling!”
Mean diurnal course with window size of 0 days: .

.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
39

Mean diurnal course with window size of 1 days: .

.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
83

Mean diurnal course with window size of 2 days: .

.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
48

Mean diurnal course with window size of 7 days: .

.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
.
240

M

In [22]:
# export the processing results held by EProc to a standard data frame
# (in this notebook only gap filling of Fc_wpl has been run, so the result
# holds the Fc_wpl_* gap-filling columns)
FilledEddyData <- EProc$sExportResults()

In [27]:
# show the first rows that have a (measured or gap-filled) flux value
# rows are selected by a single column: is.na() on the whole data frame
# returns a logical matrix, which is not a valid row selector because it has
# one entry per cell rather than one per row
head(FilledEddyData[!is.na(FilledEddyData$Fc_wpl_f), ])

Unnamed: 0_level_0,Fc_wpl_orig,Fc_wpl_f,Fc_wpl_fqc,Fc_wpl_fall,Fc_wpl_fall_qc,Fc_wpl_fnum,Fc_wpl_fsd,Fc_wpl_fmeth,Fc_wpl_fwin
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
15481,-0.02738776,-0.02738776,0,-0.02738776,,,,,
15482,-0.03498442,-0.03498442,0,-0.03498442,,,,,
15483,0.1808234,0.1808234,0,0.1808234,,,,,
15484,2.181447,2.181447,0,2.181447,,,,,
15485,-0.07790546,-0.07790546,0,-0.07790546,,,,,
15486,-0.01854837,-0.01854837,0,-0.01854837,,,,,


## Save

In [28]:
# turn the date-times back into character strings before writing the file
# https://stackoverflow.com/q/76259729
ts[["TIMESTAMP"]] <- format(ts[["TIMESTAMP"]])

In [29]:
# save the complete time series as comma-separated values, without row
# names and without quoting
write.csv(
  ts,
  "data/flux/3989.flux.csv",
  row.names = FALSE,
  quote = FALSE
)

In [30]:
# save the gap-filling results as comma-separated values
# row names are omitted, as for the time series file: write.table() would
# otherwise write them as an extra first field in every data row without a
# matching field in the header line
write.table(
  FilledEddyData, "data/flux/3989.flux.filled.csv",
  row.names = FALSE, sep = ",", quote = FALSE
)