# Final data preparation for CN model

In [264]:
# load required libraries
library(dplyr)
library(tidyr)
library(ggplot2)
library(lubridate)

In [265]:
# Build the empty forcing table: one row per calendar day from 2004-01-01 to
# 2023-12-31, plus one placeholder column per forcing variable. Every
# placeholder starts as logical NA and is overwritten by the cells below.
df <- data.frame(
  date = seq(from = as.Date("2004-01-01"), to = as.Date("2023-12-31"), by = "day")
)
df[c(
  "temp", "vpd", "ppfd", "netrad", "patm", "snow", "rain",
  "tmin", "tmax", "vwind", "fapar", "co2", "ccov", "nee"
)] <- NA
# Show the first rows of the skeleton table
head(df)

Unnamed: 0_level_0,date,temp,vpd,ppfd,netrad,patm,snow,rain,tmin,tmax,vwind,fapar,co2,ccov,nee
Unnamed: 0_level_1,<date>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>
1,2004-01-01,,,,,,,,,,,,,,
2,2004-01-02,,,,,,,,,,,,,,
3,2004-01-03,,,,,,,,,,,,,,
4,2004-01-04,,,,,,,,,,,,,,
5,2004-01-05,,,,,,,,,,,,,,
6,2004-01-06,,,,,,,,,,,,,,


## load specific csv files and extract required columns

### temp, tmin, tmax

In [266]:
# Load the daily air-temperature table (columns used: temp, tmin, tmax)
temp_data <- read.csv("../data/01_data_prep/01_temp_tmin_tmax.csv")
# Preview the raw table before the date column is parsed
head(temp_data)
# Parse the date strings so they can be matched against df$date
temp_data$date <- as.Date(temp_data$date)

# Join the three temperature columns next to the placeholders (the incoming
# copies receive the "_new" suffix), overwrite the placeholders in place so the
# column order of df is kept, then drop the suffixed helper columns.
df <- left_join(
  df,
  select(temp_data, date, temp, tmin, tmax),
  by = "date",
  suffix = c("", "_new")
)
df <- mutate(df, temp = temp_new, tmin = tmin_new, tmax = tmax_new)
df <- select(df, -temp_new, -tmin_new, -tmax_new)
# Preview the updated table
head(df)

Unnamed: 0_level_0,date,temp,temp_day,tmin,tmax,TA_F,TA_F_QC
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,2004-01-01,-0.38925,-0.2391667,-1.23,-0.01,-0.389,0.8125
2,2004-01-02,-1.068542,-0.6558333,-2.06,-0.05,-1.069,1.0
3,2004-01-03,-3.388542,-3.0229167,-5.82,-2.19,-3.389,1.0
4,2004-01-04,-5.488958,-4.8404167,-10.45,-1.48,-5.489,1.0
5,2004-01-05,-2.451458,-1.5483333,-8.1,0.53,-2.451,1.0
6,2004-01-06,-2.562083,-2.1308333,-7.24,0.35,-2.562,1.0


Unnamed: 0_level_0,date,temp,vpd,ppfd,netrad,patm,snow,rain,tmin,tmax,vwind,fapar,co2,ccov,nee
Unnamed: 0_level_1,<date>,<dbl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<dbl>,<dbl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>
1,2004-01-01,-0.38925,,,,,,,-1.23,-0.01,,,,,
2,2004-01-02,-1.068542,,,,,,,-2.06,-0.05,,,,,
3,2004-01-03,-3.388542,,,,,,,-5.82,-2.19,,,,,
4,2004-01-04,-5.488958,,,,,,,-10.45,-1.48,,,,,
5,2004-01-05,-2.451458,,,,,,,-8.1,0.53,,,,,
6,2004-01-06,-2.562083,,,,,,,-7.24,0.35,,,,,


### vwind

In [267]:
# Load the daily wind-velocity table
vwind_data <- read.csv("../data/01_data_prep/01a_vwind.csv")
# Preview the raw table before the date column is parsed
head(vwind_data)
# Parse the date strings so they can be matched against df$date
vwind_data$date <- as.Date(vwind_data$date)

# Join wind_velocity_ms by date, copy it into the vwind placeholder, and drop
# the joined helper column in the same mutate() call.
df <- left_join(
  df,
  select(vwind_data, date, wind_velocity_ms),
  by = "date",
  suffix = c("", "_new")
)
df <- mutate(df, vwind = wind_velocity_ms, wind_velocity_ms = NULL)
# Preview the updated table
head(df)

Unnamed: 0_level_0,date,wind_velocity_ms
Unnamed: 0_level_1,<chr>,<dbl>
1,2004-01-01,1.52
2,2004-01-02,2.022
3,2004-01-03,2.716
4,2004-01-04,1.089
5,2004-01-05,0.675
6,2004-01-06,0.701


Unnamed: 0_level_0,date,temp,vpd,ppfd,netrad,patm,snow,rain,tmin,tmax,vwind,fapar,co2,ccov,nee
Unnamed: 0_level_1,<date>,<dbl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<lgl>,<lgl>,<lgl>,<lgl>
1,2004-01-01,-0.38925,,,,,,,-1.23,-0.01,1.52,,,,
2,2004-01-02,-1.068542,,,,,,,-2.06,-0.05,2.022,,,,
3,2004-01-03,-3.388542,,,,,,,-5.82,-2.19,2.716,,,,
4,2004-01-04,-5.488958,,,,,,,-10.45,-1.48,1.089,,,,
5,2004-01-05,-2.451458,,,,,,,-8.1,0.53,0.675,,,,
6,2004-01-06,-2.562083,,,,,,,-7.24,0.35,0.701,,,,


### vpd

In [268]:
# Load the daily vapour-pressure-deficit table
vpd_data <- read.csv("../data/01_data_prep/02_vpd.csv")
# Preview the raw table before the date column is parsed
head(vpd_data)
# Parse the date strings so they can be matched against df$date
vpd_data$date <- as.Date(vpd_data$date)

# Of the available VPD columns only vpd_day is used: join it by date, copy it
# into the vpd placeholder, and drop the joined helper column.
df <- left_join(df, select(vpd_data, date, vpd_day), by = "date")
df <- mutate(df, vpd = vpd_day, vpd_day = NULL)
# Preview the updated table
head(df)

Unnamed: 0_level_0,date,vpd_day,vpd_night,vpd_24h,vpd_daily_flux
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
1,2004-01-01,0.065125,0.04004167,0.05258333,0.053
2,2004-01-02,0.0,0.02091667,0.01045833,0.01
3,2004-01-03,0.070875,0.14075,0.1058125,0.106
4,2004-01-04,0.7600417,0.41058333,0.5853125,0.585
5,2004-01-05,0.2392917,0.27658333,0.2579375,0.258
6,2004-01-06,0.238375,0.05025,0.1443125,0.144


Unnamed: 0_level_0,date,temp,vpd,ppfd,netrad,patm,snow,rain,tmin,tmax,vwind,fapar,co2,ccov,nee
Unnamed: 0_level_1,<date>,<dbl>,<dbl>,<lgl>,<lgl>,<lgl>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<lgl>,<lgl>,<lgl>,<lgl>
1,2004-01-01,-0.38925,0.065125,,,,,,-1.23,-0.01,1.52,,,,
2,2004-01-02,-1.068542,0.0,,,,,,-2.06,-0.05,2.022,,,,
3,2004-01-03,-3.388542,0.070875,,,,,,-5.82,-2.19,2.716,,,,
4,2004-01-04,-5.488958,0.7600417,,,,,,-10.45,-1.48,1.089,,,,
5,2004-01-05,-2.451458,0.2392917,,,,,,-8.1,0.53,0.675,,,,
6,2004-01-06,-2.562083,0.238375,,,,,,-7.24,0.35,0.701,,,,


### ppfd

In [269]:
# Load the daily PPFD table
ppfd_data <- read.csv("../data/01_data_prep/03b_splash_ppfd.csv")
# Preview the raw table before the date column is parsed
head(ppfd_data)
# Parse the date strings so they can be matched against df$date
ppfd_data$date <- as.Date(ppfd_data$date)

# Join ppfd by date (the incoming copy is named ppfd_new), overwrite the
# placeholder in place, and drop the helper column.
df <- left_join(
  df,
  select(ppfd_data, date, ppfd),
  by = "date",
  suffix = c("", "_new")
)
df <- mutate(df, ppfd = ppfd_new, ppfd_new = NULL)
# Preview the updated table
head(df)

Unnamed: 0_level_0,date,year,doy,latitude,longitude,elevation,SW_IN_F,SW_IN_F_MJ,ppfd_obs,ppfd
Unnamed: 0_level_1,<chr>,<int>,<int>,<dbl>,<dbl>,<int>,<dbl>,<dbl>,<dbl>,<dbl>
1,2004-01-01,2004,1,47.28642,7.73375,452,36.666,3.167942,6.514874,6.514874
2,2004-01-02,2004,2,47.28642,7.73375,452,40.078,3.462739,7.121123,7.121123
3,2004-01-03,2004,3,47.28642,7.73375,452,40.147,3.468701,7.133383,7.133383
4,2004-01-04,2004,4,47.28642,7.73375,452,53.699,4.639594,9.541324,9.541324
5,2004-01-05,2004,5,47.28642,7.73375,452,37.888,3.273523,6.732,6.732
6,2004-01-06,2004,6,47.28642,7.73375,452,49.015,4.234896,8.709064,8.709064


Unnamed: 0_level_0,date,temp,vpd,ppfd,netrad,patm,snow,rain,tmin,tmax,vwind,fapar,co2,ccov,nee
Unnamed: 0_level_1,<date>,<dbl>,<dbl>,<dbl>,<lgl>,<lgl>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<lgl>,<lgl>,<lgl>,<lgl>
1,2004-01-01,-0.38925,0.065125,6.514874,,,,,-1.23,-0.01,1.52,,,,
2,2004-01-02,-1.068542,0.0,7.121123,,,,,-2.06,-0.05,2.022,,,,
3,2004-01-03,-3.388542,0.070875,7.133383,,,,,-5.82,-2.19,2.716,,,,
4,2004-01-04,-5.488958,0.7600417,9.541324,,,,,-10.45,-1.48,1.089,,,,
5,2004-01-05,-2.451458,0.2392917,6.732,,,,,-8.1,0.53,0.675,,,,
6,2004-01-06,-2.562083,0.238375,8.709064,,,,,-7.24,0.35,0.701,,,,


### netrad

In [270]:
# Load the daily net-radiation table
netrad_data <- read.csv("../data/01_data_prep/04_netrad.csv")
# Preview the raw table before the timestamp column is parsed
head(netrad_data)
# Parse TIMESTAMP as Date, then rename it to "date" so it can serve as join key
netrad_data$TIMESTAMP <- as.Date(netrad_data$TIMESTAMP)
names(netrad_data)[names(netrad_data) == "TIMESTAMP"] <- "date"

# Join netrad by date (the incoming copy is named netrad_new), overwrite the
# placeholder in place, and drop the helper column.
df <- left_join(
  df,
  select(netrad_data, date, netrad),
  by = "date",
  suffix = c("", "_new")
)
df <- mutate(df, netrad = netrad_new, netrad_new = NULL)
# Preview the updated table
head(df)

Unnamed: 0_level_0,TIMESTAMP,netrad,netrad_method,netrad_approx,netrad_simple,netrad_energy_balance
Unnamed: 0_level_1,<chr>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>
1,2004-01-01,1.33971,Energy Balance,30.30515,25.6662,1.33971
2,2004-01-02,-4.38167,Energy Balance,34.67159,28.0546,-4.38167
3,2004-01-03,-4.58373,Energy Balance,16.32752,28.1029,-4.58373
4,2004-01-04,-9.01879,Energy Balance,28.20961,37.5893,-9.01879
5,2004-01-05,-6.912368,Energy Balance,36.52219,26.5216,-6.912368
6,2004-01-06,-11.30939,Energy Balance,32.12189,34.3105,-11.30939


Unnamed: 0_level_0,date,temp,vpd,ppfd,netrad,patm,snow,rain,tmin,tmax,vwind,fapar,co2,ccov,nee
Unnamed: 0_level_1,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<lgl>,<lgl>,<lgl>,<lgl>
1,2004-01-01,-0.38925,0.065125,6.514874,1.33971,,,,-1.23,-0.01,1.52,,,,
2,2004-01-02,-1.068542,0.0,7.121123,-4.38167,,,,-2.06,-0.05,2.022,,,,
3,2004-01-03,-3.388542,0.070875,7.133383,-4.58373,,,,-5.82,-2.19,2.716,,,,
4,2004-01-04,-5.488958,0.7600417,9.541324,-9.01879,,,,-10.45,-1.48,1.089,,,,
5,2004-01-05,-2.451458,0.2392917,6.732,-6.912368,,,,-8.1,0.53,0.675,,,,
6,2004-01-06,-2.562083,0.238375,8.709064,-11.30939,,,,-7.24,0.35,0.701,,,,


### pa

In [271]:
# Load the daily atmospheric-pressure table
pa_data <- read.csv("../data/01_data_prep/05_pa.csv")
# Preview the raw table before the timestamp column is parsed
head(pa_data)
# Parse TIMESTAMP as Date, then rename it to "date" so it can serve as join key
pa_data$TIMESTAMP <- as.Date(pa_data$TIMESTAMP)
names(pa_data)[names(pa_data) == "TIMESTAMP"] <- "date"

# Join the PA_F_pa column by date, copy it into the patm placeholder, and drop
# the joined helper column.
df <- left_join(
  df,
  select(pa_data, date, PA_F_pa),
  by = "date",
  suffix = c("", "_new")
)
df <- mutate(df, patm = PA_F_pa, PA_F_pa = NULL)
# Preview the updated table
head(df)

Unnamed: 0_level_0,TIMESTAMP,PA_F_kpa,PA_F_pa
Unnamed: 0_level_1,<chr>,<dbl>,<int>
1,2004-01-01,95.825,95825
2,2004-01-02,95.455,95455
3,2004-01-03,96.232,96232
4,2004-01-04,96.367,96367
5,2004-01-05,96.71,96710
6,2004-01-06,96.656,96656


Unnamed: 0_level_0,date,temp,vpd,ppfd,netrad,patm,snow,rain,tmin,tmax,vwind,fapar,co2,ccov,nee
Unnamed: 0_level_1,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<lgl>,<lgl>,<lgl>,<lgl>
1,2004-01-01,-0.38925,0.065125,6.514874,1.33971,95825,,,-1.23,-0.01,1.52,,,,
2,2004-01-02,-1.068542,0.0,7.121123,-4.38167,95455,,,-2.06,-0.05,2.022,,,,
3,2004-01-03,-3.388542,0.070875,7.133383,-4.58373,96232,,,-5.82,-2.19,2.716,,,,
4,2004-01-04,-5.488958,0.7600417,9.541324,-9.01879,96367,,,-10.45,-1.48,1.089,,,,
5,2004-01-05,-2.451458,0.2392917,6.732,-6.912368,96710,,,-8.1,0.53,0.675,,,,
6,2004-01-06,-2.562083,0.238375,8.709064,-11.30939,96656,,,-7.24,0.35,0.701,,,,


### fapar

In [272]:
# Load the daily fAPAR table
fapar_data <- read.csv("../data/01_data_prep/06a2_daily_fapar_2004-2023.csv")
# Preview the raw table before the date column is parsed
head(fapar_data)
# Parse the date strings so they can be matched against df$date
fapar_data$date <- as.Date(fapar_data$date)

# Join fapar by date (the incoming copy is named fapar_new), overwrite the
# placeholder in place, and drop the helper column.
df <- left_join(
  df,
  select(fapar_data, date, fapar),
  by = "date",
  suffix = c("", "_new")
)
df <- mutate(df, fapar = fapar_new, fapar_new = NULL)
# Preview the updated table
head(df)

Unnamed: 0_level_0,date,fapar
Unnamed: 0_level_1,<chr>,<dbl>
1,2004-01-01,0.02
2,2004-01-02,0.02
3,2004-01-03,0.02
4,2004-01-04,0.02
5,2004-01-05,0.02
6,2004-01-06,0.02


Unnamed: 0_level_0,date,temp,vpd,ppfd,netrad,patm,snow,rain,tmin,tmax,vwind,fapar,co2,ccov,nee
Unnamed: 0_level_1,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>,<lgl>,<lgl>
1,2004-01-01,-0.38925,0.065125,6.514874,1.33971,95825,,,-1.23,-0.01,1.52,0.02,,,
2,2004-01-02,-1.068542,0.0,7.121123,-4.38167,95455,,,-2.06,-0.05,2.022,0.02,,,
3,2004-01-03,-3.388542,0.070875,7.133383,-4.58373,96232,,,-5.82,-2.19,2.716,0.02,,,
4,2004-01-04,-5.488958,0.7600417,9.541324,-9.01879,96367,,,-10.45,-1.48,1.089,0.02,,,
5,2004-01-05,-2.451458,0.2392917,6.732,-6.912368,96710,,,-8.1,0.53,0.675,0.02,,,
6,2004-01-06,-2.562083,0.238375,8.709064,-11.30939,96656,,,-7.24,0.35,0.701,0.02,,,


### co2

In [273]:
# Load the daily CO2 table
co2_data <- read.csv("../data/01_data_prep/07_co2.csv")
# Preview the raw table before the timestamp column is parsed
head(co2_data)
# Parse TIMESTAMP as Date, then rename it to "date" so it can serve as join key
co2_data$TIMESTAMP <- as.Date(co2_data$TIMESTAMP)
names(co2_data)[names(co2_data) == "TIMESTAMP"] <- "date"

# Join CO2_F_MDS by date, copy it into the co2 placeholder, and drop the joined
# helper column.
df <- left_join(
  df,
  select(co2_data, date, CO2_F_MDS),
  by = "date",
  suffix = c("", "_new")
)
df <- mutate(df, co2 = CO2_F_MDS, CO2_F_MDS = NULL)
# Preview the updated table
head(df)

Unnamed: 0_level_0,TIMESTAMP,CO2_F_MDS
Unnamed: 0_level_1,<chr>,<dbl>
1,2004-01-01,384.872
2,2004-01-02,377.952
3,2004-01-03,385.627
4,2004-01-04,402.001
5,2004-01-05,437.166
6,2004-01-06,507.266


Unnamed: 0_level_0,date,temp,vpd,ppfd,netrad,patm,snow,rain,tmin,tmax,vwind,fapar,co2,ccov,nee
Unnamed: 0_level_1,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>,<lgl>
1,2004-01-01,-0.38925,0.065125,6.514874,1.33971,95825,,,-1.23,-0.01,1.52,0.02,384.872,,
2,2004-01-02,-1.068542,0.0,7.121123,-4.38167,95455,,,-2.06,-0.05,2.022,0.02,377.952,,
3,2004-01-03,-3.388542,0.070875,7.133383,-4.58373,96232,,,-5.82,-2.19,2.716,0.02,385.627,,
4,2004-01-04,-5.488958,0.7600417,9.541324,-9.01879,96367,,,-10.45,-1.48,1.089,0.02,402.001,,
5,2004-01-05,-2.451458,0.2392917,6.732,-6.912368,96710,,,-8.1,0.53,0.675,0.02,437.166,,
6,2004-01-06,-2.562083,0.238375,8.709064,-11.30939,96656,,,-7.24,0.35,0.701,0.02,507.266,,


### rain

In [274]:
# Load the daily rainfall table
rain_data <- read.csv("../data/01_data_prep/08_rain.csv")
# Preview the raw table before the date column is parsed
head(rain_data)
# Parse the date strings so they can be matched against df$date
rain_data$date <- as.Date(rain_data$date)

# Join rain_mm by date, copy it into the rain placeholder, and drop the joined
# helper column.
df <- left_join(
  df,
  select(rain_data, date, rain_mm),
  by = "date",
  suffix = c("", "_new")
)
df <- mutate(df, rain = rain_mm, rain_mm = NULL)
# Preview the updated table
head(df)

Unnamed: 0_level_0,date,rain_mm
Unnamed: 0_level_1,<chr>,<dbl>
1,2004-01-01,1.6
2,2004-01-02,0.5
3,2004-01-03,0.0
4,2004-01-04,0.0
5,2004-01-05,0.0
6,2004-01-06,0.0


Unnamed: 0_level_0,date,temp,vpd,ppfd,netrad,patm,snow,rain,tmin,tmax,vwind,fapar,co2,ccov,nee
Unnamed: 0_level_1,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>,<lgl>
1,2004-01-01,-0.38925,0.065125,6.514874,1.33971,95825,,1.6,-1.23,-0.01,1.52,0.02,384.872,,
2,2004-01-02,-1.068542,0.0,7.121123,-4.38167,95455,,0.5,-2.06,-0.05,2.022,0.02,377.952,,
3,2004-01-03,-3.388542,0.070875,7.133383,-4.58373,96232,,0.0,-5.82,-2.19,2.716,0.02,385.627,,
4,2004-01-04,-5.488958,0.7600417,9.541324,-9.01879,96367,,0.0,-10.45,-1.48,1.089,0.02,402.001,,
5,2004-01-05,-2.451458,0.2392917,6.732,-6.912368,96710,,0.0,-8.1,0.53,0.675,0.02,437.166,,
6,2004-01-06,-2.562083,0.238375,8.709064,-11.30939,96656,,0.0,-7.24,0.35,0.701,0.02,507.266,,


### ccov

In [275]:
# Load the daily cloud-cover table
ccov_data <- read.csv("../data/01_data_prep/07_era5_cloud_cover_2004-2023.csv")

# Parse the date strings first, so the preview already shows Date values
ccov_data$date <- as.Date(ccov_data$date)
head(ccov_data)

# Join cloud_cover_percentage by date, copy it into the ccov placeholder, and
# drop the joined helper column.
df <- left_join(
  df,
  select(ccov_data, date, cloud_cover_percentage),
  by = "date",
  suffix = c("", "_new")
)
df <- mutate(df, ccov = cloud_cover_percentage, cloud_cover_percentage = NULL)
# Preview the updated table
head(df)

Unnamed: 0_level_0,date,cloud_cover,cloud_cover_percentage
Unnamed: 0_level_1,<date>,<dbl>,<dbl>
1,2004-01-01,1.0,100.0
2,2004-01-02,0.9373863,93.73863
3,2004-01-03,1.0,100.0
4,2004-01-04,0.77584,77.584
5,2004-01-05,0.8695803,86.95803
6,2004-01-06,0.9965465,99.65465


Unnamed: 0_level_0,date,temp,vpd,ppfd,netrad,patm,snow,rain,tmin,tmax,vwind,fapar,co2,ccov,nee
Unnamed: 0_level_1,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>
1,2004-01-01,-0.38925,0.065125,6.514874,1.33971,95825,,1.6,-1.23,-0.01,1.52,0.02,384.872,100.0,
2,2004-01-02,-1.068542,0.0,7.121123,-4.38167,95455,,0.5,-2.06,-0.05,2.022,0.02,377.952,93.73863,
3,2004-01-03,-3.388542,0.070875,7.133383,-4.58373,96232,,0.0,-5.82,-2.19,2.716,0.02,385.627,100.0,
4,2004-01-04,-5.488958,0.7600417,9.541324,-9.01879,96367,,0.0,-10.45,-1.48,1.089,0.02,402.001,77.584,
5,2004-01-05,-2.451458,0.2392917,6.732,-6.912368,96710,,0.0,-8.1,0.53,0.675,0.02,437.166,86.95803,
6,2004-01-06,-2.562083,0.238375,8.709064,-11.30939,96656,,0.0,-7.24,0.35,0.701,0.02,507.266,99.65465,


### nee

In [276]:
# Load the daily FLUXNET full-set file for CH-Oe2
nee_data <- read.csv("../data/FLX_CH-Oe2_FLUXNET2015_FULLSET_2004-2023_1-3/FLX_CH-Oe2_FLUXNET2015_FULLSET_DD_2004-2023_1-3.csv")

# Keep only the timestamp and the NEE_CUT_USTAR50 column (net ecosystem
# exchange), then preview the reduced raw table
nee_data <- select(nee_data, TIMESTAMP, NEE_CUT_USTAR50)
head(nee_data)

# TIMESTAMP is stored as YYYYMMDD; go through character to parse it as Date
nee_data$TIMESTAMP <- as.Date(as.character(nee_data$TIMESTAMP), format = "%Y%m%d")

# Give both columns the names used in df and preview the result
names(nee_data) <- c("date", "nee")
head(nee_data)

# Join nee by date (the incoming copy is named nee_new), overwrite the
# placeholder in place, and drop the helper column.
df <- left_join(
  df,
  select(nee_data, date, nee),
  by = "date",
  suffix = c("", "_new")
)
df <- mutate(df, nee = nee_new, nee_new = NULL)

# Preview the updated table
head(df)


Unnamed: 0_level_0,TIMESTAMP,NEE_CUT_USTAR50
Unnamed: 0_level_1,<int>,<dbl>
1,20040101,0.760671
2,20040102,0.787889
3,20040103,0.768699
4,20040104,0.81503
5,20040105,0.746168
6,20040106,1.52801


Unnamed: 0_level_0,date,nee
Unnamed: 0_level_1,<date>,<dbl>
1,2004-01-01,0.760671
2,2004-01-02,0.787889
3,2004-01-03,0.768699
4,2004-01-04,0.81503
5,2004-01-05,0.746168
6,2004-01-06,1.52801


Unnamed: 0_level_0,date,temp,vpd,ppfd,netrad,patm,snow,rain,tmin,tmax,vwind,fapar,co2,ccov,nee
Unnamed: 0_level_1,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,2004-01-01,-0.38925,0.065125,6.514874,1.33971,95825,,1.6,-1.23,-0.01,1.52,0.02,384.872,100.0,0.760671
2,2004-01-02,-1.068542,0.0,7.121123,-4.38167,95455,,0.5,-2.06,-0.05,2.022,0.02,377.952,93.73863,0.787889
3,2004-01-03,-3.388542,0.070875,7.133383,-4.58373,96232,,0.0,-5.82,-2.19,2.716,0.02,385.627,100.0,0.768699
4,2004-01-04,-5.488958,0.7600417,9.541324,-9.01879,96367,,0.0,-10.45,-1.48,1.089,0.02,402.001,77.584,0.81503
5,2004-01-05,-2.451458,0.2392917,6.732,-6.912368,96710,,0.0,-8.1,0.53,0.675,0.02,437.166,86.95803,0.746168
6,2004-01-06,-2.562083,0.238375,8.709064,-11.30939,96656,,0.0,-7.24,0.35,0.701,0.02,507.266,99.65465,1.52801


In [277]:
# No snow input is loaded in this notebook: the whole snow column is set to 0
df[["snow"]] <- 0
# Preview the updated table
head(df)

Unnamed: 0_level_0,date,temp,vpd,ppfd,netrad,patm,snow,rain,tmin,tmax,vwind,fapar,co2,ccov,nee
Unnamed: 0_level_1,<date>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,2004-01-01,-0.38925,0.065125,6.514874,1.33971,95825,0,1.6,-1.23,-0.01,1.52,0.02,384.872,100.0,0.760671
2,2004-01-02,-1.068542,0.0,7.121123,-4.38167,95455,0,0.5,-2.06,-0.05,2.022,0.02,377.952,93.73863,0.787889
3,2004-01-03,-3.388542,0.070875,7.133383,-4.58373,96232,0,0.0,-5.82,-2.19,2.716,0.02,385.627,100.0,0.768699
4,2004-01-04,-5.488958,0.7600417,9.541324,-9.01879,96367,0,0.0,-10.45,-1.48,1.089,0.02,402.001,77.584,0.81503
5,2004-01-05,-2.451458,0.2392917,6.732,-6.912368,96710,0,0.0,-8.1,0.53,0.675,0.02,437.166,86.95803,0.746168
6,2004-01-06,-2.562083,0.238375,8.709064,-11.30939,96656,0,0.0,-7.24,0.35,0.701,0.02,507.266,99.65465,1.52801


## Saving final file

In [278]:
# Completeness check: number of NA entries in each column of the forcing table
missing_values <- colSums(is.na(df))
print(missing_values)
# Write the assembled forcing table to CSV (without a row-name column)
write.csv(
  df,
  file = "../data/01_data_prep/09_forcing_data_2004-2023.csv",
  row.names = FALSE
)

  date   temp    vpd   ppfd netrad   patm   snow   rain   tmin   tmax  vwind 
     0      0      0      0      0      0      0      0      0      0      0 
 fapar    co2   ccov    nee 
     0      0      0      0 


## Prepare forcing data for CN model

In [279]:
# Load packages without warnings and messages
suppressPackageStartupMessages({
  library(dplyr)
  library(tidyr)
  library(ggplot2)
  library(patchwork)
  library(cowplot)
  library(visdat)
  library(here)
  library(lubridate)
  library(readr)
  library(naniar)
  library(purrr)
  library(rsofun)
})
# Load the previously prepared rsofun driver object and keep the CH-Oe2 row(s)
drivers <- readRDS("../data/FLX_CH-Oe2_FLUXNET2015_FULLSET_2004-2023_1-3/rsofun_driver_data_v3.4.2.rds")
drivers <- dplyr::filter(drivers, sitename == "CH-Oe2")

# The forcing stored in the rds file is replaced by the CSV written above
forcing_df <- read_csv("../data/01_data_prep/09_forcing_data_2004-2023.csv")

# Drop the old forcing list-column, then attach the new table as a one-element
# list-column (it is appended after the remaining columns)
drivers <- drivers %>%
  dplyr::select(-forcing) %>%
  dplyr::mutate(forcing = list(forcing_df))

# Management data
# Each table is read from its own CSV; n_input, fharv and seed are merged into
# drivers$forcing further down in this cell.

# N input
n_input <- read.csv("../data/mangement_data/04_23choe2_ninput.csv")

# Seed input
seed <- read.csv("../data/mangement_data/04_23ch0e2_seed.csv")

# Yield C:N
yieldcn <- read.csv("../data/mangement_data/04_23ch0e2_yieldcn.csv")

# Harvested fraction: the file stores a percentage, so divide by 100 to get a
# fraction, consistent with the rest of the data
fharv <- read.csv("../data/mangement_data/04_23ch0e2_fharv.csv")
fharv$fharv <- fharv$fharv / 100

## N deposition------------
# Reactive N input needs to be in gN per day
# added to forcing time series: specify quantity of N added on which day

# Example N input given a constant rate each day
#' Add daily reactive-N input (dno3, dnh4) to every forcing table
#'
#' Each forcing table is left-joined with `n_input` on `date`. Days without a
#' `dno3` / `dnh4` value after the join receive the corresponding default.
#'
#' @param data Data frame with a list-column `forcing`; every element is a data
#'   frame that has a `date` column of class Date.
#' @param n_input Data frame with columns `date`, `dno3` and `dnh4`. `date` is
#'   converted with `as.Date()`. When a date occurs more than once, only its
#'   first row is used.
#' @param default_dno3 Value written to `dno3` where it is NA after the join.
#' @param default_dnh4 Value written to `dnh4` where it is NA after the join.
#' @return `data` with `dno3` and `dnh4` added to each forcing table. The row
#'   count of each forcing table is unchanged.
n_input_add <- function(data, n_input,
                        default_dno3 = 0.002003263,
                        default_dnh4 = 0.002017981) {
  # One row per date: this is what guarantees that the left join below cannot
  # add rows to a forcing table, so no trimming step is needed afterwards.
  n_input <- n_input %>%
    mutate(date = as.Date(date)) %>%
    distinct(date, .keep_all = TRUE)

  # Defined outside mutate() so that `n_input` and the defaults always resolve
  # to this function's arguments rather than to a like-named column of `data`.
  add_n_input <- function(forcing_tbl) {
    forcing_tbl %>%
      left_join(n_input, by = "date") %>%
      mutate(
        dno3 = coalesce(dno3, default_dno3),
        dnh4 = coalesce(dnh4, default_dnh4)
      )
  }

  data %>%
    mutate(forcing = purrr::map(forcing, add_n_input))
}
# Merge the N input table into every forcing table held in drivers
drivers <- drivers %>% n_input_add(n_input)


## Harvesting-----------
# The fraction of biomass harvested per day needs to be specified in the forcing time series
# cseed and nseed new seeds added after harvesting
# Example driver update assumes harvesting is 0 and new seeds planted after harvest

#' Add harvest fraction and seed input to every forcing table
#'
#' Each forcing table is left-joined with `fharv` and then with `seed` on
#' `date`. Days without a value after the joins get 0 in `fharv`, `cseed` and
#' `nseed`.
#'
#' @param data Data frame with a list-column `forcing`; every element is a data
#'   frame that has a `date` column of class Date.
#' @param fharv Data frame with columns `date` and `fharv`.
#' @param seed Data frame with columns `date`, `cseed` and `nseed`.
#'   In both tables `date` is converted with `as.Date()`, and when a date occurs
#'   more than once only its first row is used.
#' @return `data` with `fharv`, `cseed` and `nseed` added to each forcing table.
#'   The row count of each forcing table is unchanged.
fharv_seed_add <- function(data, fharv, seed) {
  # One row per date in both lookup tables: this is what guarantees that the
  # left joins below cannot add rows, so no trimming step is needed afterwards.
  fharv <- fharv %>%
    mutate(date = as.Date(date)) %>%
    distinct(date, .keep_all = TRUE)
  seed <- seed %>%
    mutate(date = as.Date(date)) %>%
    distinct(date, .keep_all = TRUE)

  # Defined outside mutate() so that `fharv` and `seed` always resolve to this
  # function's arguments rather than to a like-named column of `data`.
  add_management <- function(forcing_tbl) {
    forcing_tbl %>%
      left_join(fharv, by = "date") %>%
      left_join(seed, by = "date") %>%
      mutate(
        # .data makes explicit that the joined columns are meant; for `fharv`
        # the bare name is also the name of the lookup-table argument.
        fharv = coalesce(.data$fharv, 0),
        cseed = coalesce(.data$cseed, 0),
        nseed = coalesce(.data$nseed, 0)
      )
  }

  data %>%
    mutate(forcing = purrr::map(forcing, add_management))
}
# Merge the harvest-fraction and seed tables into every forcing table in drivers
drivers <- drivers %>% fharv_seed_add(fharv, seed)

## Simulation parameters------------
# The spinup of cn_model must be long enough to equilibrate fluxes

# Function to modify specific columns in each dataframe
#' Set spinupyears and recycle in every parameter table of a list
#'
#' @param df_list List of data frames.
#' @param spinupyears_val Value written to the `spinupyears` column.
#' @param recycle_val Value written to the `recycle` column.
#' @return A list of the same length as `df_list`; each data frame has its
#'   `spinupyears` and `recycle` columns set (created if absent).
modify_params <- function(df_list, spinupyears_val, recycle_val) {
  # Applied to one parameter table at a time
  set_siml_params <- function(params_tbl) {
    mutate(
      params_tbl,
      spinupyears = spinupyears_val,
      recycle = recycle_val
    )
  }

  map(df_list, set_siml_params)
}

# Simulation settings for site CH-Oe2 (Oensingen, Switzerland): spinupyears = 15
# and recycle = 2 in every parameter table, c_only switched on in the first one
drivers$params_siml <- modify_params(
  df_list = drivers$params_siml,
  spinupyears_val = 15,
  recycle_val = 2
)
drivers$params_siml[[1]]$c_only <- TRUE

# Row count of the first forcing table before the leap days are dropped
nrow(drivers$forcing[[1]])
# Drop every 29 February row, e.g. 2004-02-29 (dates are matched on the literal
# text "-02-29")
drivers$forcing[[1]] <- drivers$forcing[[1]][
  !grepl("-02-29", drivers$forcing[[1]]$date, fixed = TRUE),
]
# Row count after the leap days are dropped
nrow(drivers$forcing[[1]])

# Write the finished driver object to disk
saveRDS(drivers, file = "../data/CH-Oe2_2004-2023_final_ready_for_CNmodel_run.rds")

[1mRows: [22m[34m7305[39m [1mColumns: [22m[34m15[39m
[36m--[39m [1mColumn specification[22m [36m--------------------------------------------------------[39m
[1mDelimiter:[22m ","
[32mdbl[39m  (14): temp, vpd, ppfd, netrad, patm, snow, rain, tmin, tmax, vwind, fap...
[34mdate[39m  (1): date

[36mi[39m Use `spec()` to retrieve the full column specification for this data.
[36mi[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


# Error handling for leap year data
- The model gave errors when it was run with forcing data that included leap days, so the leap days (29 February) are removed from the forcing data; all other days of leap years are kept.
- This is done by removing the rows whose date contains "-02-29".