# Process TOA5 data

## TOA5_2806.ts_data.dat

Split the large TOA5 file (~28.5 million rows, 3.2 GB) into manageable files of at most 1 million rows (~110 MB) each.

In [20]:
# view the first 7 lines of the raw file (the 4 header lines plus the first
# 3 records); readLines() is used instead of shelling out to `sed`, so the
# cell does not depend on an external binary or on shell quoting of the path
readLines("data/TOA5/TOA5_2806.ts_data.dat", n = 7)

In [1]:
# load the data records: the first 4 lines of the file are skipped and no
# line is treated as a header, so the columns get the default names V1, V2, ...
data <- read.csv(
  file = "data/TOA5/TOA5_2806.ts_data.dat",
  header = FALSE,
  skip = 4
)

In [4]:
# read file lines 2-4 (skip the first line, then take 3 rows) as the headers
headers <- read.csv(
  file = "data/TOA5/TOA5_2806.ts_data.dat",
  header = FALSE,
  skip = 1,
  nrows = 3
)

In [5]:
# print the three header rows that were just read
headers

V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11,V12,V13
TIMESTAMP,RECORD,Ux,Uy,Uz,co2,h2o,n2o,Ts,press,diag_csat,t_hmp,e_hmp
TS,RN,m/s,m/s,m/s,mg/m^3,g/m^3,umol/mol,C,kPa,unitless,C,kPa
,,Smp,Smp,Smp,Smp,Smp,Smp,Smp,Smp,Smp,Smp,Smp


In [44]:
# store the header rows in their own CSV file; row names and the default
# V1..Vn column names are left out so only the original header text is written
write.table(
  x = headers,
  file = "data/CSV/TOA5_2806.ts_data.headers.csv",
  sep = ",",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)

In [2]:
# label the data columns with the field names found on file line 2
names(data) <- as.matrix(
  read.csv(
    file = "data/TOA5/TOA5_2806.ts_data.dat",
    header = FALSE,
    skip = 1,
    nrows = 1
  )
)

In [3]:
# preview the first rows of the data
head(data)

TIMESTAMP,RECORD,Ux,Uy,Uz,co2,h2o,n2o,Ts,press,diag_csat,t_hmp,e_hmp
2014-12-22 11:47:39.3,761139393,1.69375,0.68975,0.8642501,760.738,8.81152,NAN,12.08542,99.28229,0,11.51643,1.219314
2014-12-22 11:47:39.4,761139394,1.551,0.1625,1.36275,760.2671,8.790375,NAN,12.10226,99.24676,0,11.51643,1.220242
2014-12-22 11:47:39.5,761139395,1.51725,-0.11075,1.74625,760.1978,8.794267,NAN,12.16629,99.24676,0,11.80331,1.2455
2014-12-22 11:47:39.6,761139396,1.95925,-0.34375,0.6390001,760.7865,8.813287,NAN,12.07193,99.24676,0,11.61206,1.227055
2014-12-22 11:47:39.7,761139397,1.3605,-0.455,1.206,760.8245,8.806199,NAN,12.12418,99.25624,0,11.70769,1.235777
2014-12-22 11:47:39.8,761139398,0.5765,-0.9160001,1.22775,760.9031,8.796409,NAN,12.12582,99.22072,0,11.70769,1.236716


In [7]:
# per-column summary of the data
summary(data)

                 TIMESTAMP            RECORD                Ux        
 2014-12-22 11:47:39.3:       1   Min.   :761139393   Min.   :-65.53  
 2014-12-22 11:47:39.4:       1   1st Qu.:768264807   1st Qu.:  0.50  
 2014-12-22 11:47:39.5:       1   Median :775390221   Median :  1.36  
 2014-12-22 11:47:39.6:       1   Mean   :775390221   Mean   :  1.79  
 2014-12-22 11:47:39.7:       1   3rd Qu.:782515635   3rd Qu.:  2.78  
 2014-12-22 11:47:39.8:       1   Max.   :789641049   Max.   : 65.53  
 (Other)              :28501651                       NA's   :267862  
       Uy               Uz              co2                  h2o         
 Min.   :-65.53   Min.   :-49.82   Min.   :-5.849e+18   Min.   : -2.122  
 1st Qu.: -1.11   1st Qu.: -0.33   1st Qu.: 8.030e+02   1st Qu.:  5.155  
 Median :  0.24   Median :  0.00   Median : 8.490e+02   Median :  6.366  
 Mean   :  0.62   Mean   :  0.01   Mean   :-3.364e+11   Mean   :  9.375  
 3rd Qu.:  2.56   3rd Qu.:  0.34   3rd Qu.: 1.101e+03   3rd Qu

In [8]:
# total number of data rows
nrow(data)

In [9]:
# split data into manageable chunks (1 mil rows each, the last one may be
# shorter); the chunk index is computed from the row position rather than
# from rownames(data), so it does not rely on the row names being the default
# 1..n and avoids converting millions of row-name strings back to numbers
data_split <- split(data, (seq_len(nrow(data)) - 1) %/% 1000000)

In [10]:
# inspect the structure of the list of chunks
str(data_split)

List of 29
 $ 0 :'data.frame':	1000000 obs. of  13 variables:
  ..$ TIMESTAMP: Factor w/ 28501657 levels "2014-12-22 11:47:39.3",..: 1 2 3 4 5 6 7 8 9 10 ...
  ..$ RECORD   : int [1:1000000] 761139393 761139394 761139395 761139396 761139397 761139398 761139399 761139400 761139401 761139402 ...
  ..$ Ux       : num [1:1000000] 1.69 1.55 1.52 1.96 1.36 ...
  ..$ Uy       : num [1:1000000] 0.69 0.163 -0.111 -0.344 -0.455 ...
  ..$ Uz       : num [1:1000000] 0.864 1.363 1.746 0.639 1.206 ...
  ..$ co2      : num [1:1000000] 761 760 760 761 761 ...
  ..$ h2o      : num [1:1000000] 8.81 8.79 8.79 8.81 8.81 ...
  ..$ n2o      : Factor w/ 1 level "NAN": 1 1 1 1 1 1 1 1 1 1 ...
  ..$ Ts       : num [1:1000000] 12.1 12.1 12.2 12.1 12.1 ...
  ..$ press    : num [1:1000000] 99.3 99.2 99.2 99.2 99.3 ...
  ..$ diag_csat: int [1:1000000] 0 0 0 0 0 0 0 0 0 0 ...
  ..$ t_hmp    : num [1:1000000] 11.5 11.5 11.8 11.6 11.7 ...
  ..$ e_hmp    : num [1:1000000] 1.22 1.22 1.25 1.23 1.24 ...
 $ 1 :'data.frame

In [12]:
# inspect the structure of the first chunk
str(data_split[[1]])

'data.frame':	1000000 obs. of  13 variables:
 $ TIMESTAMP: Factor w/ 28501657 levels "2014-12-22 11:47:39.3",..: 1 2 3 4 5 6 7 8 9 10 ...
 $ RECORD   : int  761139393 761139394 761139395 761139396 761139397 761139398 761139399 761139400 761139401 761139402 ...
 $ Ux       : num  1.69 1.55 1.52 1.96 1.36 ...
 $ Uy       : num  0.69 0.163 -0.111 -0.344 -0.455 ...
 $ Uz       : num  0.864 1.363 1.746 0.639 1.206 ...
 $ co2      : num  761 760 760 761 761 ...
 $ h2o      : num  8.81 8.79 8.79 8.81 8.81 ...
 $ n2o      : Factor w/ 1 level "NAN": 1 1 1 1 1 1 1 1 1 1 ...
 $ Ts       : num  12.1 12.1 12.2 12.1 12.1 ...
 $ press    : num  99.3 99.2 99.2 99.2 99.3 ...
 $ diag_csat: int  0 0 0 0 0 0 0 0 0 0 ...
 $ t_hmp    : num  11.5 11.5 11.8 11.6 11.7 ...
 $ e_hmp    : num  1.22 1.22 1.25 1.23 1.24 ...


In [24]:
# inspect the structure of chunk 29
str(data_split[[29]])

'data.frame':	501657 obs. of  13 variables:
 $ TIMESTAMP: Factor w/ 28501657 levels "2014-12-22 11:47:39.3",..: 28000001 28000002 28000003 28000004 28000005 28000006 28000007 28000008 28000009 28000010 ...
 $ RECORD   : int  789139393 789139394 789139395 789139396 789139397 789139398 789139399 789139400 789139401 789139402 ...
 $ Ux       : num  1.88 1.94 1.9 2.19 2.72 ...
 $ Uy       : num  -1.31 -1.21 -1 -1.62 -1.08 ...
 $ Uz       : num  -0.233 -0.294 -0.421 -0.704 -0.776 ...
 $ co2      : num  793 794 794 794 794 ...
 $ h2o      : num  5.77 5.78 5.78 5.8 5.8 ...
 $ n2o      : Factor w/ 1 level "NAN": 1 1 1 1 1 1 1 1 1 1 ...
 $ Ts       : num  6.09 6.03 6.07 6.07 6.01 ...
 $ press    : num  99.7 99.8 99.8 99.8 99.8 ...
 $ diag_csat: int  0 0 0 0 0 0 0 0 0 0 ...
 $ t_hmp    : num  5.96 5.96 5.96 5.96 5.96 ...
 $ e_hmp    : num  0.758 0.759 0.759 0.759 0.759 ...


In [45]:
# preview the first rows of the first chunk
head(data_split[[1]])

TIMESTAMP,RECORD,Ux,Uy,Uz,co2,h2o,n2o,Ts,press,diag_csat,t_hmp,e_hmp
2014-12-22 11:47:39.3,761139393,1.69375,0.68975,0.8642501,760.738,8.81152,NAN,12.08542,99.28229,0,11.51643,1.219314
2014-12-22 11:47:39.4,761139394,1.551,0.1625,1.36275,760.2671,8.790375,NAN,12.10226,99.24676,0,11.51643,1.220242
2014-12-22 11:47:39.5,761139395,1.51725,-0.11075,1.74625,760.1978,8.794267,NAN,12.16629,99.24676,0,11.80331,1.2455
2014-12-22 11:47:39.6,761139396,1.95925,-0.34375,0.6390001,760.7865,8.813287,NAN,12.07193,99.24676,0,11.61206,1.227055
2014-12-22 11:47:39.7,761139397,1.3605,-0.455,1.206,760.8245,8.806199,NAN,12.12418,99.25624,0,11.70769,1.235777
2014-12-22 11:47:39.8,761139398,0.5765,-0.9160001,1.22775,760.9031,8.796409,NAN,12.12582,99.22072,0,11.70769,1.236716


In [46]:
# preview the first rows of chunk 29
head(data_split[[29]])

Unnamed: 0,TIMESTAMP,RECORD,Ux,Uy,Uz,co2,h2o,n2o,Ts,press,diag_csat,t_hmp,e_hmp
28000001,2015-01-23 21:34:19.3,789139393,1.88225,-1.31275,-0.23325,793.3365,5.768875,NAN,6.093353,99.74144,0,5.962234,0.7584324
28000002,2015-01-23 21:34:19.4,789139394,1.937,-1.206,-0.29425,793.7191,5.77509,NAN,6.031708,99.75114,0,5.962234,0.7590697
28000003,2015-01-23 21:34:19.5,789139395,1.898,-1.00475,-0.421,793.7352,5.782685,NAN,6.065033,99.75114,0,5.962234,0.7590697
28000004,2015-01-23 21:34:19.6,789139396,2.1945,-1.625,-0.7040001,793.5444,5.796525,NAN,6.06839,99.75114,0,5.962234,0.7590697
28000005,2015-01-23 21:34:19.7,789139397,2.717,-1.0795,-0.7765,793.8083,5.795643,NAN,6.005035,99.75114,0,5.962234,0.7590697
28000006,2015-01-23 21:34:19.8,789139398,2.631,-0.65525,-0.20025,793.7773,5.798342,NAN,6.101715,99.74144,0,6.249413,0.776888


In [17]:
# number of rows in the first chunk
nrow(data_split[[1]])

In [19]:
# number of rows in chunk 29
nrow(data_split[[29]])

In [43]:
# write every chunk to its own CSV file; the file name carries the chunk's
# position in data_split as a zero-padded two-digit number (01, 02, ...)
for (n in seq_along(data_split)) {
  write.csv(
    data_split[[n]],
    file = sprintf("data/CSV/TOA5_2806.ts_data.%02d.csv", n),
    quote = FALSE,
    row.names = FALSE
  )
}

## TOA5_1541.ts_data.dat

Split the TOA5 file (~28.5 million rows, 2.6 GB) into files of at most 1 million rows each.

In [21]:
# view the first 7 lines of the raw file (the 4 header lines plus the first
# 3 records); readLines() is used instead of shelling out to `sed`, so the
# cell does not depend on an external binary or on shell quoting of the path
readLines("data/TOA5/TOA5_1541.ts_data.dat", n = 7)

In [1]:
# load the data records: the first 4 lines of the file are skipped and no
# line is treated as a header, so the columns get the default names V1, V2, ...
data <- read.csv(
  file = "data/TOA5/TOA5_1541.ts_data.dat",
  header = FALSE,
  skip = 4
)

In [3]:
# read file lines 2-4 (skip the first line, then take 3 rows) as the headers
headers <- read.csv(
  file = "data/TOA5/TOA5_1541.ts_data.dat",
  header = FALSE,
  skip = 1,
  nrows = 3
)

In [4]:
# print the three header rows that were just read
headers

V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,V11
TIMESTAMP,RECORD,Ux,Uy,Uz,co2,h2o,Ts,press,diag_csat,diag_irga
TS,RN,m/s,m/s,m/s,mg/(m^3),g/(m^3),C,kPa,unitless,unitless
,,Smp,Smp,Smp,Smp,Smp,Smp,Smp,Smp,Smp


In [5]:
# store the header rows in their own CSV file; row names and the default
# V1..Vn column names are left out so only the original header text is written
write.table(
  x = headers,
  file = "data/CSV/TOA5_1541.ts_data.headers.csv",
  sep = ",",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)

In [6]:
# label the data columns with the field names found on file line 2
names(data) <- as.matrix(
  read.csv(
    file = "data/TOA5/TOA5_1541.ts_data.dat",
    header = FALSE,
    skip = 1,
    nrows = 1
  )
)

In [7]:
# preview the first rows of the data
head(data)

TIMESTAMP,RECORD,Ux,Uy,Uz,co2,h2o,Ts,press,diag_csat,diag_irga
2014-12-22 11:34:40.5,0,7.625,-0.60975,-1.09925,679.6425,6.929612,11.20652,99.57032,0,0
2014-12-22 11:34:40.6,1,8.6485,-1.2965,-0.332,679.6145,6.925771,11.27213,99.57032,0,0
2014-12-22 11:34:40.7,2,6.9455,-0.5362501,1.85525,679.9779,6.920726,11.40673,99.57032,0,0
2014-12-22 11:34:40.8,3,5.9675,0.0405,2.43275,679.7805,6.925632,11.24857,99.57032,0,0
2014-12-22 11:34:40.9,4,6.41425,-1.23175,0.9755,679.8032,6.919812,11.25699,99.53477,0,0
2014-12-22 11:34:41,5,8.913501,1.646,0.9875001,679.8406,6.957347,11.82599,99.53477,0,0


In [8]:
# per-column summary of the data
summary(data)

                 TIMESTAMP            RECORD               Ux        
 2014-12-22 11:34:40.5:       1   Min.   :       0   Min.   :-65.53  
 2014-12-22 11:34:40.6:       1   1st Qu.: 7121824   1st Qu.:  0.35  
 2014-12-22 11:34:40.7:       1   Median :14243648   Median :  1.25  
 2014-12-22 11:34:40.8:       1   Mean   :14243648   Mean   :  1.82  
 2014-12-22 11:34:40.9:       1   3rd Qu.:21365471   3rd Qu.:  2.91  
 2014-12-22 11:34:41  :       1   Max.   :28487295   Max.   : 65.53  
 (Other)              :28487290                      NA's   :104082  
       Uy               Uz              co2               h2o         
 Min.   :-65.53   Min.   :-40.70   Min.   :-7014.9   Min.   : -3.588  
 1st Qu.: -0.99   1st Qu.: -0.27   1st Qu.:  706.7   1st Qu.:  3.647  
 Median :  0.08   Median :  0.03   Median :  728.5   Median :  4.827  
 Mean   :  0.23   Mean   :  0.07   Mean   :  848.0   Mean   :  6.664  
 3rd Qu.:  1.46   3rd Qu.:  0.41   3rd Qu.:  908.0   3rd Qu.:  8.177  
 Max.   : 65.5

In [9]:
# total number of data rows
nrow(data)

In [10]:
# split data into chunks of 1 million rows (the last one may be shorter); the
# chunk index is computed from the row position rather than from
# rownames(data), so it does not rely on the row names being the default 1..n
# and avoids converting millions of row-name strings back to numbers
data_split <- split(data, (seq_len(nrow(data)) - 1) %/% 1000000)

In [11]:
# inspect the structure of the list of chunks
str(data_split)

List of 29
 $ 0 :'data.frame':	1000000 obs. of  11 variables:
  ..$ TIMESTAMP: Factor w/ 28487296 levels "2014-12-22 11:34:40.5",..: 1 2 3 4 5 6 7 8 9 10 ...
  ..$ RECORD   : int [1:1000000] 0 1 2 3 4 5 6 7 8 9 ...
  ..$ Ux       : num [1:1000000] 7.62 8.65 6.95 5.97 6.41 ...
  ..$ Uy       : num [1:1000000] -0.6098 -1.2965 -0.5363 0.0405 -1.2317 ...
  ..$ Uz       : num [1:1000000] -1.099 -0.332 1.855 2.433 0.976 ...
  ..$ co2      : num [1:1000000] 680 680 680 680 680 ...
  ..$ h2o      : num [1:1000000] 6.93 6.93 6.92 6.93 6.92 ...
  ..$ Ts       : num [1:1000000] 11.2 11.3 11.4 11.2 11.3 ...
  ..$ press    : num [1:1000000] 99.6 99.6 99.6 99.6 99.5 ...
  ..$ diag_csat: Factor w/ 18 levels "0","1","10","11",..: 1 1 1 1 1 1 1 1 1 1 ...
  ..$ diag_irga: int [1:1000000] 0 0 0 0 0 0 0 0 0 0 ...
 $ 1 :'data.frame':	1000000 obs. of  11 variables:
  ..$ TIMESTAMP: Factor w/ 28487296 levels "2014-12-22 11:34:40.5",..: 1000001 1000002 1000003 1000004 1000005 1000006 1000007 1000008 1000009 1

In [12]:
# write every chunk to its own CSV file; the file name carries the chunk's
# position in data_split as a zero-padded two-digit number (01, 02, ...)
for (n in seq_along(data_split)) {
  write.csv(
    data_split[[n]],
    file = sprintf("data/CSV/TOA5_1541.ts_data.%02d.csv", n),
    quote = FALSE,
    row.names = FALSE
  )
}

## TOA5_3989.ts_data.dat

Split the TOA5 file (~5.7 million rows, ~500 MB) into files of at most 1 million rows each.

In [22]:
# view the first 7 lines of the raw file (the 4 header lines plus the first
# 3 records); readLines() is used instead of shelling out to `sed`, so the
# cell does not depend on an external binary or on shell quoting of the path
readLines("data/TOA5/TOA5_3989.ts_data.dat", n = 7)

In [13]:
# load the data records: the first 4 lines of the file are skipped and no
# line is treated as a header, so the columns get the default names V1, V2, ...
data <- read.csv(
  file = "data/TOA5/TOA5_3989.ts_data.dat",
  header = FALSE,
  skip = 4
)

In [14]:
# read file lines 2-4 (skip the first line, then take 3 rows) as the headers
headers <- read.csv(
  file = "data/TOA5/TOA5_3989.ts_data.dat",
  header = FALSE,
  skip = 1,
  nrows = 3
)

In [15]:
# print the three header rows that were just read
headers

V1,V2,V3,V4,V5,V6,V7,V8,V9
TIMESTAMP,RECORD,Ux,Uy,Uz,Ts,co2,h2o,press
TS,RN,m/s,m/s,m/s,C,mg/m^3,g/m^3,kPa
,,Smp,Smp,Smp,Smp,Smp,Smp,Smp


In [16]:
# store the header rows in their own CSV file; row names and the default
# V1..Vn column names are left out so only the original header text is written
write.table(
  x = headers,
  file = "data/CSV/TOA5_3989.ts_data.headers.csv",
  sep = ",",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)

In [17]:
# label the data columns with the field names found on file line 2
names(data) <- as.matrix(
  read.csv(
    file = "data/TOA5/TOA5_3989.ts_data.dat",
    header = FALSE,
    skip = 1,
    nrows = 1
  )
)

In [18]:
# preview the first rows of the data
head(data)

TIMESTAMP,RECORD,Ux,Uy,Uz,Ts,co2,h2o,press
2014-11-19 12:01:08.1,527133419,-1.03475,3.51025,-0.03675,16.12335,602.0576,8.069658,99.81107
2014-11-19 12:01:08.2,527133420,-1.18775,3.2405,0.1125,16.17084,601.5311,8.136931,99.83704
2014-11-19 12:01:08.3,527133421,-1.3305,3.3055,0.04375,16.12335,600.9059,8.225385,99.81107
2014-11-19 12:01:08.4,527133422,-1.5575,4.13675,0.1015,16.20645,600.8302,8.259484,99.78508
2014-11-19 12:01:08.5,527133423,-2.05025,4.57125,0.4515,16.15726,601.2501,8.192789,99.83037
2014-11-19 12:01:08.6,527133424,-2.288,4.96575,0.5700001,16.35754,601.7148,8.093515,99.77545


In [19]:
# per-column summary of the data
summary(data)

                 TIMESTAMP           RECORD                Ux        
 2014-11-19 12:01:08.1:      1   Min.   :527133419   Min.   :-65.53  
 2014-11-19 12:01:08.2:      1   1st Qu.:528551702   1st Qu.: -0.92  
 2014-11-19 12:01:08.3:      1   Median :529969986   Median :  0.34  
 2014-11-19 12:01:08.4:      1   Mean   :529969986   Mean   :  0.09  
 2014-11-19 12:01:08.5:      1   3rd Qu.:531388270   3rd Qu.:  1.28  
 2014-11-19 12:01:08.6:      1   Max.   :532806553   Max.   : 65.53  
 (Other)              :5673129                       NA's   :182988  
       Uy               Uz               Ts              co2         
 Min.   :-65.53   Min.   :-48.29   Min.   :-38.26   Min.   : -640.9  
 1st Qu.:  0.16   1st Qu.: -0.18   1st Qu.: 11.34   1st Qu.:  619.1  
 Median :  1.16   Median : -0.04   Median : 14.08   Median :  632.9  
 Mean   :  1.36   Mean   : -0.02   Mean   : 14.65   Mean   :  823.5  
 3rd Qu.:  2.86   3rd Qu.:  0.09   3rd Qu.: 16.37   3rd Qu.:  852.2  
 Max.   : 65.53   Ma

In [20]:
# total number of data rows
nrow(data)

In [21]:
# split data into chunks of 1 million rows (the last one may be shorter); the
# chunk index is computed from the row position rather than from
# rownames(data), so it does not rely on the row names being the default 1..n
# and avoids converting millions of row-name strings back to numbers
data_split <- split(data, (seq_len(nrow(data)) - 1) %/% 1000000)

In [22]:
# inspect the structure of the list of chunks
str(data_split)

List of 6
 $ 0:'data.frame':	1000000 obs. of  9 variables:
  ..$ TIMESTAMP: Factor w/ 5673135 levels "2014-11-19 12:01:08.1",..: 1 2 3 4 5 6 7 8 9 10 ...
  ..$ RECORD   : int [1:1000000] 527133419 527133420 527133421 527133422 527133423 527133424 527133425 527133426 527133427 527133428 ...
  ..$ Ux       : num [1:1000000] -1.03 -1.19 -1.33 -1.56 -2.05 ...
  ..$ Uy       : num [1:1000000] 3.51 3.24 3.31 4.14 4.57 ...
  ..$ Uz       : num [1:1000000] -0.0367 0.1125 0.0437 0.1015 0.4515 ...
  ..$ Ts       : num [1:1000000] 16.1 16.2 16.1 16.2 16.2 ...
  ..$ co2      : num [1:1000000] 602 602 601 601 601 ...
  ..$ h2o      : num [1:1000000] 8.07 8.14 8.23 8.26 8.19 ...
  ..$ press    : num [1:1000000] 99.8 99.8 99.8 99.8 99.8 ...
 $ 1:'data.frame':	1000000 obs. of  9 variables:
  ..$ TIMESTAMP: Factor w/ 5673135 levels "2014-11-19 12:01:08.1",..: 1000001 1000002 1000003 1000004 1000005 1000006 1000007 1000008 1000009 1000010 ...
  ..$ RECORD   : int [1:1000000] 528133419 528133420 52813342

In [23]:
# write every chunk to its own CSV file; the file name carries the chunk's
# position in data_split as a zero-padded two-digit number (01, 02, ...)
for (n in seq_along(data_split)) {
  write.csv(
    data_split[[n]],
    file = sprintf("data/CSV/TOA5_3989.ts_data.%02d.csv", n),
    quote = FALSE,
    row.names = FALSE
  )
}

## TOA5_3989.flux.dat

In [47]:
# view the first 7 lines of the raw file (the 4 header lines plus the first
# 3 records); readLines() is used instead of shelling out to `sed`, so the
# cell does not depend on an external binary or on shell quoting of the path
readLines("data/TOA5/TOA5_3989.flux.dat", n = 7)

In [48]:
# load the data records: the first 4 lines of the file are skipped and no
# line is treated as a header, so the columns get the default names V1, V2, ...
data <- read.csv(
  file = "data/TOA5/TOA5_3989.flux.dat",
  header = FALSE,
  skip = 4
)

In [49]:
# read file lines 2-4 (skip the first line, then take 3 rows) as the headers
headers <- read.csv(
  file = "data/TOA5/TOA5_3989.flux.dat",
  header = FALSE,
  skip = 1,
  nrows = 3
)

In [50]:
# print the three header rows that were just read
headers

V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,⋯,V45,V46,V47,V48,V49,V50,V51,V52,V53,V54
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
TIMESTAMP,RECORD,Fc_wpl,LE_wpl,Hs,Hc,tau,u_star,stdev_Ts,cov_Ts_Ux,⋯,agc_Avg,Fc_irga,LE_irga,co2_wpl_LE,co2_wpl_H,h2o_wpl_LE,h2o_wpl_H,h2o_hmp_mean,t_hmp_mean,rh_hmp_mean
TS,RN,mg/(m^2 s),W/m^2,W/m^2,W/m^2,kg/(m s^2),m/s,C,m C/s,⋯,unitless,mg/(m^2 s),W/m^2,mg/(m^2 s),mg/(m^2 s),W/m^2,W/m^2,g/m^3,C,percent
,,Smp,Smp,Smp,Smp,Smp,Smp,Smp,Smp,⋯,Avg,Smp,Smp,Smp,Smp,Smp,Smp,Smp,Smp,Smp


In [51]:
# store the header rows in their own CSV file; row names and the default
# V1..Vn column names are left out so only the original header text is written
write.table(
  x = headers,
  file = "data/CSV/TOA5_3989.flux.headers.csv",
  sep = ",",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)

In [52]:
# label the data columns with the field names found on file line 2
names(data) <- as.matrix(
  read.csv(
    file = "data/TOA5/TOA5_3989.flux.dat",
    header = FALSE,
    skip = 1,
    nrows = 1
  )
)

In [53]:
# preview the first rows of the data
head(data)

Unnamed: 0_level_0,TIMESTAMP,RECORD,Fc_wpl,LE_wpl,Hs,Hc,tau,u_star,stdev_Ts,cov_Ts_Ux,⋯,agc_Avg,Fc_irga,LE_irga,co2_wpl_LE,co2_wpl_H,h2o_wpl_LE,h2o_wpl_H,h2o_hmp_mean,t_hmp_mean,rh_hmp_mean
Unnamed: 0_level_1,<chr>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,2014-11-19 12:30:00,29287,-0.02738776,41.11178,27.61276,24.76925,0.2095944,0.4146389,0.3397227,-0.01496416,⋯,56.0,-0.08351655,39.15887,0.01285764,0.04327115,0.410987,1.541922,7.890273,11.29643,77.49195
2,2014-11-19 13:00:00,29288,-0.03498442,46.15891,28.56569,25.41201,0.1617521,0.3645584,0.255728,0.008030742,⋯,56.0,-0.09378111,44.07496,0.01447388,0.04432281,0.4691145,1.614831,7.987618,11.80993,75.96396
3,2014-11-19 13:30:00,29289,0.1808234,-6.167122,-24.50253,-23.7634,0.1895548,0.3944006,0.8363222,-1.465492,⋯,56.0,0.2240997,-4.744839,-0.00156682,-0.04170947,-0.04999511,-1.372288,7.917885,11.58026,76.34363
4,2014-11-19 14:00:00,29290,2.181447,-36.54642,-8.166809,-5.908159,0.07494499,0.246819,0.8353181,-0.8943371,⋯,59.64111,2.224159,-35.73215,-0.02265313,-0.02005932,-0.3661243,-0.4481432,7.774566,9.416086,85.95309
5,2014-11-19 14:30:00,29291,-0.07790546,37.35801,-5.19113,-7.278997,0.03453151,0.1677954,0.3394781,-0.03034091,⋯,56.0,-0.07737738,37.25424,0.01248777,-0.01301585,0.4042646,-0.3004996,8.205495,10.59023,84.24697
6,2014-11-19 15:00:00,29292,-0.01854837,29.91426,-19.60791,-21.03238,0.08993658,0.2712201,0.2265035,-0.1086466,⋯,56.0,0.008064821,30.7132,0.01006716,-0.03668035,0.3333376,-1.13228,8.181075,11.32896,80.18222


In [54]:
# per-column summary of the data
summary(data)

  TIMESTAMP             RECORD          Fc_wpl              LE_wpl        
 Length:315         Min.   :29287   Min.   :-30.77266   Min.   :-989.053  
 Class :character   1st Qu.:29366   1st Qu.: -0.03088   1st Qu.:  -2.588  
 Mode  :character   Median :29444   Median :  0.00539   Median :   4.270  
                    Mean   :29444   Mean   :  0.42724   Mean   :  40.929  
                    3rd Qu.:29522   3rd Qu.:  0.04355   3rd Qu.:  25.208  
                    Max.   :29601   Max.   : 44.67786   Max.   :3450.499  
                                    NA's   :33          NA's   :33        
       Hs                Hc                tau              u_star       
 Min.   :-293.11   Min.   :-281.913   Min.   :0.00014   Min.   :0.01061  
 1st Qu.: -31.69   1st Qu.: -33.786   1st Qu.:0.01121   1st Qu.:0.09429  
 Median : -16.97   Median : -17.278   Median :0.05655   Median :0.21326  
 Mean   : -19.75   Mean   : -21.704   Mean   :0.12857   Mean   :0.25505  
 3rd Qu.:  -3.43   3rd Qu.:  -

In [55]:
# total number of data rows
nrow(data)

In [56]:
# save the data records, with their column names as the first line, as a
# single CSV file; `data` is written here (not `headers`, which is already
# stored in its own *.headers.csv file)
write.table(
  data, "data/CSV/TOA5_3989.flux.csv", row.names = FALSE,
  sep = ",", quote = FALSE
)

## TOA5_3989.flux (2021_09_17 14_49_19 UTC) (2015).dat

In [36]:
# view the first 7 lines of the raw file (the 4 header lines plus the first
# 3 records); readLines() is used instead of shelling out to `sed`, so the
# cell does not depend on an external binary or on shell quoting of a path
# that contains spaces and parentheses
readLines(
  "data/TOA5/TOA5_3989.flux (2021_09_17 14_49_19 UTC) (2015).dat",
  n = 7
)

In [37]:
# load the data records: the first 4 lines of the file are skipped and no
# line is treated as a header, so the columns get the default names V1, V2, ...
data <- read.csv(
  file = "data/TOA5/TOA5_3989.flux (2021_09_17 14_49_19 UTC) (2015).dat",
  header = FALSE,
  skip = 4
)

In [38]:
# read file lines 2-4 (skip the first line, then take 3 rows) as the headers
headers <- read.csv(
  file = "data/TOA5/TOA5_3989.flux (2021_09_17 14_49_19 UTC) (2015).dat",
  header = FALSE,
  skip = 1,
  nrows = 3
)

In [39]:
# print the three header rows that were just read
headers

V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,⋯,V45,V46,V47,V48,V49,V50,V51,V52,V53,V54
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
TIMESTAMP,RECORD,Fc_wpl,LE_wpl,Hs,Hc,tau,u_star,stdev_Ts,cov_Ts_Ux,⋯,agc_Avg,Fc_irga,LE_irga,co2_wpl_LE,co2_wpl_H,h2o_wpl_LE,h2o_wpl_H,h2o_hmp_mean,t_hmp_mean,rh_hmp_mean
TS,RN,mg/(m^2 s),W/m^2,W/m^2,W/m^2,kg/(m s^2),m/s,C,m C/s,⋯,unitless,mg/(m^2 s),W/m^2,mg/(m^2 s),mg/(m^2 s),W/m^2,W/m^2,g/m^3,C,percent
,,Smp,Smp,Smp,Smp,Smp,Smp,Smp,Smp,⋯,Avg,Smp,Smp,Smp,Smp,Smp,Smp,Smp,Smp,Smp


In [40]:
# store the header rows in their own CSV file; row names and the default
# V1..Vn column names are left out so only the original header text is written
write.table(
  x = headers,
  file = "data/CSV/TOA5_3989.2015.flux.headers.csv",
  sep = ",",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)

In [41]:
# label the data columns with the field names found on file line 2
names(data) <- as.matrix(
  read.csv(
    file = "data/TOA5/TOA5_3989.flux (2021_09_17 14_49_19 UTC) (2015).dat",
    header = FALSE,
    skip = 1,
    nrows = 1
  )
)

In [42]:
# preview the first rows of the data
head(data)

Unnamed: 0_level_0,TIMESTAMP,RECORD,Fc_wpl,LE_wpl,Hs,Hc,tau,u_star,stdev_Ts,cov_Ts_Ux,⋯,agc_Avg,Fc_irga,LE_irga,co2_wpl_LE,co2_wpl_H,h2o_wpl_LE,h2o_wpl_H,h2o_hmp_mean,t_hmp_mean,rh_hmp_mean
Unnamed: 0_level_1,<chr>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,2015-04-16 10:30:00,33528,-0.07050741,75.43386,139.7478,133.3558,0.01570627,0.1136141,0.7702006,0.0287227,⋯,56,-0.3332449,67.49818,0.02296296,0.2397746,0.6645014,7.271169,7.389947,13.13227,64.73261
2,2015-04-16 11:00:00,33529,-0.06113067,97.84227,183.5227,175.0836,0.006824024,0.07498017,0.8852791,0.0007770061,⋯,56,-0.4046633,87.37871,0.02970162,0.313831,0.8665181,9.597045,7.42556,13.78645,62.4719
3,2015-04-16 11:30:00,33530,0.009571165,104.2355,199.8177,190.7273,0.08037375,0.257495,0.9643432,-0.291831,⋯,56,-0.3658124,92.67695,0.03170475,0.3436788,0.9354095,10.6231,7.546908,14.12835,62.17416
4,2015-04-16 12:00:00,33531,-0.05365613,114.7807,169.498,160.3188,0.03706582,0.1750383,0.8076537,0.1571966,⋯,56,-0.3771344,104.774,0.0357648,0.2877135,1.052753,8.953876,7.498158,14.66202,59.78199
5,2015-04-16 12:30:00,33532,-0.05869639,122.2372,205.8765,195.6567,0.0580382,0.2191867,0.9739804,-0.2421052,⋯,56,-0.4449696,110.4093,0.03748565,0.3487876,1.096131,10.73175,7.398589,15.02505,57.70282
6,2015-04-16 13:00:00,33533,-0.03985274,111.9211,187.7205,178.3878,0.03275815,0.1647807,0.8736999,0.07383512,⋯,56,-0.3915091,101.3153,0.0343656,0.3172907,0.9897227,9.615979,7.271001,15.38198,55.4857


In [43]:
# per-column summary of the data
summary(data)

  TIMESTAMP             RECORD          Fc_wpl              LE_wpl         
 Length:1353        Min.   :33528   Min.   :-26.63347   Min.   :-1517.564  
 Class :character   1st Qu.:33866   1st Qu.: -0.05101   1st Qu.:    3.198  
 Mode  :character   Median :34204   Median :  0.00340   Median :   29.763  
                    Mean   :34204   Mean   :  0.28041   Mean   :   44.565  
                    3rd Qu.:34542   3rd Qu.:  0.04476   3rd Qu.:   79.674  
                    Max.   :34880   Max.   : 37.98938   Max.   : 3423.436  
                                    NA's   :25          NA's   :25         
       Hs                  Hc                 tau               u_star       
 Min.   :-289.0884   Min.   :-282.8151   Min.   :0.000322   Min.   :0.01613  
 1st Qu.: -20.8547   1st Qu.: -20.9109   1st Qu.:0.039111   1st Qu.:0.17856  
 Median :  -0.3793   Median :  -0.6386   Median :0.140703   Median :0.33886  
 Mean   :  28.5935   Mean   :  25.6023   Mean   :0.202495   Mean   :0.34968  
 3

In [44]:
# total number of data rows
nrow(data)

In [45]:
# save the data records, with their column names as the first line, as a
# single CSV file; `data` is written here (not `headers`, which is already
# stored in its own *.headers.csv file)
write.table(
  data, "data/CSV/TOA5_3989.2015.flux.csv", row.names = FALSE,
  sep = ",", quote = FALSE
)

## TOA5_3989.flux (2021_09_17 14_49_19 UTC) (2016).dat

In [57]:
# view the first 7 lines of the raw file (the 4 header lines plus the first
# 3 records); readLines() is used instead of shelling out to `sed`, so the
# cell does not depend on an external binary or on shell quoting of a path
# that contains spaces and parentheses
readLines(
  "data/TOA5/TOA5_3989.flux (2021_09_17 14_49_19 UTC) (2016).dat",
  n = 7
)

In [58]:
# load the data records: the first 4 lines of the file are skipped and no
# line is treated as a header, so the columns get the default names V1, V2, ...
data <- read.csv(
  file = "data/TOA5/TOA5_3989.flux (2021_09_17 14_49_19 UTC) (2016).dat",
  header = FALSE,
  skip = 4
)

In [59]:
# read file lines 2-4 (skip the first line, then take 3 rows) as the headers
headers <- read.csv(
  file = "data/TOA5/TOA5_3989.flux (2021_09_17 14_49_19 UTC) (2016).dat",
  header = FALSE,
  skip = 1,
  nrows = 3
)

In [60]:
# print the three header rows that were just read
headers

V1,V2,V3,V4,V5,V6,V7,V8,V9,V10,⋯,V45,V46,V47,V48,V49,V50,V51,V52,V53,V54
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,⋯,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
TIMESTAMP,RECORD,Fc_wpl,LE_wpl,Hs,Hc,tau,u_star,stdev_Ts,cov_Ts_Ux,⋯,agc_Avg,Fc_irga,LE_irga,co2_wpl_LE,co2_wpl_H,h2o_wpl_LE,h2o_wpl_H,h2o_hmp_mean,t_hmp_mean,rh_hmp_mean
TS,RN,mg/(m^2 s),W/m^2,W/m^2,W/m^2,kg/(m s^2),m/s,C,m C/s,⋯,unitless,mg/(m^2 s),W/m^2,mg/(m^2 s),mg/(m^2 s),W/m^2,W/m^2,g/m^3,C,percent
,,Smp,Smp,Smp,Smp,Smp,Smp,Smp,Smp,⋯,Avg,Smp,Smp,Smp,Smp,Smp,Smp,Smp,Smp,Smp


In [61]:
# store the header rows in their own CSV file; row names and the default
# V1..Vn column names are left out so only the original header text is written
write.table(
  x = headers,
  file = "data/CSV/TOA5_3989.2016.flux.headers.csv",
  sep = ",",
  quote = FALSE,
  row.names = FALSE,
  col.names = FALSE
)

In [62]:
# label the data columns with the field names found on file line 2
names(data) <- as.matrix(
  read.csv(
    file = "data/TOA5/TOA5_3989.flux (2021_09_17 14_49_19 UTC) (2016).dat",
    header = FALSE,
    skip = 1,
    nrows = 1
  )
)

In [63]:
# preview the first rows of the data
head(data)

Unnamed: 0_level_0,TIMESTAMP,RECORD,Fc_wpl,LE_wpl,Hs,Hc,tau,u_star,stdev_Ts,cov_Ts_Ux,⋯,agc_Avg,Fc_irga,LE_irga,co2_wpl_LE,co2_wpl_H,h2o_wpl_LE,h2o_wpl_H,h2o_hmp_mean,t_hmp_mean,rh_hmp_mean
Unnamed: 0_level_1,<chr>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,2016-06-09 08:30:00,44801,-0.1307632,37.03913,25.60763,23.3344,0.1181751,0.3133757,0.2528313,-0.01505542,⋯,50,-0.1936334,34.59666,0.0138582,0.04901194,0.5003809,1.942085,10.70669,15.89368,79.20306
2,2016-06-09 09:00:00,44802,-0.1701145,54.49001,62.58549,59.215,0.1070421,0.298587,0.4525773,0.008037789,⋯,50,-0.3134294,48.85725,0.01951842,0.1237964,0.7311883,4.901563,11.05024,16.5283,78.67393
3,2016-06-09 09:30:00,44803,-0.168336,60.68195,68.70016,64.93797,0.09754083,0.2851819,0.3828293,-0.06610256,⋯,50,-0.325776,54.51952,0.02178382,0.1356562,0.8121096,5.350313,10.98709,16.78893,77.00739
4,2016-06-09 10:00:00,44804,-0.1584697,58.81278,63.49821,59.84708,0.1149845,0.3098198,0.3687277,-0.118348,⋯,50,-0.304428,53.02826,0.02116952,0.1247887,0.797385,4.987134,11.07698,17.0913,76.24122
5,2016-06-09 10:30:00,44805,-0.1508173,51.99655,66.14544,62.90998,0.1118995,0.3056701,0.3978571,-0.0784955,⋯,50,-0.3005183,46.12586,0.01842926,0.1312717,0.6915368,5.179154,11.04192,17.11374,75.89794
6,2016-06-09 11:00:00,44806,-0.2001991,79.05721,114.4625,109.5288,0.1561154,0.3614822,0.5429151,0.02038099,⋯,50,-0.4554492,68.96523,0.02751203,0.227738,1.045732,9.04624,11.13956,17.71659,73.86147


In [64]:
# per-column summary of the data
summary(data)

  TIMESTAMP             RECORD          Fc_wpl               LE_wpl         
 Length:1355        Min.   :44801   Min.   :-6.065e+07   Min.   :-1136.733  
 Class :character   1st Qu.:45140   1st Qu.: 0.000e+00   1st Qu.:    8.797  
 Mode  :character   Median :45478   Median : 0.000e+00   Median :   37.100  
                    Mean   :45478   Mean   : 3.569e+15   Mean   :   55.730  
                    3rd Qu.:45816   3rd Qu.: 0.000e+00   3rd Qu.:   91.187  
                    Max.   :46155   Max.   : 4.743e+18   Max.   : 2474.965  
                                    NA's   :26           NA's   :26         
       Hs                 Hc               tau               u_star       
 Min.   :-360.390   Min.   :-346.11   Min.   :0.000574   Min.   :0.02169  
 1st Qu.:  -9.702   1st Qu.: -10.41   1st Qu.:0.055775   1st Qu.:0.21442  
 Median :   6.950   Median :   5.58   Median :0.116427   Median :0.31113  
 Mean   :  30.313   Mean   :  26.96   Mean   :0.176259   Mean   :0.33535  
 3rd Qu.:

In [65]:
# total number of data rows
nrow(data)

In [66]:
# save the data records, with their column names as the first line, as a
# single CSV file; `data` is written here (not `headers`, which is already
# stored in its own *.headers.csv file)
write.table(
  data, "data/CSV/TOA5_3989.2016.flux.csv", row.names = FALSE,
  sep = ",", quote = FALSE
)