# Process ICOS-2023 data
- replace sentinel values with NaN
- downsample from half-hourly to hourly (if needed for each site)

In [1]:
import os
import numpy as np
import pandas as pd
import shutil

In [2]:
COLS_VA = ['TA_F', 'SW_IN_F', 'LW_IN_F', 'VPD_F', 'PA_F', 'P_F', 'WS_F', 'WD', 'RH', 'USTAR', 'NETRAD', 'PPFD_IN', 'PPFD_DIF', 'PPFD_OUT', 'SW_DIF', 'SW_OUT', 'LW_OUT',
        'CO2_F_MDS', 'G_F_MDS', 'LE_F_MDS', 'H_F_MDS', 'NEE_VUT_REF', 'RECO_NT_VUT_REF', 'RECO_DT_VUT_REF', 'GPP_NT_VUT_REF', 'GPP_DT_VUT_REF']
COLS_QC = [f'{c}_QC' for c in COLS_VA]
COLS_TS = ['TIMESTAMP_START']

collection = 'icos-2023'

INPUT_DIR = os.path.join('data', 'raw', collection, 'unzipped')
META_FILE = os.path.join('data', 'raw', collection, 'site_data.csv')
OUTPUT_DIR = os.path.join('data', 'intermediate', 'test_int_1', collection)

In [3]:
def process_site_dataframe(df, downsample=True):
    df = df.replace(-9999.0, np.nan)
    for column in COLS_VA + COLS_QC:
        if column not in df.columns:
            df[column] = np.nan
    
    df_ts = df[COLS_TS]
    df_va = df[COLS_VA]
    df_qc = df[COLS_QC]

    if downsample:
        # Average to hourly data
        grouping_key = np.arange(len(df_va)) // 2
        df_va = df_va.groupby(grouping_key).mean().reset_index(drop=True)
        df_ts = df_ts.iloc[::2,:].reset_index(drop=True)
        df_qc = df_qc.iloc[::2,:].reset_index(drop=True)
    
    # Double precipitation, as this should not be averaged
    df_va['P_F'] = df_va['P_F'] * 2.0
    df = pd.concat([df_ts, df_va, df_qc], axis=1)
    return df

In [4]:
data = []
for site in os.listdir(INPUT_DIR):
    files = os.listdir(os.path.join(INPUT_DIR, site))
    fluxnet_compatible_files = [f for f in files if 'FLUXNET_HH_L2' in f and 'VARINFO' not in f]
    if len(fluxnet_compatible_files) != 1:
        print(f'ERROR: No compatible file found for {site}')
        continue
    file = fluxnet_compatible_files[0]
    data.append((site, os.path.join(INPUT_DIR, site, file), collection, True))

ERROR: No compatible file found for DE-BeR
ERROR: No compatible file found for FI-Kvr
ERROR: No compatible file found for FI-Kmp


In [5]:
if not os.path.exists(OUTPUT_DIR):
    os.makedirs(OUTPUT_DIR)

for site, file, source, downsample in data:
    print(f'Processing {file}...')
    site_dir = os.path.join(OUTPUT_DIR, site)
    if not os.path.exists(site_dir):
        os.makedirs(site_dir)
    
    site_df = pd.read_csv(file)
    processed_df = process_site_dataframe(site_df, downsample=downsample)
    min_time = processed_df['TIMESTAMP_START'].min()
    max_time = processed_df['TIMESTAMP_START'].max()
    outfile = os.path.join(site_dir, f'{min_time}_{max_time}_{source}.csv')
    processed_df.to_csv(outfile, index=False)

Processing data/raw/icos-2023/unzipped/IT-Tor/ICOSETC_IT-Tor_FLUXNET_HH_L2.csv...
Processing data/raw/icos-2023/unzipped/FR-LGt/ICOSETC_FR-LGt_FLUXNET_HH_L2.csv...
Processing data/raw/icos-2023/unzipped/DE-Har/ICOSETC_DE-Har_FLUXNET_HH_L2.csv...
Processing data/raw/icos-2023/unzipped/CH-Dav/ICOSETC_CH-Dav_FLUXNET_HH_L2.csv...
Processing data/raw/icos-2023/unzipped/FI-Hyy/ICOSETC_FI-Hyy_FLUXNET_HH_L2.csv...
Processing data/raw/icos-2023/unzipped/IT-Ren/ICOSETC_IT-Ren_FLUXNET_HH_L2.csv...
Processing data/raw/icos-2023/unzipped/DE-Msr/ICOSETC_DE-Msr_FLUXNET_HH_L2.csv...
Processing data/raw/icos-2023/unzipped/FR-Lam/ICOSETC_FR-Lam_FLUXNET_HH_L2.csv...
Processing data/raw/icos-2023/unzipped/SE-Htm/ICOSETC_SE-Htm_FLUXNET_HH_L2.csv...
Processing data/raw/icos-2023/unzipped/SE-Svb/ICOSETC_SE-Svb_FLUXNET_HH_L2.csv...
Processing data/raw/icos-2023/unzipped/DE-Hai/ICOSETC_DE-Hai_FLUXNET_HH_L2.csv...
Processing data/raw/icos-2023/unzipped/FR-Bil/ICOSETC_FR-Bil_FLUXNET_HH_L2.csv...
Processing data/

  df[column] = np.nan
  df[column] = np.nan
  df[column] = np.nan
  df[column] = np.nan
  df[column] = np.nan
  df[column] = np.nan
  df[column] = np.nan
  df[column] = np.nan
  df[column] = np.nan
  df[column] = np.nan
  df[column] = np.nan
  df[column] = np.nan
  df[column] = np.nan
  df[column] = np.nan
  df[column] = np.nan
  df[column] = np.nan


Processing data/raw/icos-2023/unzipped/CD-Ygb/ICOSETC_CD-Ygb_FLUXNET_HH_L2.csv...
Processing data/raw/icos-2023/unzipped/DE-HoH/ICOSETC_DE-HoH_FLUXNET_HH_L2.csv...
Processing data/raw/icos-2023/unzipped/FR-Mej/ICOSETC_FR-Mej_FLUXNET_HH_L2.csv...
Processing data/raw/icos-2023/unzipped/BE-Vie/ICOSETC_BE-Vie_FLUXNET_HH_L2.csv...
Processing data/raw/icos-2023/unzipped/FR-Aur/ICOSETC_FR-Aur_FLUXNET_HH_L2.csv...
Processing data/raw/icos-2023/unzipped/DE-Geb/ICOSETC_DE-Geb_FLUXNET_HH_L2.csv...
Processing data/raw/icos-2023/unzipped/FI-Let/ICOSETC_FI-Let_FLUXNET_HH_L2.csv...


In [3]:
shutil.copyfile(META_FILE, os.path.join(OUTPUT_DIR, 'site_data.csv'))

'data/intermediate/test_int_1/icos-2023/site_data.csv'