In [None]:
    ###### OSVAS ###################################
    ###### ( OFFLINE SURFEX VALIDATION SYSTEM)######
    #### STEP 1: ICOS AUTHENTICATION ###############

#A username/password account for the ICOS authentication service is required for this step.
#An obfuscated (not human-readable) copy of the password is stored in a file on the local machine,
#in a default user-specific folder. To initialize this file, run the commented-out
#auth.init_config_file() call below interactively (this only needs to be done once per machine):

from icoscp_core.icos import auth
#auth.init_config_file()
#obj_flux='https://meta.icos-cp.eu/objects/dDlpnhS3XKyZjB22MUzP_nAm'

#dobj_flux=Dobj(obj_flux).data
#(credential-like string removed: never keep passwords or tokens in the notebook; rotate it if it was real)

In [1]:
###### OSVAS ########################################################
###### ( OFFLINE SURFEX VALIDATION SYSTEM)###########################
#### STEP 2: IMPORTING NEEDED PACKAGES AND DEFINING FUNCTIONS ###############
import icoscp
from icoscp.dobj import Dobj
import os
import sys
# Entries of sys.path are used literally by the import system: "~" is never
# expanded, so the home directory has to be resolved explicitly.
# Note that this only affects imports executed after this line.
sys.path.append(os.path.expanduser("~/.local/lib/python3.10/site-packages/"))
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import scipy.stats as stats
import numpy as np
from datetime import date, datetime, timedelta
import os
import time
import meteo
# Note: meteo package available here: https://github.com/hendrikwout/meteo
from pathlib import Path
import datetime as dt
import xarray as xr
from netCDF4 import Dataset, date2num
import cftime

# Default resolution (dots per inch) applied to every figure created below.
plt.rcParams.update({'figure.dpi': 500})
##############################################################################
##Here comes a series of functions for handling the forcing creation easily ##
##############################################################################
def datespan(startDate, endDate, delta=timedelta(days=1)):
    """Yield successive values from ``startDate`` up to, but excluding, ``endDate``.

    Args:
        startDate: First value of the span (yielded only if it is ``< endDate``).
        endDate: Exclusive upper bound of the span.
        delta: Increment added after each yielded value. Defaults to one day.

    Yields:
        ``startDate``, ``startDate + delta``, ... for as long as the value
        compares lower than ``endDate``.
    """
    cursor = startDate
    while True:
        if not cursor < endDate:
            return
        yield cursor
        cursor += delta

def despike(pandas_column, sigma_delta):
    """Replace spikes in a series by NaN.

    The series of differences between consecutive values is converted to
    z-scores (NaN differences are left out of the mean/std). A value is kept
    only if the absolute z-score of its difference to the previous value is
    strictly below ``sigma_delta``; otherwise it is replaced by NaN.

    Args:
        pandas_column (pandas.Series): Series to filter.
        sigma_delta (float): Threshold, in standard deviations of the
            consecutive differences, above which a jump counts as a spike.

    Returns:
        pandas.Series: A new series with the same index where spikes are NaN.
        Values whose difference to the previous one is NaN (e.g. the value
        right after a gap) are also set to NaN, because a NaN z-score never
        satisfies the ``<`` comparison. The first value is always kept.
    """
    if len(pandas_column) == 0:
        # Nothing to filter; also avoids addressing position 0 of an empty array.
        return pandas_column.copy()
    # Work on a plain float array so that "position 0" below is always
    # positional, whatever container type scipy returns for a Series input.
    deltas = pandas_column.diff(1).to_numpy(dtype=float)
    delta_z_abs = np.abs(np.asarray(stats.zscore(deltas, nan_policy='omit'), dtype=float))
    delta_z_abs[0] = 0  # The first value has no predecessor: never flag it
    return pandas_column.where(delta_z_abs < sigma_delta, np.nan)

def reduce_pres(pandas_column,station_height):
    """Subtract a height-proportional offset from a pressure given in Pa.

    The offset is 100 Pa for every 9 m of ``station_height`` (a constant
    pressure gradient is assumed).

    Args:
        pandas_column: Pressure value(s) in Pa.
        station_height: Height in metres used to compute the offset.

    Returns:
        ``pandas_column`` lowered by ``100*station_height/9`` Pa.
    """
    pressure_offset = 100*station_height/9
    return pandas_column - pressure_offset

def rh2ah(RH,p,T):
    '''Convert relative humidity to absolute humidity (kg water per m^3 air).

    Chains helpers from the ``meteo`` package: relative humidity -> mixing
    ratio -> specific humidity, and multiplies the result by
    ``meteo.air.rhov(T, p)``.

    NOTE(review): the units/ranges expected by the ``meteo`` helpers (RH as a
    fraction or in %, p in Pa, T in K) are not visible here - confirm against
    the meteo package before changing the callers.
    '''
    mixing_ratio = meteo.humidity.rh2mixr(RH, p, T)
    specific_humidity = meteo.humidity.mixr2sh(mixing_ratio)
    rhov_value = meteo.air.rhov(T, p)
    return specific_humidity * rhov_value

        
def create_lockfile(Forcing_path):
    """Create the marker file ``.lockfile`` inside ``Forcing_path``.

    The file contains the text ``LOCKED`` and is used to prevent an existing
    set of forcing files from being overwritten by a later run.

    Args:
        Forcing_path (str): Existing directory in which to create the lockfile.

    Raises:
        FileExistsError: If ``Forcing_path`` already contains a lockfile.
    """
    lockfile = os.path.join(Forcing_path, ".lockfile")
    try:
        # Mode "x" creates the file and fails if it is already there in one
        # step, so there is no window between an existence check and the write.
        with open(lockfile, "x") as f:
            f.write("LOCKED")
    except FileExistsError:
        raise FileExistsError(f"Error: A lockfile already exists in {Forcing_path}. Delete it before re-running.") from None

################ Write forcing and Params_config file in ascii  ##############################

def write_forcing_ascii(Forcing_vars, Forcing_path, Station_forcing, run_start, run_end, 
                        delta_t, lon, lat, alt, height_T, height_V, write_forcing_run='yes'):
    """
    Plots the forcing variables and writes them as ASCII files, together with
    a Params_config.txt metadata file, for a simulation run.

    For every variable one ``<var>.txt`` file is written with one value per
    line (format ``%.6f``); NaNs are back-filled and then forward-filled before
    writing. Params_config.txt holds one value per line, in this order:
    1, number of time steps, delta_t, year, month, day and hour of the first
    record of the selected period, lon, lat, alt, height_T, height_V.

    Args:
        Forcing_vars (list): List of forcing variable names (columns of Station_forcing).
        Forcing_path (str): Path to save the forcing files (created if missing).
        Station_forcing (DataFrame): Dataframe containing forcing data.
        run_start (str): Start time index for data selection.
        run_end (str): End time index for data selection (inclusive).
        delta_t (float): Time step (seconds).
        lon (float): Longitude of the station.
        lat (float): Latitude of the station.
        alt (float): Altitude of the station.
        height_T (float): Temperature measurement height.
        height_V (float): Wind measurement height.
        write_forcing_run (str, optional): Whether to write output files ('yes' to write).
            Any other value only prints the NaN counts and shows the plots. Default is 'yes'.

    Returns:
        None

    Raises:
        FileExistsError: If files are to be written and Forcing_path already holds a lockfile.
        ValueError: If no data lies between run_start and run_end.
    """    
    writing = write_forcing_run.lower() == 'yes'
    os.makedirs(Forcing_path, exist_ok=True)
    lockfile = os.path.join(Forcing_path, ".lockfile")
    if writing:
        if os.path.exists(lockfile):
            raise FileExistsError(f"Error: A lockfile exists in {Forcing_path}. Delete the lockfile before re-running.")
        create_lockfile(Forcing_path)
    
    try:
        Station_forcing_run = Station_forcing.loc[run_start:run_end]
        if len(Station_forcing_run) == 0:
            raise ValueError(f"No forcing data found between {run_start} and {run_end}.")

        for var in Forcing_vars:
            series = Station_forcing_run[var]
            print(f"{var} with {series.isna().sum()} NaNs")
            fig, ax = plt.subplots(figsize=(17, 5))
            series.plot(ax=ax, label=f"{var} no_filter")
            ax.set_ylabel(var)
            ax.legend()
            plt.show()
            # One figure per variable: release it so figures do not pile up in memory
            plt.close(fig)
            if writing:
                # Series.bfill()/ffill() replace the deprecated fillna(method=...)
                np.savetxt(os.path.join(Forcing_path, f"{var}.txt"), 
                           series.bfill().ffill().values, fmt='%.6f')
        
        # The run starts at the first selected record (position 0)
        first_stamp = Station_forcing_run.index[0]
        params_lines = [
            1, len(Station_forcing_run), delta_t,
            first_stamp.year, first_stamp.month,
            first_stamp.day, first_stamp.hour,
            lon, lat, alt, height_T, height_V
        ]
        
        if writing:
            with open(os.path.join(Forcing_path, "Params_config.txt"), 'w') as f:
                f.write("\n".join(map(str, params_lines)) + "\n")
    except Exception as e:
        print(f"Error encountered: {e}")
        raise

################ Write daily forcing files in netcdf  ##############################
        
def write_forcing_netcdf(Forcing_vars, Forcing_path, Station_forcing, run_start, run_end,
                         delta_t, lon, lat, alt, height_T, height_V, write_forcing_run='yes'):
    """
    Plots the forcing variables and writes them as one NetCDF file per day.

    Every day between run_start and run_end gets a file named
    ``FORCING.nc_YYYYMMDD`` in Forcing_path. Each file holds a ``time``
    variable (seconds since 2014-01-01 01:00:00) built from the timestamps of
    that day's records, and one float variable of shape
    ``(time, Number_of_points=1)`` per forcing field.

    Args:
        Forcing_vars (list): Column names of Station_forcing. They are paired
            BY POSITION with the NetCDF variable names CO2air, Wind_DIR, PSurf,
            Rainf, Snowf, Wind, DIR_SWdown, LWdown, Qair, SCA_SWdown, Tair,
            so the order of this list matters.
        Forcing_path (str): Path to save the forcing files (created if missing).
        Station_forcing (DataFrame): Dataframe containing forcing data,
            indexed by timestamp.
        run_start (str): Start time index for data selection.
        run_end (str): End time index for data selection (inclusive).
        delta_t (float): Time step (seconds). Not used by this function.
        lon (float): Longitude of the station. Not used by this function.
        lat (float): Latitude of the station. Not used by this function.
        alt (float): Altitude of the station. Not used by this function.
        height_T (float): Temperature measurement height. Not used by this function.
        height_V (float): Wind measurement height. Not used by this function.
        write_forcing_run (str, optional): Whether to write output files ('yes' to write).
            Any other value only shows the plots and leaves the disk untouched
            (apart from creating Forcing_path). Default is 'yes'.

    Returns:
        None

    Raises:
        FileExistsError: If files are to be written and at least one daily
            file of the period already exists.
    """
    writing = write_forcing_run.lower() == 'yes'
    os.makedirs(Forcing_path, exist_ok=True)

    def daily_filename(day):
        # One file per calendar day: FORCING.nc_YYYYMMDD
        return os.path.join(Forcing_path, f"FORCING.nc_{day.strftime('%Y%m%d')}")

    # normalize=True snaps both bounds to midnight, so the last calendar day is
    # included even when run_start carries a later time of day than run_end.
    time_range = pd.date_range(run_start, run_end, freq="D", normalize=True)

    existing_files = {daily_filename(day) for day in time_range
                      if os.path.exists(daily_filename(day))}

    if writing and existing_files:
        raise FileExistsError(f"Error: Existing forcing files would be overwritten: {existing_files}. Delete them before re-running.")

    try:
        Station_forcing_run = Station_forcing.loc[run_start:run_end]
        forcing_vars = ["CO2air", "Wind_DIR", "PSurf", "Rainf", "Snowf", "Wind", "DIR_SWdown",
                        "LWdown", "Qair", "SCA_SWdown", "Tair"]
        # Positional pairing: zip() stops at the shorter list without complaining
        forcing_var_map = dict(zip(Forcing_vars, forcing_vars))

        # Plot full period
        for forcing_key in Forcing_vars:
            fig, ax = plt.subplots(figsize=(17, 5))
            Station_forcing_run[forcing_key].plot(ax=ax, label=f"{forcing_key} no_filter")
            ax.set_ylabel(forcing_key)
            ax.legend()
            plt.show()
            # One figure per variable: release it so figures do not pile up in memory
            plt.close(fig)

        if not writing:
            # Plot-only run: opening the daily files in "w" mode would create
            # empty files and truncate existing ones, so stop here.
            return

        for day in time_range:
            daily_data = Station_forcing_run.loc[day.strftime('%Y-%m-%d')]
            # Use the real timestamps of the records; a synthetic hourly axis
            # would be wrong for any other sampling interval (e.g. half-hourly).
            time_values = daily_data.index.to_pydatetime()

            with Dataset(daily_filename(day), "w", format="NETCDF4") as nc:
                nc.createDimension("time", len(time_values))
                nc.createDimension("Number_of_points", 1)

                time_var = nc.createVariable("time", "f8", ("time",))
                time_var.units = "seconds since 2014-01-01 01:00:00"
                time_var[:] = date2num(time_values, units=time_var.units, calendar='gregorian')

                forcing_data_vars = {var: nc.createVariable(var, "f4", ("time", "Number_of_points")) for var in forcing_vars}

                for forcing_key, nc_var in forcing_var_map.items():
                    forcing_data_vars[nc_var][:] = daily_data[forcing_key].values.reshape(len(time_values), 1)
    except Exception as e:
        print(f"Error encountered: {e}")
        raise
        
        
################ Merge netcdf files for a period which is already downloaded as daily files #############

def merge_forcing_netcdf(Forcing_path, start_date, end_date, output_filename="FORCING.nc"):
    """
    Merges daily NetCDF forcing files into a single file.

    The daily files ``FORCING.nc_YYYYMMDD`` found in Forcing_path for the days
    between start_date and end_date are concatenated along the time dimension,
    in chronological order of their file names. Days without a file are
    skipped (a warning is printed). The list of variables and the units of the
    time axis are taken from the first file found. An existing output file is
    overwritten.

    Args:
        Forcing_path (str): Path where the daily NetCDF files are stored.
        start_date (str): Start date in 'YYYY-MM-DD' format.
        end_date (str): End date in 'YYYY-MM-DD' format.
        output_filename (str, optional): Name of the merged NetCDF file. Default is 'FORCING.nc'.

    Returns:
        None

    Raises:
        FileNotFoundError: If no daily file exists for the requested period.
    """
    merged_filepath = os.path.join(Forcing_path, output_filename)
    
    # Generate list of daily forcing files to merge. normalize=True snaps both
    # bounds to midnight so that the last calendar day is never dropped.
    time_range = pd.date_range(start_date, end_date, freq="D", normalize=True)
    candidate_files = [os.path.join(Forcing_path, f"FORCING.nc_{day.strftime('%Y%m%d')}") for day in time_range]
    forcing_files = [f for f in candidate_files if os.path.exists(f)]
    
    if not forcing_files:
        raise FileNotFoundError("No forcing files found for the specified date range.")

    n_missing = len(candidate_files) - len(forcing_files)
    if n_missing:
        print(f"Warning: {n_missing} daily forcing file(s) missing between {start_date} and {end_date}; "
              "the merged file will have gaps.")
    
    # Fallback used only if the daily files carry no units attribute
    default_time_units = "seconds since 2014-01-01 01:00:00"
    with Dataset(forcing_files[0], "r") as sample_nc:
        forcing_vars = [var for var in sample_nc.variables if var not in ["time", "Number_of_points"]]
        # Reuse the reference epoch of the inputs so the merged time values
        # keep their meaning even if the writer's epoch changes.
        time_units = getattr(sample_nc.variables["time"], "units", default_time_units)
    
    # Read time and variable data from all files
    merged_data = {var: [] for var in forcing_vars}
    merged_time = []
    
    for path in forcing_files:
        with Dataset(path, "r") as nc:
            merged_time.append(nc.variables["time"][:])
            for var in forcing_vars:
                merged_data[var].append(nc.variables[var][:])
    
    # Concatenate along the time dimension. netCDF4 may return masked arrays;
    # np.ma.concatenate keeps their masks, which np.concatenate does not.
    merged_time = np.ma.concatenate(merged_time, axis=0)
    for var in forcing_vars:
        merged_data[var] = np.ma.concatenate(merged_data[var], axis=0)
    
    # Write merged data to new NetCDF file
    with Dataset(merged_filepath, "w", format="NETCDF4") as nc:
        nc.createDimension("time", len(merged_time))
        nc.createDimension("Number_of_points", 1)
        
        time_var = nc.createVariable("time", "f8", ("time",))
        time_var.units = time_units
        time_var[:] = merged_time
        
        for var in forcing_vars:
            nc_var = nc.createVariable(var, "f4", ("time", "Number_of_points"))
            nc_var[:] = merged_data[var]
    
    print(f"Merged NetCDF file created: {merged_filepath}")


In [2]:
###### OSVAS #####################################################
###### ( OFFLINE SURFEX VALIDATION SYSTEM)########################
#### STEP 3: STATION METADATA AND CONFIGURATION OF ###############
#### THE FORCING GENERATION ######################################

#3.1 Define here the home path under which the SURFEX forcing of the
#different stations is saved
PROJECTDIR='OSVAS'
home = os.path.join(str(Path.home()),PROJECTDIR)  
station_name='Majadas_south_test' # A directory with this name will be created in 'home'
# NOTE(review): write_forcing_wup is not read by any cell of this notebook;
# the writer calls in step 5 pass write_forcing_run='yes' literally.
write_forcing_wup='no' #Set to yes for writing forcing

#3.2 Site/experiment parameters needed for Params_config.txt
#One branch per station, so that forcings can be generated for different
#stations and periods. An unknown station name is rejected here instead of
#failing later with undefined variables.

if station_name=='Majadas_south_test':
    data_pid='https://meta.icos-cp.eu/objects/dDlpnhS3XKyZjB22MUzP_nAm'
    #Product ID (PID) of the ICOS object from where to extract forcing variables
    delta_t=1800                     # Interval in seconds between observations
    lon=-5.774722                    # Station longitude
    lat= 39.940556                   # Station latitude
    alt=258                          # Station altitude
    height_T=2                       # Height of the temperature measurement
    height_V=10                      # Height of the windspeed   measurement
    run_start='2019-2-2 00:00:00'    # Timestamp for the forcing start
    run_end='2019-3-1 00:00:00'      # Timestamp for the forcing end
    forcing_format='netcdf'          # Choose between netcdf or ascii

elif station_name=='Fyodorovskoye_test':
    data_pid='https://meta.icos-cp.eu/objects/p8vfuGtKaPH90vW6WhKx3q9N'
    #Product ID (PID) of the ICOS object from where to extract forcing variables
    delta_t=1800                     # Interval in seconds between observations
    # FIXME(review): this branch used to carry the Majadas longitude, latitude
    # and altitude (copy-paste). The values below are the FLUXNET RU-Fyo site
    # coordinates; confirm them against the station metadata of data_pid.
    lon=32.9221                      # Station longitude
    lat=56.4615                      # Station latitude
    alt=265                          # Station altitude
    # NOTE(review): the measurement heights are identical to the Majadas
    # values and may also be copy-pasted - confirm for this tower.
    height_T=2                       # Height of the temperature measurement
    height_V=10                      # Height of the windspeed   measurement
    run_start='2018-1-1 00:00:00'    # Timestamp for the forcing start
    run_end='2019-1-1 00:00:00'      # Timestamp for the forcing end
    forcing_format='ascii'           # Choose between netcdf or ascii

else:
    raise ValueError(f"Unknown station_name {station_name!r}: add a configuration branch for it in step 3.2.")



In [3]:
###### OSVAS #####################################################
###### ( OFFLINE SURFEX VALIDATION SYSTEM)########################
#### STEP 4: LOAD STATION DATA AND DERIVE THE VARIABLES ##########
#### NEEDED FOR THE FORCING GENERATION ###########################


# The example below shows how to process "Fluxnet" type data for Majadas del Tietar (Spain)
# or Fyodorovskoye (Russia). Other Fluxnet data collections can be browsed here:
# https://data.icos-cp.eu/portal/ (write "Fluxnet product" in the field "Data type").
# For other types of data from the ICOS network (e.g. ecosystem stations, oceanic...)
# some extra work will be needed to adapt this notebook,
# because column names, etc. are not homogeneous.


#4.1: Load the ICOS object and keep only the table
#     containing the data (as a Pandas dataframe)
df=Dobj(data_pid).data
# The TIMESTAMP column of the Fluxnet table is kept under the name
# TIMESTAMP_START, and a new TIMESTAMP, shifted by 30 minutes, becomes the index.
# NOTE(review): for half-hourly records (delta_t=1800) START + 30 min is the
# END of the measurement interval, not its midpoint (which would be + 15 min).
# Confirm which convention the forcing is expected to follow.
df.rename(columns={'TIMESTAMP': 'TIMESTAMP_START'},inplace=True)
df['TIMESTAMP'] = df['TIMESTAMP_START'] + pd.Timedelta(minutes=30)
df.set_index('TIMESTAMP',inplace=True)
# Station_forcing is another name for the same dataframe object (no copy)
Station_forcing = df

#4.2 Get station data
# Use a dictionary to rename original variable names to the ones
# used by SURFEX in ASCII filenames or netcdf variable names.
# Info about the forcing variables and format in ascii available here:
# https://www.umr-cnrm.fr/surfex/spip.php?article214
# Also, apply some transformations of data with another dictionary

# Define renaming dictionary
rename_dict = {
    "TA_F": "Forc_TA",
    "PA_F": "Forc_PS",
    "WS_F": "Forc_WIND",
    "LE_F_MDS": "LE",
    "H_F_MDS": "H"
}

# Define transformations dictionary.
# The entries are applied in the order written here and several of them read
# columns produced by earlier ones (Forc_SCA_SW needs Forc_DIR_SW, ESAT needs
# the converted Forc_TA, E needs ESAT, Forc_QA needs E, ESAT and Forc_PS),
# so do not reorder them.
transform_dict = {
    "Forc_LW": lambda df: df["LW_IN_F"],
    "Forc_SNOW": lambda df: df["P_F"] * 0,
    "Forc_CO2": lambda df: 0.00062,
    "Forc_DIR_SW": lambda df: np.where(df["SW_IN_F"] < 0, 0, df["SW_IN_F"]),
    "Forc_SCA_SW": lambda df: df["Forc_DIR_SW"] * 0.0,  # Initialized as zero
    "Forc_RAIN": lambda df: df["P_F"] / delta_t,
    "Forc_PS": lambda df: df["Forc_PS"] * 1000,  # Convert to Pa
    "Forc_DIR": lambda df: df["Forc_WIND"] * 0,  # No wind direction, use 0
    "Forc_TA": lambda df: df["Forc_TA"] + 273.15,  # Convert to Kelvin
    "ESAT": lambda df: meteo.humidity.esat(df["Forc_TA"]),
    "E": lambda df: df["ESAT"] - df["VPD_F"] * 100,
    "Forc_QA": lambda df: rh2ah(df["E"] / df["ESAT"], df["Forc_PS"], df["Forc_TA"])
}

# Apply renaming
Station_forcing.rename(columns=rename_dict, inplace=True)

# Apply transformations
for col, func in transform_dict.items():
    Station_forcing[col] = func(Station_forcing)

# Ensure Forc_SCA_SW is explicitly zero (NaN * 0.0 above would stay NaN).
# Assign the column instead of writing through `.values`, which is a
# read-only array when pandas Copy-on-Write is active.
Station_forcing["Forc_SCA_SW"] = 0.0


In [None]:
###### OSVAS ####################################################
###### ( OFFLINE SURFEX VALIDATION SYSTEM)#######################
#### STEP 5: PLOT FORCING VARIABLES FOR THE SELECTED PERIOD #####
#### WRITE THE FORCING FILES IN THE SELECTED FILE TYPE ##########


# Columns of Station_forcing to export. The netcdf writer pairs them by
# position with its own variable names, so keep this order.
Forcing_vars=['Forc_CO2','Forc_DIR','Forc_PS','Forc_RAIN','Forc_SNOW','Forc_WIND','Forc_DIR_SW','Forc_LW','Forc_QA','Forc_SCA_SW','Forc_TA']

# Both writers take the same arguments and the same output directory
forcing_run_path = home + '/' + station_name + '_run/' + 'forcing_run'
forcing_writers = {
    'ascii': write_forcing_ascii,
    'netcdf': write_forcing_netcdf,
}

# Fail loudly on a typo in forcing_format instead of silently writing nothing
if forcing_format not in forcing_writers:
    raise ValueError(f"Unsupported forcing_format {forcing_format!r}; choose one of {sorted(forcing_writers)}.")

forcing_writers[forcing_format](
    Forcing_vars=Forcing_vars,
    Forcing_path=forcing_run_path,
    Station_forcing=Station_forcing,
    run_start=run_start,
    run_end=run_end,
    delta_t=delta_t,
    lon=lon, lat=lat, alt=alt,
    height_T=height_T, height_V=height_V,
    write_forcing_run='yes'
)
    

In [5]:
#### Example on how to reconstruct a forcing file for a period from the individual netcdf files

# Daily FORCING.nc_YYYYMMDD files only exist when the netcdf writer was used;
# with the ascii format there is nothing to merge and the call would raise.
if forcing_format == 'netcdf':
    merge_forcing_netcdf(Forcing_path=home + '/' + station_name + '_run/' + 'forcing_run',
                         start_date=run_start, end_date=run_end, output_filename="FORCING.nc")
else:
    print(f"Skipping merge: forcing_format is {forcing_format!r}, no daily netcdf files to merge.")

Merged NetCDF file created: /home/pn56/OSVAS/Majadas_south_test_run/forcing_run/FORCING.nc


In [None]:
# Station_forcing_run only exists inside the writer functions, so select the
# run period from Station_forcing here (records at positions 1..99).
Station_forcing.loc[run_start:run_end, 'Forc_DIR'].iloc[1:100].values

In [None]:
# Station_forcing_run only exists inside the writer functions, so select the
# run period from Station_forcing here (records at positions 1..99).
Station_forcing.loc[run_start:run_end, 'Forc_WIND'].iloc[1:100].values

In [None]:
# Station_forcing_run only exists inside the writer functions, so select the
# run period from Station_forcing here.
Station_forcing.loc[run_start:run_end, 'Forc_TA']