In [None]:
# Cell 1: Import required libraries
import pandas as pd
import numpy as np
import sqlite3
import time
import datetime
from icoscp.cpb.dobj import Dobj
import os
from icoscp_core.icos import bootstrap
from icoscp import cpauth

In [None]:
# Cell 2: Input parameters (edit these)
OSVAS='/perm/sp3c/OSVAS/'
station_name = "Majadas_south"
start_date = "2016-06-01"
end_date = "2016-07-31"
station_list_path = os.path.join(OSVAS,"./sqlites/station_list_SURFEX.csv")
dataset_doi = "https://meta.icos-cp.eu/objects/dDlpnhS3XKyZjB22MUzP_nAm"

# Map each variable name in the dataset behind `dataset_doi` (key) to the
# column name it will get in the output SQLite table (value).
variables = {'H_F_MDS': 'H', 'LE_F_MDS': 'LE'}

# Authenticate into ICOS using the cookie token stored on the first line of
# the cookie file. The file is opened in a `with` block so the handle is
# closed as soon as the token has been read.
cookie_file_path = os.path.join(OSVAS,"./sqlites/icos_cookie.txt")
with open(cookie_file_path, 'r') as cookie_file:
    cookie_token = cookie_file.readline().rstrip()
if not cookie_token:
    # Fail here with a clear message instead of passing an empty token on.
    raise ValueError(f"No ICOS cookie token found on the first line of {cookie_file_path}")
meta, data = bootstrap.fromCookieToken(cookie_token)
cpauth.init_by(data.auth)

# Smoke test: if the authentication went well, these lines will not fail.
# NOTE: the import below rebinds `Dobj`, replacing the `icoscp.cpb.dobj.Dobj`
# imported in Cell 1; it is kept in place so later cells keep using the same class.
import icoscp
from icoscp.dobj import Dobj
# Reuse the configured DOI so the smoke test follows any edit to `dataset_doi`.
obj_flux = dataset_doi
dobj_flux = Dobj(obj_flux).data

In [None]:
# Cell 3: Define helper functions
def load_station_metadata(path):
    """Read the station list CSV at ``path`` and return it as a DataFrame."""
    station_table = pd.read_csv(path)
    return station_table

def get_station_info(name, metadata_df):
    """
    Return the first row of ``metadata_df`` whose 'name' column equals ``name``.

    Args:
        name: Station name to look up.
        metadata_df (pd.DataFrame): Station table containing a 'name' column.

    Returns:
        pd.Series: The first matching row.

    Raises:
        ValueError: If no row has the requested name.
    """
    is_requested = metadata_df['name'] == name
    # Guard clause: bail out before indexing when nothing matches.
    if not is_requested.any():
        raise ValueError(f"Station '{name}' not found in the metadata file.")
    return metadata_df[is_requested].iloc[0]

def fetch_flux_data(doi, cookie_token=None):
    """
    Fetch the data of an ICOS data object.

    This single definition replaces two same-named functions, the second of
    which silently shadowed the first. Both call shapes keep working:
    ``fetch_flux_data(doi)`` and ``fetch_flux_data(doi, cookie_token)``.

    Args:
        doi (str): Identifier/URL of the data object, passed straight to ``Dobj``.
        cookie_token (str, optional): ICOS cookie token. When given, a session
            is bootstrapped from it and registered with ``cpauth`` before the
            fetch. When omitted, authentication is assumed to have been done
            already (the notebook does this in its configuration cell).

    Returns:
        The ``.data`` attribute of the data object. The notebook treats it as
        a pandas DataFrame (it calls ``.head()`` on the result).
    """
    if cookie_token is not None:
        # Bootstrap a session from the token; only its auth handle is needed.
        _, session_data = bootstrap.fromCookieToken(cookie_token)
        cpauth.init_by(session_data.auth)

    return Dobj(doi).data


def _as_utc_timestamp(value):
    """Coerce a str/datetime-like bound to a timezone-aware UTC ``pd.Timestamp``."""
    stamp = pd.Timestamp(value)
    # Naive bounds are interpreted as UTC; aware bounds are converted to UTC.
    return stamp.tz_localize('UTC') if stamp.tzinfo is None else stamp.tz_convert('UTC')


def process_data(df, station_info, start, end, variable_names=None):
    """
    Select rows within a time range, attach station information and rename
    the requested variable columns.

    This single definition replaces two same-named functions (the later one
    shadowed the earlier). Calling it without ``variable_names`` keeps the
    earlier behaviour of returning 'H_F_MDS' and 'LE_F_MDS' un-renamed.

    Args:
        df (pd.DataFrame): Input frame with a 'TIMESTAMP' column and the
            variable columns to process. It is NOT modified.
        station_info: Mapping (dict or pd.Series) with keys 'SID', 'lat',
            'lon' and 'elev'.
        start (str or datetime): Inclusive lower bound. Naive values are
            interpreted as UTC.
        end (str or datetime): Inclusive upper bound. Naive values are
            interpreted as UTC. A date-only string means midnight at the
            start of that day.
        variable_names (dict, optional): Maps original column names in ``df``
            to the desired output column names. Defaults to keeping
            'H_F_MDS' and 'LE_F_MDS' under their original names.

    Returns:
        pd.DataFrame: Columns 'valid_dttm' (Unix time in seconds, float),
        'SID', 'lat', 'lon', 'elev', followed by the renamed variables. Rows
        with a missing value in any requested variable are dropped.
    """
    if variable_names is None:
        variable_names = {'H_F_MDS': 'H_F_MDS', 'LE_F_MDS': 'LE_F_MDS'}
    source_columns = list(variable_names.keys())

    # Work on a separate Series so the caller's frame never gains a column.
    timestamps = pd.to_datetime(df['TIMESTAMP'], utc=True)

    # Filter to the desired datetime range (both bounds inclusive).
    in_range = (timestamps >= _as_utc_timestamp(start)) & (timestamps <= _as_utc_timestamp(end))

    # Convert to Unix time in seconds. Dividing the offset from the epoch by a
    # one-second Timedelta is correct whatever resolution (s/ms/us/ns) the
    # datetime column has, unlike dividing the raw integer view by a constant.
    epoch = pd.Timestamp("1970-01-01", tz="UTC")
    unix_seconds = (timestamps[in_range] - epoch) / pd.Timedelta(seconds=1)

    selected = df.loc[in_range, source_columns].copy()
    # Same mask, same row order: assign positionally so that a non-unique
    # index cannot break the alignment.
    selected['valid_dttm'] = unix_seconds.to_numpy()

    # Drop rows with NaN in any of the requested variables.
    selected = selected.dropna(subset=source_columns)

    for key in ('SID', 'lat', 'lon', 'elev'):
        selected[key] = station_info[key]

    # Select the desired columns in output order and rename the variables.
    columns_to_select = ['valid_dttm', 'SID', 'lat', 'lon', 'elev'] + source_columns
    return selected[columns_to_select].rename(columns=variable_names)

In [None]:
# Cell 4: Load metadata and fetch data
# Read the station list and pick the row for `station_name`
# (get_station_info raises ValueError when the station is not listed).
station_metadata = load_station_metadata(station_list_path)
station_info = get_station_info(station_name, station_metadata)
print(f"Loaded metadata for station: {station_name}")
print(station_info)
# Fetch the data object behind `dataset_doi`; this relies on the ICOS
# authentication already performed in Cell 2.
df_raw = fetch_flux_data(dataset_doi)
print("Fetched data from ICOS.")
# Preview the first rows of the raw data.
df_raw.head()

In [None]:
# Cell 5: Process and preview final data
# Restrict df_raw to the [start_date, end_date] window, attach the station
# columns (SID, lat, lon, elev) and rename the variables as given in `variables`.
df_processed = process_data(df_raw, station_info, start_date, end_date, variables)
# Preview the first rows of the processed table.
df_processed.head()

In [None]:
# Cell 6: Write to SQLite
# The output file is named after the year of `start_date`.
year = pd.to_datetime(start_date).year
output_dir = os.path.join(OSVAS,"sqlites/data/observations",station_name)
os.makedirs(output_dir, exist_ok=True) # Create directory if it doesn't exist
output_file = os.path.join(output_dir, f"OBSTABLE_{year}.sqlite")

# Declare every column as REAL directly in to_sql. This gives the same column
# affinity as copying the data through a hand-written CREATE TABLE, but it
# follows whatever columns `variables` produced instead of assuming H and LE.
real_columns = {column: "REAL" for column in df_processed.columns}

# `with conn:` only commits or rolls back the transaction; it does not close
# the connection, so close it explicitly to release the file handle.
conn = sqlite3.connect(output_file)
try:
    with conn:
        df_processed.to_sql("SYNOP", conn, if_exists="replace", index=False, dtype=real_columns)
finally:
    conn.close()

print(f"Data written to {output_file}")