In [None]:

import os
os.environ["FONTCONFIG_FILE"] = f"/home/{os.environ['USER']}/myfonts/fonts.conf"
os.environ["FONTCONFIG_PATH"] = f"/home/{os.environ['USER']}/myfonts"
# fix for an error occuring when calling fig.write below (17/02/2026)
# make sure this dir exists
os.environ["TMPDIR"] = f"/home/{os.environ['USER']}/tmp"
import pandas as pd
import urllib.request
from datetime import datetime,date
import numpy as np
import matplotlib.pyplot as plt
import kaleido; kaleido.get_chrome()
import plotly.express as px
import plotly.graph_objects as go
import json
import yaml
from calendar import monthrange
import xarray as xr
import cdsapi
import zipfile
from glob import glob
import datetime as dt     
import re


#important
#before running this script, please go to cads-forms-json, git pull, then git checkout prod


# wishlist
# - further discriminate products (eg. sensor lines in atm composition, sea ice thickness L3/L4)
# - check changes in filenames in Cryo and how they may be useful
# - make it more Flourish-compatible
# - maybe highlight updates from previous year?
# - add LWL small lakes
# - Ice sheet velocity to include antarctica
# - put land hydrology just below cryosphere
# - update repository cds-forms-c3s and change to c3sprod
#   - git checkout c3sprod
#   - git pull
# - download datasets that only contain info in the time dimension from CDS
# - try to make the code more path-independent
# - install on athos and run every week. update some figure repo




def extract_dates_from_TSI():
    url='https://gerb.oma.be/tsi/C3S_RMIB_daily_TSI_composite_ICDR_v3.4.txt' # check this URL everytime
    c=pd.read_csv(url,skiprows=128,sep=r"\s+",header=None)
    return pd.Timestamp(str(c[3].iloc[0])),pd.Timestamp(str(c[3].iloc[-1]))
def extract_dates_icesheets(datasets_dir, entry,region):
    # opens TCDR and ICDR files for both Antarctica and Greenland, then computes max/min dates
    # needs adjustment for per product sorting
    # region: Ant, Gr
    
    # -- Ice Sheet Surface Elevation Change files
    cads_forms_yml_dir=conf['cads_forms_yml_dir']
    fname_generate=f'{cads_forms_yml_dir}/{entry}/gecko-config/generate.yaml'
    with open(fname_generate) as f:
        generate= yaml.safe_load(f)

    #download all manifest files
    os.system(f'rm -rf {datasets_dir}/*manifest*')
    for i in range(len(generate['manifest'])):
        os.system(f'wget {generate['manifest'][i]} -P {datasets_dir}')

    #load manifest files and retrieve time coverage from the different filenames
    # download latest versions if needed
    
    manif_list = glob(f'{datasets_dir}/*manifest*{region}*txt')
    # print("manif_list",manif_list)
    datebegs = []
    dateends = []
    for fname_manif in manif_list:
        with open(fname_manif) as f:
            flist=f.readlines()
        
        flist = [f for f in flist if f != '\n']  # there were some \n characters in the manifest files provided by ENVEO
        
        for fname in flist:
            fname_local = fname.replace('\n','').split('/')[-1]
            fname_path = f'{datasets_dir}{fname_local}'
        
            if os.path.exists(fname_path):
                print(f'{fname_path} file exists!')
            else:
                print(f'Downloading {fname.replace('\n','')} ...')
                os.system(f'wget --user=cds --password=\'eUpN,2sTplBa\' {fname.replace('\n','')} -P {datasets_dir}')
            
            # print('will open '+fname_path)
            nc = xr.open_dataset(fname_path)
            datebegs.append(pd.Timestamp(nc['time'].values[0]))
            dateends.append(pd.Timestamp(nc['time'].values[-1]))
    # print(datebegs,dateends)
    return min(datebegs),max(dateends)
def extract_dates_massbalance(datasets_dir):
    # -- Ice Sheet Gravimetric Mass Balance
    cads_forms_yml_dir=conf['cads_forms_yml_dir']
    product='Ice Sheet Gravimetric Mass Balance' # contains all info for IS-SEC
    entry=conf['PRODUCT'][product]['entry'][0]
    fname_generate=f'{cads_forms_yml_dir}/{entry}/gecko-config/generate.yaml'
    with open(fname_generate) as f:
        generate= yaml.safe_load(f)
    
    #download all manifest files -- temporary disabled
    # os.system(f'rm -rf {datasets_dir}/manifest*')
    # for i in range(len(generate['manifest'])):
    #     os.system(f'wget {generate['manifest'][i]} -P {datasets_dir}')
    
    # temporary
    os.system(f'wget https://cds:5cXcRskEvMoMHrJQtxfy@cds.c3s.eodc.eu/manifest/manifest_c3s2_312a_eodc_ice_sheets_gravimetry_latest.txt -P {datasets_dir}')
    # download latest versions if needed
    print(glob(f'{datasets_dir}/*gravimetry_latest.txt'))
    fname_manif=glob(f'{datasets_dir}/*gravimetry_latest.txt')[0]
    
    with open(fname_manif) as f:
        flist=f.readlines()
    fname_gmb=flist[-1].replace('\n','').split('/')[-1]
    if os.path.exists(f'{datasets_dir}{fname_gmb}'):
        print(f'{fname_gmb} file exists!')
    else:
        print(f'Downloading {fname_gmb} ...')
        os.system(f'wget {flist[-1].replace('\n','')} -P {datasets_dir}')
    
    
    fname = f'{datasets_dir}{fname_gmb}'
    nc = xr.open_dataset(fname)
    return pd.Timestamp(nc['time'].values[0]),pd.Timestamp(nc['time'].values[-1])
def extract_dates_derived_glaciers(jfile):
    with open(jfile) as f:
        gen= yaml.safe_load(f)[0]
    ymin = int(gen['hydrological_year'][0][0:4])
    ymax = int(gen['hydrological_year'][-1][0:4])+1
    mmax=9
    dmax=30
    mmin=4
    dmin=1
    print('Glaciers ',ymin,ymax)
    return pd.Timestamp(f'{ymin}-{mmin}-{dmin}'), pd.Timestamp(f'{ymax}-{mmax}-{dmax}')
def extract_dates_icesheet_velocity(datasets_dir, entry,region):
    generateyml=f'{conf['cads_forms_yml_dir']}/{entry}/gecko-config/generate.yaml'
    generate_dic = yaml.safe_load(open(generateyml))
    manifest_list = generate_dic['manifest']
    # print('list',region,manifest_list)
    manifest= next((url for url in manifest_list if region in url), None)
    # print(manifest)
    #download all manifest files
    os.system(f'rm -rf {datasets_dir}/*manifest*')
    os.system(f'wget {manifest} -P {datasets_dir}')

    fname_manif=glob(f'{datasets_dir}/*manifest_*.txt')[0]
    # print(fname_manif)

    #load manifest files and retrieve time coverage from the different filenames
    with open(fname_manif) as f:
        flist=f.readlines()
    datebeg=min([pd.to_datetime(flist[i].replace('\n','').split('/')[-1].split('_')[-4]) for i in range(len(flist))])
    dateend=max([pd.to_datetime(flist[i].replace('\n','').split('/')[-1].split('_')[-3]) for i in range(len(flist))])
    
    return (pd.Timestamp(datebeg),pd.Timestamp(dateend))
def extract_dates_lake_levels(datasets_dir):
    cads_forms_yml_dir=conf['cads_forms_yml_dir']
    product='Lake Water Level' # contains all info for IS-SEC
    entry=conf['PRODUCT'][product]['entry'][0]
    fname_generate=f'{cads_forms_yml_dir}/{entry}/gecko-config/generate.yaml'
    with open(fname_generate) as f:
        generate= yaml.safe_load(f)
    # print(generate['manifest'])

    #download all manifest files
    os.system(f'rm -rf {datasets_dir}/*manifest*')
    os.system(f'wget {generate['manifest'][-1]} -P {datasets_dir}')

    fname_manif=glob(f'{datasets_dir}/*manifest_*.txt')[0]
    print(fname_manif)

    #load manifest files and retrieve time coverage from the different filenames
    with open(fname_manif) as f:
        flist=f.readlines()
    datebeg=min([pd.to_datetime(flist[i].replace('\n','').split('/')[-1].split('_')[-3]) for i in range(len(flist))])
    dateend=max([pd.to_datetime(flist[i].replace('\n','').split('/')[-1].split('_')[-2]) for i in range(len(flist))])
    return pd.Timestamp(datebeg),pd.Timestamp(dateend)
def datemax2(row):
    row = row.dropna()
    ymax = int(max(row['year']))
    if 'month' in row.keys():
        mmax = int(max(row['month']))
    else:
        mmax=12
    xx,dmax=monthrange(ymax,mmax)
    datemax = pd.Timestamp(f'{ymax}-{mmax}-{dmax}')
    return datemax
def datemin2(row):
    row = row.dropna()
    ymin = int(min(row['year']))
    if 'month' in row.keys():
        mmin = int(min(row['month']))
    else:
        mmin=1
    xx,dmin=monthrange(ymin,mmin)
    datemin = pd.Timestamp(f'{ymin}-{mmin}-{dmin}')
    return datemin
def extract_dates_wv(jfilepath):
    f = open(jfilepath)
    # returns JSON object as 
    # a dictionary
    data = json.load(f)
    # print(data)
    df = pd.DataFrame(data)
    df['datemax'] = df.apply(datemax2,axis=1)
    df['datemin'] = df.apply(datemin2,axis=1)
    datemin = df['datemin'].min()
    datemax = df['datemax'].max()
    return datemin,datemax 
def check_time_agg(row):
    row = row.dropna()
    if 'time_aggregation' in row.keys():
        time_aggregation = row['time_aggregation'][0]
        # print('time_agg',time_aggregation)
        if time_aggregation in ['daily_average','daily_mean','day','day_average']: 
            time_agg = 'day'
            return time_agg
        elif time_aggregation == 'daily':
            time_agg='daily'
            return time_agg
        #note interim solution for 5-daily-composite...
        elif time_aggregation in [
            'monthly_average',
            '5_daily_composite',
            'monthly_mean',
            '27_days',
            'month',
            'monthly',
            'month_average',
            '10_day_average', # debatable..
            ]:
            time_agg = 'monthly'
            return time_agg
        else:
            # print(row)
            print('Could not determine time_agg')
            raise SystemExit
    elif 'period' in row.keys(): # applies to ice_sheets
        time_agg = 'period'
        return time_agg
    elif 'temporal_aggregation' in row.keys():
        time_aggregation = row['temporal_aggregation'][0]
        if time_aggregation in ['monthly','6-hourly']:
            time_agg='monthly'
        elif time_aggregation == 'daily':
            time_agg='daily'
        else:
            print('Error in temporal aggregation')
            raise SystemExit
        return time_agg
    else:
        if ('day' in row.keys()):
            time_agg='day'
            return time_agg
        elif  'nominal_day' in row.keys():
            time_agg='nominal_day'
            return time_agg
        else:
            time_agg = 'monthly'
            return time_agg
def compute_datemax(row):
 
    time_agg=check_time_agg(row) # check time aggregation of data in this row
    # print('time_agg',time_agg)
    if time_agg =='period':
        per_str=max(row['period'])
        ymax=int(per_str[5::])
        mmax=9
        dmax=30
    else: 
        ymax = int(max(row['year']))
        if 'month' in row.keys():
            mmax = int(max(row['month']))
        else:
            mmax=12
        xx,ndays = monthrange(ymax,mmax)
        if time_agg in ['day','nominal_day']:
            # print(row[time_agg])
            dmax = int(max(row[time_agg])) 
        elif time_agg in ['daily']:
            dmax = int(max(row['day'])) 
        else:
            dmax = ndays # last day of month
        if dmax >ndays:
            print('Beware error in allowed dates...')
            # print(row)
            dmax=ndays

    datemax = pd.Timestamp(f'{ymax}-{mmax}-{dmax}')
    return datemax
def compute_datemin(row):
    time_agg=check_time_agg(row) # check time aggregation of data in this row
    if time_agg =='period':
        per_str=max(row['period'])
        ymin=int(per_str[0:4])
        mmin=10
        dmin=1
    else:
        ymin = int(min(row['year'])) 
        if 'month' in row.keys():
            mmin = int(min(row['month']))
        else:
            mmin=1
        if time_agg in ['day','nominal_day']:
            dmin = int(min(row[time_agg])) 
        elif time_agg in ['daily']:
            dmin = int(min(row['day'])) 
        else:
            dmin = 1 # first day of month
        xx,ndays = monthrange(ymin,mmin)     
        if dmin>ndays:
            print('Beware error in allowed dates...')
            # print(row)
            dmin=1
    datemin = pd.Timestamp(f'{ymin}-{mmin}-{dmin}')
    return datemin
def calc_dateminmax_from_cds_form(jfilepath,ecv):
    # Opening JSON file
    f = open(jfilepath)
    # returns JSON object as 
    # a dictionary
    data = json.load(f)
    # print(data)
    df = pd.DataFrame(data)
    # display(df)
    # print(df.keys())
    # print(len(df))
    
    # find records where dates cannot be defined
    if 'sensor_and_algorithm' in df.keys():        
        lst_erase=[]
        for i in range(len(df)):
            if (df['sensor_and_algorithm'][i][0]=='merged_obs4mips'): lst_erase.append(i)
        # now .drop these problematic rows
        for i in lst_erase:
            df=df.drop(lst_erase)
    if ecv == 'Earth Radiation Budget':        
        lst_erase=[]
        for i in range(len(df)):
            if (df['variable'][i][0]=='total_solar_irradiance'): lst_erase.append(i) # this info is read from the dataset itself
        # now .drop these problematic rows
        for i in lst_erase:
            df=df.drop(lst_erase)
    # for i in range(len(df)):
    #     print(df.loc[i])
    #     if ('year' not in df[i]): lst_erase.append(i)

    df['datemax'] = df.apply(compute_datemax,axis=1)
    df['datemin'] = df.apply(compute_datemin,axis=1)
    datemin = df['datemin'].min()
    datemax = df['datemax'].max()
    return datemin,datemax

with open('config-athos.yml') as f:
    conf= yaml.safe_load(f)

cds_form_dir=conf['cds_form_dir']
datasets_dir = conf['datasets_dir']
thisyear=dt.datetime.today().year+1



  import kaleido; kaleido.get_chrome()


### BUILD PANDAS DATAFRAME FOR TIME COVERAGE BY ECV 

In [2]:

datesbeg = {}
datesend = {}
ecv_dic = {}
for k,ecv in enumerate(conf['ECV']):
    print(ecv)
    entries = conf['ECV'][ecv]['entry']
    print(entries)
    datemin_list = []
    datemax_list = []
    if ecv in ['Earth Radiation Budget']: 
        datemin,datemax = extract_dates_from_TSI()
        datemin_list.append(datemin)
        datemax_list.append(datemax)
    for entry in entries:
        jfilepath=f'{cds_form_dir}{entry}/constraints.json'
        print(entry)
        print(jfilepath)
        if entry == 'satellite-ice-sheet-elevation-change':
            datemin,datemax = extract_dates_icesheets(datasets_dir,entry,'AIS')
            datemin_list.append(datemin)
            datemax_list.append(datemax)
            datemin,datemax = extract_dates_icesheets(datasets_dir,entry,'GrIS')
            datemin_list.append(datemin)
            datemax_list.append(datemax)
        elif entry == 'satellite-ice-sheet-mass-balance':
            datemin,datemax = extract_dates_massbalance(datasets_dir)
            datemin_list.append(datemin)
            datemax_list.append(datemax)
        elif entry == 'derived-gridded-glacier-mass-change':
            jfile = f'{cds_form_dir}{entry}/constraints.json'
            datemin,datemax = extract_dates_derived_glaciers(jfile)
            datemin_list.append(datemin)
            datemax_list.append(datemax)
        elif entry == 'satellite-greenland-ice-sheet-velocity':
            datemin,datemax = extract_dates_icesheet_velocity(datasets_dir,entry,'AIS')
            datemin_list.append(datemin)
            datemax_list.append(datemax)
            datemin,datemax = extract_dates_icesheet_velocity(datasets_dir,entry,'GrIS')
            datemin_list.append(datemin)
            datemax_list.append(datemax)
        elif entry == 'insitu-glaciers-extent':    
            datemin= pd.Timestamp('1990-01-01') # http://www.glims.org/rgi_user_guide/06_dataset_summary.html
            datemax= pd.Timestamp('2010-12-31')
            datemin_list.append(datemin)
            datemax_list.append(datemax)
        elif entry =='satellite-total-column-water-vapour-ocean':
            # temporal aggregation is messed up. does not have the same meaning as other datasets
            # monthly should be yearly
            # 6-hourly should be monthly
            # need to write a special function that accounts for this
            datemin,datemax = extract_dates_wv(jfilepath)
            datemin_list.append(datemin)
            datemax_list.append(datemax)
        elif entry == 'satellite-lake-water-level':
            datemin,datemax = extract_dates_lake_levels(datasets_dir)
            datemin_list.append(datemin)
            datemax_list.append(datemax)
        # elif jfilepath == '/Users/cxjo/Documents/cds-forms-c3s/satellite-land-cover/constraints.json':
        #     datemin_list.append(pd.Timestamp('1992-01-01'))
        #     datemax_list.append(pd.Timestamp('2022-12-31'))
        else:
            # print(jfilepath)
            datemin,datemax = calc_dateminmax_from_cds_form(jfilepath,ecv)
            # print(ecv,datemin_list,datemax_list)
            datemin_list.append(datemin)
            datemax_list.append(datemax)
            # print(ecv,datemin_list,datemax_list)

    # now get the max and min per ECV, accounting for all products
    datemin_list = np.array(datemin_list)
    datemax_list = np.array(datemax_list)
    # datesbeg[ecv] = np.min(datemin_list)
    # datesend[ecv] = np.max(datemax_list)
    ecv_dic[k] = {
        'ECV'     : ecv,
        'DateBeg' : np.min(datemin_list),
        'DateEnd' : np.max(datemax_list),
        'Thematic Hub' : conf['ECV'][ecv]['Thematic_hub']
    }


# ecv_pd = pd.DataFrame([conf['ECV'].keys(),datesbeg,datesend],index=['DateBeg','DateEnd']).T
ecv_pd = pd.DataFrame.from_dict(ecv_dic,orient='index').sort_values(['Thematic Hub'])
ecv_pd['DateBeg'] = ecv_pd['DateBeg'].dt.ceil(freq='s')  
ecv_pd['DateEnd'] = ecv_pd['DateEnd'].dt.ceil(freq='s')  
ecv_pd['DateEnd'] = ecv_pd['DateEnd'].apply(lambda dt: dt.strftime("%Y-%m-%d"))
ecv_pd['DateBeg'] = ecv_pd['DateBeg'].apply(lambda dt: dt.strftime("%Y-%m-%d"))

print(ecv_pd.to_markdown())
today_date = pd.Timestamp.today().strftime('%Y%m%d') 
print(today_date)
ecv_pd.to_excel(f'ECV_time_coverage_perECV_{today_date}.xlsx')
# fig = px.timeline(ecv_pd, x_start="DateBeg", x_end="DateEnd", y='Product',color='Lot')

ecv_pd = ecv_pd.reindex([0,2,14,3,21,4,15,8,6,7,1,9,10,5,12,19,11,18,17,20,13,16])

fig = px.timeline(ecv_pd, x_start="DateBeg", x_end="DateEnd",y='ECV',color='Thematic Hub')

# fig = px.timeline(datasets_df, x_start="startdate", x_end="enddate", y='ECV')
fig.update_yaxes(autorange="reversed")
fig.update_layout(
    autosize=False,
    width=1200,
    height=800,
)
# fig.update_layout(
#     xaxis = dict(
#         dtick = 'Y1',
#         tickformat="%Y",
#     )
# )

xlab = np.arange(1970,thisyear+1).astype('int')
xlabtxt = [f'{i}' for i in xlab]


fig.update_xaxes(minor=dict(ticks="inside", showgrid=True))
fig.update_layout(
    xaxis = dict(
        tickmode = 'array',
        tickvals = xlab,
        ticktext = xlabtxt
    )
)
fig.update_xaxes(tickangle=-45)
fig.update_layout(
    xaxis = dict(
        tickfont = dict(
            size=10),
        )
    )
fig.update_xaxes(range = ['1970-01-01',f'{thisyear-1}-12-31'])
# print(fig)
today_date = pd.Timestamp.today().strftime('%Y%m%d') 
print(today_date)
print(f'temporal_coverage_by_ECV_{today_date}.png')
fig.write_image(f'temporal_coverage_by_ECV_{today_date}.pdf')
fig.write_image(f'temporal_coverage_by_ECV_{today_date}.png')
fig.show()


Aerosols
['satellite-aerosol-properties']
satellite-aerosol-properties
/home/cxjo/C3S_stuff/cads-forms-json/satellite-aerosol-properties/constraints.json
Albedo
['satellite-albedo']
satellite-albedo
/home/cxjo/C3S_stuff/cads-forms-json/satellite-albedo/constraints.json
Greenhouse Gases
['satellite-carbon-dioxide', 'satellite-methane']
satellite-carbon-dioxide
/home/cxjo/C3S_stuff/cads-forms-json/satellite-carbon-dioxide/constraints.json
satellite-methane
/home/cxjo/C3S_stuff/cads-forms-json/satellite-methane/constraints.json
Clouds
['satellite-cloud-properties']
satellite-cloud-properties
/home/cxjo/C3S_stuff/cads-forms-json/satellite-cloud-properties/constraints.json
Earth Radiation Budget
['satellite-earth-radiation-budget']
satellite-earth-radiation-budget
/home/cxjo/C3S_stuff/cads-forms-json/satellite-earth-radiation-budget/constraints.json
Fire
['satellite-fire-burned-area', 'satellite-fire-radiative-power']
satellite-fire-burned-area
/home/cxjo/C3S_stuff/cads-forms-json/satellite

--2026-02-17 15:01:41--  https://cds.c3s.enveo.at/c3s_manifest/manifest_c3s2_313d_ENVEO_ice_sheets_velocity_AIS_CDR_latest.txt
Resolving cds.c3s.enveo.at (cds.c3s.enveo.at)... 83.175.116.100
Connecting to cds.c3s.enveo.at (cds.c3s.enveo.at)|83.175.116.100|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 224 [text/plain]
Saving to: ‘/home/cxjo/C3S_stuff/datasets/manifest_c3s2_313d_ENVEO_ice_sheets_velocity_AIS_CDR_latest.txt’

     0K                                                       100% 11.5M=0s

2026-02-17 15:01:42 (11.5 MB/s) - ‘/home/cxjo/C3S_stuff/datasets/manifest_c3s2_313d_ENVEO_ice_sheets_velocity_AIS_CDR_latest.txt’ saved [224/224]

--2026-02-17 15:01:42--  https://cds.c3s.enveo.at/c3s_manifest/manifest_c3s2_313d_ENVEO_ice_sheets_velocity_GrIS_CDR_latest.txt
Resolving cds.c3s.enveo.at (cds.c3s.enveo.at)... 83.175.116.100
Connecting to cds.c3s.enveo.at (cds.c3s.enveo.at)|83.175.116.100|:443... connected.
HTTP request sent, awaiting response... 200 O

satellite-ice-sheet-elevation-change
/home/cxjo/C3S_stuff/cads-forms-json/satellite-ice-sheet-elevation-change/constraints.json


HTTP request sent, awaiting response... 200 OK
Length: 139 [text/plain]
Saving to: ‘/home/cxjo/C3S_stuff/datasets/manifest_c3s2_313d_ENVEO_ice_sheets_surface_AIS_ICDR_latest.txt’

     0K                                                       100%  368M=0s

2026-02-17 15:01:43 (368 MB/s) - ‘/home/cxjo/C3S_stuff/datasets/manifest_c3s2_313d_ENVEO_ice_sheets_surface_AIS_ICDR_latest.txt’ saved [139/139]

--2026-02-17 15:01:43--  https://cds.c3s.enveo.at/c3s_manifest/manifest_c3s2_313d_ENVEO_ice_sheets_surface_GrIS_ICDR_latest.txt
Resolving cds.c3s.enveo.at (cds.c3s.enveo.at)... 83.175.116.100
Connecting to cds.c3s.enveo.at (cds.c3s.enveo.at)|83.175.116.100|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 282 [text/plain]
Saving to: ‘/home/cxjo/C3S_stuff/datasets/manifest_c3s2_313d_ENVEO_ice_sheets_surface_GrIS_ICDR_latest.txt’

     0K                                                       100% 13.9M=0s

2026-02-17 15:01:43 (13.9 MB/s) - ‘/home/cxjo/C3S_stuff/dataset

/home/cxjo/C3S_stuff/datasets/C3S_AIS_RA_SEC_25km_vers5_199201-202510_2026-01-14.nc file exists!
/home/cxjo/C3S_stuff/datasets/C3S_AIS_RA_SEC_25km_vers2_199201-201910_2020-01-20.nc file exists!
/home/cxjo/C3S_stuff/datasets/C3S_AIS_RA_SEC_25km_vers3_199201-202010_2021-01-25.nc file exists!
/home/cxjo/C3S_stuff/datasets/C3S_AIS_RA_SEC_25km_vers4_199201-202209_2022-12-16.nc file exists!
/home/cxjo/C3S_stuff/datasets/C3S_AIS_RA_SEC_25km_vers5_199201-202409_2024-12-15.nc file exists!


--2026-02-17 15:02:27--  https://cds.c3s.enveo.at/c3s_manifest/manifest_c3s2_313d_ENVEO_ice_sheets_surface_AIS_ICDR_latest.txt
Resolving cds.c3s.enveo.at (cds.c3s.enveo.at)... 83.175.116.100
Connecting to cds.c3s.enveo.at (cds.c3s.enveo.at)|83.175.116.100|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 139 [text/plain]
Saving to: ‘/home/cxjo/C3S_stuff/datasets/manifest_c3s2_313d_ENVEO_ice_sheets_surface_AIS_ICDR_latest.txt’

     0K                                                       100%  510M=0s

2026-02-17 15:02:28 (510 MB/s) - ‘/home/cxjo/C3S_stuff/datasets/manifest_c3s2_313d_ENVEO_ice_sheets_surface_AIS_ICDR_latest.txt’ saved [139/139]

--2026-02-17 15:02:28--  https://cds.c3s.enveo.at/c3s_manifest/manifest_c3s2_313d_ENVEO_ice_sheets_surface_GrIS_ICDR_latest.txt
Resolving cds.c3s.enveo.at (cds.c3s.enveo.at)... 83.175.116.100
Connecting to cds.c3s.enveo.at (cds.c3s.enveo.at)|83.175.116.100|:443... connected.
HTTP request sent, awaiting response... 200 OK

/home/cxjo/C3S_stuff/datasets/C3S_GrIS_RA_SEC_25km_vers2_199108_202210_2023-01-17.nc file exists!
/home/cxjo/C3S_stuff/datasets/C3S_GrIS_RA_SEC_25km_vers3_199108_202310_2024-01-16.nc file exists!
/home/cxjo/C3S_stuff/datasets/C3S_GrIS_RA_SEC_25km_Vers4_199108_202408_2024-11-16.nc file exists!
/home/cxjo/C3S_stuff/datasets/C3S_GrIS_RA_SEC_25km_Vers5_199108_202409_2024-12-09.nc file exists!
/home/cxjo/C3S_stuff/datasets/C3S_GrIS_RA_SEC_25km_Vers6_199108_202409_2024-12-09.nc file exists!
/home/cxjo/C3S_stuff/datasets/C3S_GrIS_RA_SEC_25km_Vers5_199108-202510_2026-01-30.nc file exists!
/home/cxjo/C3S_stuff/datasets/C3S_GrIS_RA_SEC_25km_Vers6_199108-202510_2026-01-30.nc file exists!
satellite-ice-sheet-mass-balance
/home/cxjo/C3S_stuff/cads-forms-json/satellite-ice-sheet-mass-balance/constraints.json


--2026-02-17 15:02:29--  https://cds:*password*@cds.c3s.eodc.eu/manifest/manifest_c3s2_312a_eodc_ice_sheets_gravimetry_latest.txt
Resolving cds.c3s.eodc.eu (cds.c3s.eodc.eu)... 193.170.203.81
Connecting to cds.c3s.eodc.eu (cds.c3s.eodc.eu)|193.170.203.81|:443... connected.
HTTP request sent, awaiting response... 401 Unauthorized
Authentication selected: Basic realm="traefik"
Reusing existing connection to cds.c3s.eodc.eu:443.
HTTP request sent, awaiting response... 200 OK
Length: 244 [text/plain]
Saving to: ‘/home/cxjo/C3S_stuff/datasets/manifest_c3s2_312a_eodc_ice_sheets_gravimetry_latest.txt’

     0K                                                       100% 1.42G=0s

2026-02-17 15:02:29 (1.42 GB/s) - ‘/home/cxjo/C3S_stuff/datasets/manifest_c3s2_312a_eodc_ice_sheets_gravimetry_latest.txt’ saved [244/244]



['/home/cxjo/C3S_stuff/datasets/manifest_c3s2_312a_eodc_ice_sheets_gravimetry_latest.txt']
C3S_GMB_GRACE_vers5.nc file exists!
Glaciers
['insitu-glaciers-extent', 'derived-gridded-glacier-mass-change']
insitu-glaciers-extent
/home/cxjo/C3S_stuff/cads-forms-json/insitu-glaciers-extent/constraints.json
derived-gridded-glacier-mass-change
/home/cxjo/C3S_stuff/cads-forms-json/derived-gridded-glacier-mass-change/constraints.json
Glaciers  1975 2021
Upper-air Water Vapour
['satellite-humidity-profiles', 'satellite-total-column-water-vapour-land-ocean', 'satellite-total-column-water-vapour-ocean', 'satellite-upper-troposphere-humidity']
satellite-humidity-profiles
/home/cxjo/C3S_stuff/cads-forms-json/satellite-humidity-profiles/constraints.json
satellite-total-column-water-vapour-land-ocean
/home/cxjo/C3S_stuff/cads-forms-json/satellite-total-column-water-vapour-land-ocean/constraints.json
satellite-total-column-water-vapour-ocean
/home/cxjo/C3S_stuff/cads-forms-json/satellite-total-column-wa

--2026-02-17 15:02:30--  https://cds:*password*@cds.c3s.eodc.eu/manifest/manifest_c3s2_313c_eodc_lakes_lwl_icdr_latest.txt
Resolving cds.c3s.eodc.eu (cds.c3s.eodc.eu)... 193.170.203.81
Connecting to cds.c3s.eodc.eu (cds.c3s.eodc.eu)|193.170.203.81|:443... connected.
HTTP request sent, awaiting response... 401 Unauthorized
Authentication selected: Basic realm="traefik"
Reusing existing connection to cds.c3s.eodc.eu:443.
HTTP request sent, awaiting response... 200 OK
Length: 57416 (56K) [text/plain]
Saving to: ‘/home/cxjo/C3S_stuff/datasets/manifest_c3s2_313c_eodc_lakes_lwl_icdr_latest.txt’

     0K .......... .......... .......... .......... .......... 89% 1.65M 0s
    50K ......                                                100%  308M=0.03s

2026-02-17 15:02:30 (1.84 MB/s) - ‘/home/cxjo/C3S_stuff/datasets/manifest_c3s2_313c_eodc_lakes_lwl_icdr_latest.txt’ saved [57416/57416]



/home/cxjo/C3S_stuff/datasets/manifest_c3s2_313c_eodc_lakes_lwl_icdr_latest.txt
satellite-lake-water-temperature
/home/cxjo/C3S_stuff/cads-forms-json/satellite-lake-water-temperature/constraints.json
Land Cover
['satellite-land-cover']
satellite-land-cover
/home/cxjo/C3S_stuff/cads-forms-json/satellite-land-cover/constraints.json
Ocean Colour
['satellite-ocean-colour']
satellite-ocean-colour
/home/cxjo/C3S_stuff/cads-forms-json/satellite-ocean-colour/constraints.json
Ozone
['satellite-ozone-v1']
satellite-ozone-v1
/home/cxjo/C3S_stuff/cads-forms-json/satellite-ozone-v1/constraints.json
Precipitation
['satellite-precipitation', 'satellite-precipitation-microwave', 'satellite-precipitation-microwave-infrared']
satellite-precipitation
/home/cxjo/C3S_stuff/cads-forms-json/satellite-precipitation/constraints.json
satellite-precipitation-microwave
/home/cxjo/C3S_stuff/cads-forms-json/satellite-precipitation-microwave/constraints.json
satellite-precipitation-microwave-infrared
/home/cxjo/C3S_

### Build pandas dataframe for the figure ordered by product


In [3]:
def calc_dateminmax_from_cds_form_2(jfilepath,ecv):
    # Opening JSON file
    f = open(jfilepath)
    # returns JSON object as 
    # a dictionary
    data = json.load(f)
    # print(data)
    df = pd.DataFrame(data)
    # display(df)
    
    # print(df.keys())
    # print(len(df))
    
    # find records where dates cannot be defined
    if 'sensor_and_algorithm' in df.keys():        
        lst_erase=[]
        for i in range(len(df)):
            if (df['sensor_and_algorithm'][i][0]=='merged_obs4mips'): lst_erase.append(i)
        # now .drop these problematic rows
        for i in lst_erase:
            df=df.drop(lst_erase)
    if ecv == 'Earth Radiation Budget':        
        lst_erase=[]
        for i in range(len(df)):
            if (df['variable'][i][0]=='total_solar_irradiance'): lst_erase.append(i) # this info is read from the dataset itself
        # now .drop these problematic rows
        for i in lst_erase:
            df=df.drop(lst_erase)
    # for i in range(len(df)):
    #     print(df.loc[i])
    #     if ('year' not in df[i]): lst_erase.append(i)

    df['datemax'] = df.apply(compute_datemax,axis=1)
    df['datemin'] = df.apply(compute_datemin,axis=1)

    datemin = df['datemin'].min()
    datemax = df['datemax'].max()

    return datemin,datemax
def extract_dates_dwd_products(jfilepath,product_family):
    
    f = open(jfilepath)
    # returns JSON object as 
    # a dictionary
    data = json.load(f)
    # print(data)
    df = pd.DataFrame(data)

    df_temp=df.copy()
    for i in range(len(df)):
        # df.loc[i,'product_family']= df_temp['product_family'][i][0]
        df.loc[i,'product_family'] = df_temp['product_family'][i][0]
    df2 = df[df['product_family'] ==product_family]
    # display(df)
    # display(df2)
    df2['datemax'] = df2.apply(compute_datemax,axis=1)
    df2['datemin'] = df2.apply(compute_datemin,axis=1)

    datemin = df2['datemin'].min()
    datemax = df2['datemax'].max()

    return datemin, datemax

prod_dic = {}
datemax_list = []
datemin_list = []

for k_prod,prod in enumerate(conf['PRODUCT']):
    # print(prod, conf['PRODUCT'][prod]['ECV'])
    ecv = conf['PRODUCT'][prod]['ECV']
    entry = conf['PRODUCT'][prod]['entry'][0]
    product = conf['PRODUCT'][prod]['Product']
    themHub = conf['PRODUCT'][prod]['Thematic_hub']
    jfilepath=f'{cds_form_dir}{entry}/constraints.json'
    print('PASSEI',jfilepath,prod,ecv,entry,product,themHub)
    if prod == 'ERB_RMIB_TSI':
        datemin,datemax = extract_dates_from_TSI()
    elif prod in ['CLOUDS_CLARA-A2','CLOUDS_CLARA-A3','CLOUDS_CCI_C3S',
                  'ERB_NASA_CERES','ERB_NOAA_HIRS','ERB_CCI_C3S','ERB_CLARA-A3',
                  'SRB_CLARA-A2','SRB_CLARA-A3','SRB_CCI_C3S']:
        product_family=conf['PRODUCT'][prod]['product_family']
        datemin,datemax = extract_dates_dwd_products(jfilepath,product_family=product_family)    
        print('CLOUDS',prod,datemin,datemax)
    elif prod == 'Ice Sheet Gravimetric Mass Balance':
        fname = glob(datasets_dir+'C3S_GMB*')[0]
        nc = xr.open_dataset(fname)
        datemin,datemax=(pd.Timestamp(nc['time'].values[0]),pd.Timestamp(nc['time'].values[-1]))
    elif prod == 'Ice Sheet Surface Elevation Change (Antarctica)':
        datemin,datemax = extract_dates_icesheets(datasets_dir,entry,'AIS')
    elif prod == 'Ice Sheet Surface Elevation Change (Greenland)':
        datemin,datemax = extract_dates_icesheets(datasets_dir,entry,'GrIS')
    elif prod == 'Ice Sheet Velocity (Antarctica)':
        datemin,datemax = extract_dates_icesheet_velocity(datasets_dir,entry,'AIS')
    elif prod == 'Ice Sheet Velocity (Greenland)':
        datemin,datemax = extract_dates_icesheet_velocity(datasets_dir,entry,'GrIS')
    elif prod == 'Glaciers elevation and mass change data':
        jfile = f'{cds_form_dir}{entry}/constraints.json'
        print(jfile)
        datemin,datemax = extract_dates_derived_glaciers(jfile)
        datemin_list.append(datemin)
        datemax_list.append(datemax)
    elif entry == 'insitu-glaciers-extent':    
        datemin= pd.Timestamp('1990-01-01') # http://www.glims.org/rgi_user_guide/06_dataset_summary.html
        datemax= pd.Timestamp('2010-12-31')
        datemin_list.append(datemin)
        datemax_list.append(datemax)
    elif entry =='satellite-total-column-water-vapour-ocean':
        # temporal aggregation is messed up. does not have the same meaning as other datasets
        # monthly should be yearly
        # 6-hourly should be monthly
        # need to write a special function that accounts for this
        datemin,datemax = extract_dates_wv(jfilepath)
        datemin_list.append(datemin)
        datemax_list.append(datemax)
    elif entry == 'satellite-lake-water-level':
        datemin,datemax = extract_dates_lake_levels(datasets_dir)
        datemin_list.append(datemin)
        datemax_list.append(datemax)
    else:
        datemin,datemax = calc_dateminmax_from_cds_form_2(jfilepath,ecv)
    print(prod,datemin,datemax)
    datemin_list.append(datemin)
    datemax_list.append(datemax)
    
    prod_dic[k_prod] = {
    'Product': product,
    'ECV'     : ecv,
    'DateBeg' : datemin,
    'DateEnd' : datemax,
    'Thematic Hub' : themHub
    }
    print(prod_dic[k_prod])

PASSEI /home/cxjo/C3S_stuff/cads-forms-json/satellite-aerosol-properties/constraints.json Aerosols Aerosols satellite-aerosol-properties Aerosols Atmospheric Composition
Aerosols 1995-06-01 00:00:00 2025-07-31 00:00:00
{'Product': 'Aerosols', 'ECV': 'Aerosols', 'DateBeg': Timestamp('1995-06-01 00:00:00'), 'DateEnd': Timestamp('2025-07-31 00:00:00'), 'Thematic Hub': 'Atmospheric Composition'}
PASSEI /home/cxjo/C3S_stuff/cads-forms-json/satellite-carbon-dioxide/constraints.json CO2 Greenhouse Gases satellite-carbon-dioxide CO2 Atmospheric Composition
CO2 2002-10-01 00:00:00 2024-12-31 00:00:00
{'Product': 'CO2', 'ECV': 'Greenhouse Gases', 'DateBeg': Timestamp('2002-10-01 00:00:00'), 'DateEnd': Timestamp('2024-12-31 00:00:00'), 'Thematic Hub': 'Atmospheric Composition'}
PASSEI /home/cxjo/C3S_stuff/cads-forms-json/satellite-methane/constraints.json CH4 Greenhouse Gases satellite-methane CH4 Atmospheric Composition
CH4 2002-10-01 00:00:00 2024-12-31 00:00:00
{'Product': 'CH4', 'ECV': 'Green



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

ERB_RMIB_TSI 1979-01-01 00:00:00 2024-09-30 00:00:00
{'Product': 'Earth Rad Budget - TotSolarIrrad', 'ECV': 'Earth Radiation Budget', 'DateBeg': Timestamp('1979-01-01 00:00:00'), 'DateEnd': Timestamp('2024-09-30 00:00:00'), 'Thematic Hub': 'Atmospheric Physics'}
PASSEI /home/cxjo/C3S_stuff/cads-forms-json/satellite-earth-radiation-budget/constraints.json ERB_RMIB_SSI Earth Radiation Budget satellite-earth-radiation-budget Earth Rad Budget - SpecSolarIrrad Atmospheric Physics
ERB_RMIB_SSI 1979-01-01 00:00:00 2025-11-30 00:00:00
{'Product': 'Earth Rad Budget - SpecSolarIrrad', 'ECV': 'Earth Radiation Budget', 'DateBeg': Timestamp('1979-01-01 00:00:00'), 'DateEnd': Timestamp('2025-11-30 00:00:00'), 'Thematic Hub': 'Atmospheric Physics'}
PASSEI /home/cxjo/C3S_stuff/cads-forms-json/satellite-earth-radiation-budget/constraints.json ERB_CLARA-A3 Earth Radiation Budget satellite-earth-radiation-budget Earth Rad Budget - CMSAF CLARA-A3 Atmospheric Physics
CLOUDS ERB_CLARA-A3 1979-01-01 00:00:00



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

Ice Sheet Velocity (Greenland) 2014-10-01 00:00:00 2023-09-30 00:00:00
{'Product': 'Ice Sheet Velocity (Greenland)', 'ECV': 'Ice Sheets', 'DateBeg': Timestamp('2014-10-01 00:00:00'), 'DateEnd': Timestamp('2023-09-30 00:00:00'), 'Thematic Hub': 'Cryosphere'}
PASSEI /home/cxjo/C3S_stuff/cads-forms-json/satellite-greenland-ice-sheet-velocity/constraints.json Ice Sheet Velocity (Antarctica) Ice Sheets satellite-greenland-ice-sheet-velocity Ice Sheet Velocity (Antarctica) Cryosphere


HTTP request sent, awaiting response... 200 OK
Length: 224 [text/plain]
Saving to: ‘/home/cxjo/C3S_stuff/datasets/manifest_c3s2_313d_ENVEO_ice_sheets_velocity_AIS_CDR_latest.txt’

     0K                                                       100% 9.47M=0s

2026-02-17 15:02:46 (9.47 MB/s) - ‘/home/cxjo/C3S_stuff/datasets/manifest_c3s2_313d_ENVEO_ice_sheets_velocity_AIS_CDR_latest.txt’ saved [224/224]

--2026-02-17 15:02:46--  https://cds.c3s.enveo.at/c3s_manifest/manifest_c3s2_313d_ENVEO_ice_sheets_surface_AIS_ICDR_latest.txt


Ice Sheet Velocity (Antarctica) 2021-04-01 00:00:00 2023-03-31 00:00:00
{'Product': 'Ice Sheet Velocity (Antarctica)', 'ECV': 'Ice Sheets', 'DateBeg': Timestamp('2021-04-01 00:00:00'), 'DateEnd': Timestamp('2023-03-31 00:00:00'), 'Thematic Hub': 'Cryosphere'}
PASSEI /home/cxjo/C3S_stuff/cads-forms-json/satellite-ice-sheet-mass-balance/constraints.json Ice Sheet Gravimetric Mass Balance Ice Sheets satellite-ice-sheet-mass-balance Ice Sheet Gravimetric Mass Balance Cryosphere
Ice Sheet Gravimetric Mass Balance 2002-04-16 20:23:54.375000 2022-12-17 09:35:37.500000
{'Product': 'Ice Sheet Gravimetric Mass Balance', 'ECV': 'Ice Sheets', 'DateBeg': Timestamp('2002-04-16 20:23:54.375000'), 'DateEnd': Timestamp('2022-12-17 09:35:37.500000'), 'Thematic Hub': 'Cryosphere'}
PASSEI /home/cxjo/C3S_stuff/cads-forms-json/satellite-ice-sheet-elevation-change//constraints.json Ice Sheet Surface Elevation Change (Antarctica) Ice Sheets satellite-ice-sheet-elevation-change/ Ice Sheet Surface Elevation Cha

Resolving cds.c3s.enveo.at (cds.c3s.enveo.at)... 83.175.116.100
Connecting to cds.c3s.enveo.at (cds.c3s.enveo.at)|83.175.116.100|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 139 [text/plain]
Saving to: ‘/home/cxjo/C3S_stuff/datasets/manifest_c3s2_313d_ENVEO_ice_sheets_surface_AIS_ICDR_latest.txt’

     0K                                                       100%  530M=0s

2026-02-17 15:02:46 (530 MB/s) - ‘/home/cxjo/C3S_stuff/datasets/manifest_c3s2_313d_ENVEO_ice_sheets_surface_AIS_ICDR_latest.txt’ saved [139/139]

--2026-02-17 15:02:46--  https://cds.c3s.enveo.at/c3s_manifest/manifest_c3s2_313d_ENVEO_ice_sheets_surface_GrIS_ICDR_latest.txt
Resolving cds.c3s.enveo.at (cds.c3s.enveo.at)... 83.175.116.100
Connecting to cds.c3s.enveo.at (cds.c3s.enveo.at)|83.175.116.100|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 282 [text/plain]
Saving to: ‘/home/cxjo/C3S_stuff/datasets/manifest_c3s2_313d_ENVEO_ice_sheets_surface_GrIS_ICDR_lates

/home/cxjo/C3S_stuff/datasets/C3S_AIS_RA_SEC_25km_vers5_199201-202510_2026-01-14.nc file exists!
/home/cxjo/C3S_stuff/datasets/C3S_AIS_RA_SEC_25km_vers2_199201-201910_2020-01-20.nc file exists!
/home/cxjo/C3S_stuff/datasets/C3S_AIS_RA_SEC_25km_vers3_199201-202010_2021-01-25.nc file exists!
/home/cxjo/C3S_stuff/datasets/C3S_AIS_RA_SEC_25km_vers4_199201-202209_2022-12-16.nc file exists!
/home/cxjo/C3S_stuff/datasets/C3S_AIS_RA_SEC_25km_vers5_199201-202409_2024-12-15.nc file exists!
Ice Sheet Surface Elevation Change (Antarctica) 1994-11-01 09:21:19.687500 2023-05-03 00:20:37.500000
{'Product': 'Ice Sheet Surface Elevation Change (Antarctica)', 'ECV': 'Ice Sheets', 'DateBeg': Timestamp('1994-11-01 09:21:19.687500'), 'DateEnd': Timestamp('2023-05-03 00:20:37.500000'), 'Thematic Hub': 'Cryosphere'}
PASSEI /home/cxjo/C3S_stuff/cads-forms-json/satellite-ice-sheet-elevation-change//constraints.json Ice Sheet Surface Elevation Change (Greenland) Ice Sheets satellite-ice-sheet-elevation-change/ 

--2026-02-17 15:02:47--  https://cds.c3s.enveo.at/c3s_manifest/manifest_c3s2_313d_ENVEO_ice_sheets_surface_AIS_ICDR_latest.txt
Resolving cds.c3s.enveo.at (cds.c3s.enveo.at)... 83.175.116.100
Connecting to cds.c3s.enveo.at (cds.c3s.enveo.at)|83.175.116.100|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 139 [text/plain]
Saving to: ‘/home/cxjo/C3S_stuff/datasets/manifest_c3s2_313d_ENVEO_ice_sheets_surface_AIS_ICDR_latest.txt’

     0K                                                       100%  457M=0s

2026-02-17 15:02:48 (457 MB/s) - ‘/home/cxjo/C3S_stuff/datasets/manifest_c3s2_313d_ENVEO_ice_sheets_surface_AIS_ICDR_latest.txt’ saved [139/139]

--2026-02-17 15:02:48--  https://cds.c3s.enveo.at/c3s_manifest/manifest_c3s2_313d_ENVEO_ice_sheets_surface_GrIS_ICDR_latest.txt
Resolving cds.c3s.enveo.at (cds.c3s.enveo.at)... 83.175.116.100
Connecting to cds.c3s.enveo.at (cds.c3s.enveo.at)|83.175.116.100|:443... connected.
HTTP request sent, awaiting response... 200 OK

/home/cxjo/C3S_stuff/datasets/C3S_GrIS_RA_SEC_25km_vers2_199108_202210_2023-01-17.nc file exists!
/home/cxjo/C3S_stuff/datasets/C3S_GrIS_RA_SEC_25km_vers3_199108_202310_2024-01-16.nc file exists!
/home/cxjo/C3S_stuff/datasets/C3S_GrIS_RA_SEC_25km_Vers4_199108_202408_2024-11-16.nc file exists!
/home/cxjo/C3S_stuff/datasets/C3S_GrIS_RA_SEC_25km_Vers5_199108_202409_2024-12-09.nc file exists!
/home/cxjo/C3S_stuff/datasets/C3S_GrIS_RA_SEC_25km_Vers6_199108_202409_2024-12-09.nc file exists!
/home/cxjo/C3S_stuff/datasets/C3S_GrIS_RA_SEC_25km_Vers5_199108-202510_2026-01-30.nc file exists!
/home/cxjo/C3S_stuff/datasets/C3S_GrIS_RA_SEC_25km_Vers6_199108-202510_2026-01-30.nc file exists!
Ice Sheet Surface Elevation Change (Greenland) 1992-01-01 00:00:00 2025-07-01 00:00:00
{'Product': 'Ice Sheet Surface Elevation Change (Greenland)', 'ECV': 'Ice Sheets', 'DateBeg': Timestamp('1992-01-01 00:00:00'), 'DateEnd': Timestamp('2025-07-01 00:00:00'), 'Thematic Hub': 'Cryosphere'}
PASSEI /home/cxjo/C3S_st

--2026-02-17 15:02:49--  https://cds:*password*@cds.c3s.eodc.eu/manifest/manifest_c3s2_313c_eodc_lakes_lwl_icdr_latest.txt
Resolving cds.c3s.eodc.eu (cds.c3s.eodc.eu)... 193.170.203.81
Connecting to cds.c3s.eodc.eu (cds.c3s.eodc.eu)|193.170.203.81|:443... connected.
HTTP request sent, awaiting response... 401 Unauthorized
Authentication selected: Basic realm="traefik"
Reusing existing connection to cds.c3s.eodc.eu:443.
HTTP request sent, awaiting response... 200 OK
Length: 57416 (56K) [text/plain]
Saving to: ‘/home/cxjo/C3S_stuff/datasets/manifest_c3s2_313c_eodc_lakes_lwl_icdr_latest.txt’

     0K .......... .......... .......... .......... .......... 89% 1.36M 0s
    50K ......                                                100%  295M=0.04s

2026-02-17 15:02:50 (1.53 MB/s) - ‘/home/cxjo/C3S_stuff/datasets/manifest_c3s2_313c_eodc_lakes_lwl_icdr_latest.txt’ saved [57416/57416]



/home/cxjo/C3S_stuff/datasets/manifest_c3s2_313c_eodc_lakes_lwl_icdr_latest.txt
Lake Water Level 1992-09-26 00:00:00 2024-12-30 00:00:00
{'Product': 'Lake Water Level', 'ECV': 'Lakes', 'DateBeg': Timestamp('1992-09-26 00:00:00'), 'DateEnd': Timestamp('2024-12-30 00:00:00'), 'Thematic Hub': 'Land Hydrology'}
PASSEI /home/cxjo/C3S_stuff/cads-forms-json/satellite-soil-moisture/constraints.json Soil Moisture Soil Moisture satellite-soil-moisture Soil Moisture Land Hydrology
Soil Moisture 1978-11-01 00:00:00 2026-01-31 00:00:00
{'Product': 'Soil Moisture', 'ECV': 'Soil Moisture', 'DateBeg': Timestamp('1978-11-01 00:00:00'), 'DateEnd': Timestamp('2026-01-31 00:00:00'), 'Thematic Hub': 'Land Hydrology'}
PASSEI /home/cxjo/C3S_stuff/cads-forms-json/satellite-lai-fapar/constraints.json LAI LAI satellite-lai-fapar LAI Land Biosphere
LAI 1981-09-20 00:00:00 2024-12-31 00:00:00
{'Product': 'LAI', 'ECV': 'LAI', 'DateBeg': Timestamp('1981-09-20 00:00:00'), 'DateEnd': Timestamp('2024-12-31 00:00:00'),

In [4]:
prod_dic

{0: {'Product': 'Aerosols',
  'ECV': 'Aerosols',
  'DateBeg': Timestamp('1995-06-01 00:00:00'),
  'DateEnd': Timestamp('2025-07-31 00:00:00'),
  'Thematic Hub': 'Atmospheric Composition'},
 1: {'Product': 'CO2',
  'ECV': 'Greenhouse Gases',
  'DateBeg': Timestamp('2002-10-01 00:00:00'),
  'DateEnd': Timestamp('2024-12-31 00:00:00'),
  'Thematic Hub': 'Atmospheric Composition'},
 2: {'Product': 'CH4',
  'ECV': 'Greenhouse Gases',
  'DateBeg': Timestamp('2002-10-01 00:00:00'),
  'DateEnd': Timestamp('2024-12-31 00:00:00'),
  'Thematic Hub': 'Atmospheric Composition'},
 3: {'Product': 'Ozone',
  'ECV': 'Ozone',
  'DateBeg': Timestamp('1970-04-01 00:00:00'),
  'DateEnd': Timestamp('2025-07-31 00:00:00'),
  'Thematic Hub': 'Atmospheric Composition'},
 4: {'Product': 'Clouds - CMSAF CLARA-A3',
  'ECV': 'Clouds',
  'DateBeg': Timestamp('1979-01-01 00:00:00'),
  'DateEnd': Timestamp('2025-06-30 00:00:00'),
  'Thematic Hub': 'Atmospheric Physics'},
 5: {'Product': 'Clouds - CCI/C3S',
  'ECV': '

In [5]:


# ecv_pd = pd.DataFrame([conf['ECV'].keys(),datesbeg,datesend],index=['DateBeg','DateEnd']).T
prod_pd = pd.DataFrame.from_dict(prod_dic,orient='index').sort_values(['Thematic Hub','ECV','Product'])
prod_pd['DateBeg'] = prod_pd['DateBeg'].dt.ceil(freq='s')  
prod_pd['DateEnd'] = prod_pd['DateEnd'].dt.ceil(freq='s')  
prod_pd['DateEnd'] = prod_pd['DateEnd'].apply(lambda dt: dt.strftime("%Y-%m-%d"))
prod_pd['DateBeg'] = prod_pd['DateBeg'].apply(lambda dt: dt.strftime("%Y-%m-%d"))

# custom order - looks nice but logic is not perfect
# prod_pd = prod_pd.reindex([1,2,0,3,
#                           14,15,13,9,7,4,11,10,5,12,8,6,18,19,17,16,
#                           26,25,22,23,24,21,20,
#                           27,28,29,
#                           33,32,34,35,30,31,
#                           40,41,42,43,36,37,38,39])

# custom order - looks nice and groups per ECV
# prod_pd = prod_pd.reindex([1,2,0,3,
#                           14,15,13, 4,5, 11,12,   10,8,  9,7,6,  18,19,17,16,
#                           26,25,22,23,24,21,20,
#                           27,28,29,
#                           33,32,34,35,30,31,
#                           40,41,42,43,36,37,38,39])

today_date = pd.Timestamp.today().strftime('%Y%m%d') 
print(today_date)
print(prod_pd.to_markdown())
prod_pd.to_excel(f'ECV_time_coverage_perProduct_{today_date}.xlsx')

# fig = px.timeline(ecv_pd, x_start="DateBeg", x_end="DateEnd", y='Product',color='Lot')
fig = px.timeline(prod_pd, x_start="DateBeg", x_end="DateEnd",y='Product',color='Thematic Hub')

# fig = px.timeline(datasets_df, x_start="startdate", x_end="enddate", y='ECV')
fig.update_yaxes(autorange="reversed")
fig.update_layout(
    autosize=False,
    width=1200,
    height=800,
)
# fig.update_layout(
#     xaxis = dict(
#         dtick = 'Y1',
#         tickformat="%Y",
#     )
# )

xlab = np.arange(1970,thisyear+1).astype('int')
xlabtxt = [f'{i}' for i in xlab]


fig.update_xaxes(minor=dict(ticks="inside", showgrid=True))
fig.update_layout(
    xaxis = dict(
        tickmode = 'array',
        tickvals = xlab,
        ticktext = xlabtxt
    )
)
fig.update_xaxes(tickangle=-45)
fig.update_layout(
    xaxis = dict(
        tickfont = dict(
            size=10),
        )
    )
fig.update_xaxes(range = ['1970-01-01',f'{thisyear-1}-12-31'])
# print(fig)
today_date = pd.Timestamp.today().strftime('%Y%m%d') 
print(today_date)
fig.write_image(f'temporal_coverage_by_Product_{today_date}.pdf')
fig.write_image(f'temporal_coverage_by_Product_{today_date}.png')
fig.show()

20260217
|    | Product                                         | ECV                      | DateBeg    | DateEnd    | Thematic Hub            |
|---:|:------------------------------------------------|:-------------------------|:-----------|:-----------|:------------------------|
|  0 | Aerosols                                        | Aerosols                 | 1995-06-01 | 2025-07-31 | Atmospheric Composition |
|  2 | CH4                                             | Greenhouse Gases         | 2002-10-01 | 2024-12-31 | Atmospheric Composition |
|  1 | CO2                                             | Greenhouse Gases         | 2002-10-01 | 2024-12-31 | Atmospheric Composition |
|  3 | Ozone                                           | Ozone                    | 1970-04-01 | 2025-07-31 | Atmospheric Composition |
|  5 | Clouds - CCI/C3S                                | Clouds                   | 1995-06-01 | 2023-12-31 | Atmospheric Physics     |
|  4 | Clouds - CMSAF CLARA-A3         

In [6]:
# compare most recent versions to report the "delta" to Anna

today_date = pd.Timestamp.today().strftime('%Y%m%d') 
previous_date = pd.Timestamp('2025-09-12').strftime('%Y%m%d') 

old_df = pd.read_excel(f"ECV_time_coverage_perProduct_{previous_date}.xlsx")
new_df = pd.read_excel(f"ECV_time_coverage_perProduct_{today_date}.xlsx")

key_cols = ["Product", "ECV",  "Thematic Hub"]
compare_cols = ["DateBeg", "DateEnd" ]

# Merge on key
merged = new_df.merge(
    old_df,
    on=key_cols,
    how="left",
    suffixes=("", "_old"),
    indicator=True
)

def classify(row):
    if row["_merge"] == "left_only":
        return "added"
    else:
        # Compare non-key columns
        for col in compare_cols:
            if row[col] != row[f"{col}_old"]:
                return "modified"
        return "unchanged"

merged["status"] = merged.apply(classify, axis=1)

final = merged[new_df.columns.tolist() + ["status"]]
print(final.to_markdown())

final.to_excel(f"ECV_time_coverage_perProduct_withChangesSince_{previous_date}_{today_date}.xlsx", index=False)

|    |   Unnamed: 0 | Product                                         | ECV                      | DateBeg    | DateEnd    | Thematic Hub            | status    |
|---:|-------------:|:------------------------------------------------|:-------------------------|:-----------|:-----------|:------------------------|:----------|
|  0 |            0 | Aerosols                                        | Aerosols                 | 1995-06-01 | 2025-07-31 | Atmospheric Composition | modified  |
|  1 |            2 | CH4                                             | Greenhouse Gases         | 2002-10-01 | 2024-12-31 | Atmospheric Composition | modified  |
|  2 |            1 | CO2                                             | Greenhouse Gases         | 2002-10-01 | 2024-12-31 | Atmospheric Composition | modified  |
|  3 |            3 | Ozone                                           | Ozone                    | 1970-04-01 | 2025-07-31 | Atmospheric Composition | modified  |
|  4 |            5 | 

In [65]:
# new tweaked plot

df = prod_pd.copy(deep=True)
df["DateBeg"] = pd.to_datetime(df["DateBeg"])
df["DateEnd"] = pd.to_datetime(df["DateEnd"])

hub_colors = {
    "Atmospheric Composition": "#f4a261",
    "Atmospheric Physics": "#ff6f00",
    "Cryosphere": "#1f9ebc",
    "Land Biosphere": "#a6b84c",
    "Land Hydrology": "#4caf0b",
    "Ocean": "#1f4e8c"
}

fig = go.Figure()

y_positions = []
y_labels = []
hub_first_y = {}
current_y = 0

# ---------- BUILD TIMELINE ----------
for hub in df["Thematic Hub"].unique():

    hub_df = df[df["Thematic Hub"] == hub]

    hub_first_y[hub] = current_y  # store first row position

    for _, row in hub_df.iterrows():

        # Line + markers
        fig.add_trace(go.Scatter(
            x=[row["DateBeg"], row["DateEnd"]],
            y=[current_y, current_y],
            mode="lines+markers",
            line=dict(color=hub_colors[hub], width=3),   # thinner
            marker=dict(
                size=6,
                color=hub_colors[hub],
                line=dict(color="black", width=1)
            ),
            showlegend=False
        ))

        # End date
        fig.add_annotation(
            x=row["DateEnd"],
            y=current_y,
            text=row["DateEnd"].strftime("%b %d %Y"),
            showarrow=False,
            xanchor="left",
            xshift=6,
            font=dict(size=9, color="rgba(0,0,0,0.65)")
        )

        # Start date
        fig.add_annotation(
            x=row["DateBeg"],
            y=current_y,
            text=row["DateBeg"].strftime("%b %d %Y"),
            showarrow=False,
            xanchor="right",
            xshift=-6,
            font=dict(size=9, color="rgba(0,0,0,0.65)")
        )

        y_labels.append(row["Product"])
        y_positions.append(current_y)
        current_y -= 1

    current_y -= 1.8  # extra spacing between hubs


# ---------- REMOVE DEFAULT Y TICKS ----------
fig.update_yaxes(showticklabels=False, showgrid=False)


# Product labels
for y_val, label in zip(y_positions, y_labels):
    fig.add_annotation(
        x=-0.30,
        y=y_val,
        xref="paper",
        yref="y",
        text=label,
        showarrow=False,
        xanchor="left",
        align="left",
        font=dict(size=13, color="#2c3e50")
    )

# Hub headers
for hub, y_val in hub_first_y.items():
    fig.add_annotation(
        x=-0.30,
        y=y_val + 0.9,
        xref="paper",
        yref="y",
        text=f"<b>{hub}</b>",
        showarrow=False,
        xanchor="left",
        align="left",
        font=dict(size=16, color=hub_colors[hub])
    )


# ---------- AXES STYLE ----------
fig.update_xaxes(
    showgrid=True,
    gridcolor="rgba(0,0,0,0.08)",  # lighter
    griddash="dash",
    zeroline=False
)


# ---------- LAYOUT ----------
fig.update_layout(
    title=dict(
        text="<b>ECV time coverage, by product</b>",
        x=0.06,
        y=0.925,
        xanchor="left",
        font=dict(size=20, family="Roboto")
    ),
    font=dict(family="Roboto", size=13),
    plot_bgcolor="white",
    paper_bgcolor="white",
    margin=dict(l=260, r=60, t=90, b=60),
    width=1000,
    height=1200,
    autosize=False
)


fig.write_image(f'temporal_coverage_by_Product_{today_date}_v2.png', scale=2)
fig.show()

In [1]:
print(df.to_markdown())

NameError: name 'df' is not defined