In [None]:
import pandas as pd
import urllib.request
import os
from datetime import datetime,date
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import json
import yaml
from calendar import monthrange
import xarray as xr
import cdsapi
import zipfile
from glob import glob

#### IMPORTANT ############
#before running this script, go to cads_forms_json, git pull, then git checkout prod
###########################

# general recipe taken from https://www.datacamp.com/tutorial/how-to-make-gantt-chart-in-python-matplotlib

def calculate_bars(timeseries,dates):
    """Find the contiguous runs of 1s in a 0/1 coverage series.

    Parameters
    ----------
    timeseries : sequence of int
        Coverage flags, one per time slot. A value of 1 means "covered";
        any other value is treated as "not covered".
    dates : any
        Not used by the computation. Kept in the signature so existing
        callers that pass the date index keep working.

    Returns
    -------
    (sdates, edates, durations) : tuple of three lists of int
        For every run of consecutive 1s: the index of its first element,
        the index of its last element (inclusive) and its length in slots.
        All three lists are empty when there is no run, including when
        `timeseries` itself is empty.
    """
    sdates = []
    edates = []
    durations = []

    def close_bar(startdate, enddate):
        # register one finished bar; both bounds are inclusive indices
        sdates.append(startdate)
        edates.append(enddate)
        durations.append(enddate-startdate+1)

    startdate = None # index where the currently open bar began (None = no open bar)
    for i in range(len(timeseries)):
        covered = (timeseries[i] == 1)
        if covered and startdate is None: # beginning of bar
            startdate = i
        elif not covered and startdate is not None: # bar ended at the previous slot
            close_bar(startdate, i-1)
            startdate = None

    # a bar still open after the loop runs up to the last slot
    if startdate is not None:
        close_bar(startdate, len(timeseries)-1)

    return sdates,edates,durations
def coverage_soil_moisture(df, date_index=None):
    """Build monthly 0/1 coverage arrays per sensor/version from constraint rows.

    'variable' and 'time aggregation' are not treated in any special way.
    If a daily product has a slot in a given month, that month is assumed to
    be fully covered; this could be fine tuned in the future to account for
    higher (sub-monthly) granularity.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per constraint. The columns 'type_of_sensor', 'version',
        'year', 'month' and 'type_of_record' are read; each cell is iterated
        as a collection of strings ('year' and 'month' are converted with int()).
    date_index : pandas index of timestamps, optional
        Time axis of the coverage arrays (must provide `.isin`). When omitted,
        the notebook-level `dates` index is used, as before.

    Returns
    -------
    dict
        Maps '<type_of_sensor>_<version>' to {'cdr': array, 'icdr': array},
        where each integer array has one element per entry of the time axis
        and holds 1 for covered months.

    Raises
    ------
    KeyError
        If a row lists a record type other than 'cdr' or 'icdr'.
    """
    if date_index is None:
        date_index = dates # notebook-level monthly index
    n_slots = len(date_index)

    coverage ={}
    for index,row in df.iterrows():
        # the covered months depend only on the row, so compute them once
        # instead of once per sensor/version/record combination
        list_dates = [pd.Timestamp(int(yy),int(mm),1) for yy in row['year'] for mm in row['month']]
        covered = date_index.isin(list_dates)

        # if combination sensor - version does not exist, create entry in dic
        # cdr and icdr info are stored separately for each version
        # we need these loops because type_of_sensor might not be unique for each row
        for typeSensor in row['type_of_sensor']:
            for version in row['version']:
                sensversion = typeSensor+'_'+version
                if sensversion not in coverage:
                    print(sensversion+ ' entry created...')
                    coverage[sensversion] = {
                        'cdr' : np.zeros(n_slots).astype(int),
                        'icdr' : np.zeros(n_slots).astype(int),
                    }
                for typrecord in row['type_of_record']:
                    coverage[sensversion][typrecord][covered]=1

    return coverage
# Notebook configuration: read the YAML settings file (path is relative to the
# current working directory) and expose the directories it defines.
with open('config-athos.yml') as f:
    conf= yaml.safe_load(f)
    
# directory prefix of the CDS form definitions; a later cell concatenates it
# directly with the entry name, so it presumably has to end with a path separator
cds_form_dir=conf['cds_form_dir']
# read from the config but not referenced in the cells shown here
datasets_dir = conf['datasets_dir']

# Global monthly time axis: month-start ('MS') timestamps from January 1970 up to
# the current month. coverage_soil_moisture() and the plotting code index into it.
dates = pd.date_range(start='1970-01-01',end=pd.Timestamp.today(),freq='MS')


: 

In [None]:

# Load the constraints of the first Soil Moisture entry and derive its coverage.
ecv=conf['PRODUCT']['Soil Moisture']
entry = ecv['entry'][0] # mind cases of multiple entries -- to do
jfilepath=f'{cds_form_dir}{entry}/constraints.json'

# Read the JSON file; the context manager closes the file handle as soon as
# parsing is done (previously the handle was left open).
with open(jfilepath) as f:
    data = json.load(f)
# presumably a list of constraint records, so each record becomes one row -- TODO confirm
df = pd.DataFrame(data)

coverage=coverage_soil_moisture(df)


: 

In [None]:
# now build the pd dataframe for the final bars:
# one row per contiguous coverage bar of every sensversion / record type

barsList = []
for k in coverage.keys(): # iterate on sensversion ('<type_of_sensor>_<version>')
    datarec = coverage[k]

    # The key is parsed once per sensversion; it does not depend on the record type.
    tokens = k.split('_')
    # Reset for every key, so a key without a 'v...' token cannot silently
    # inherit the version parsed from the previous key.
    version = None
    for item in tokens:
        # startswith() is also safe for empty tokens (e.g. a doubled underscore);
        # if several tokens start with 'v', the last one wins
        if item.startswith('v'): version=item
    if version is None:
        print('no version token found in '+k)
    sensor=tokens[0]
    if sensor=='combined': sensor='combined_passive_and_active'

    for typRec in datarec.keys(): # cdr,icdr
        # start index, end index and length (in slots of `dates`) of each bar
        sdates,edates,durations = calculate_bars(datarec[typRec],dates)
        for i in range(len(sdates)):
            barsList.append([k,sensor,version,typRec,sdates[i],edates[i],durations[i]])
dfBars = pd.DataFrame(barsList,columns=['sensversion','sensor','version','typRec','sdate','edate','duration'])
dfBars = dfBars.sort_values(by=['sensor','version'])

# bar opacity per record type
rec_alpha={'cdr':1,'icdr':0.5}
# bar colour per sensor type
sensor_color={
    'active':'r',
    'passive': 'b',
    'combined_passive_and_active': 'g',
}


def get_ylabels(lables):
    """Strip the sensor prefix from 'sensor_version' axis labels.

    Parameters
    ----------
    lables : iterable of str
        Labels such as 'active_v201706' or
        'combined_passive_and_active_v201706'.

    Returns
    -------
    list of str
        One entry per input label, in the same order. For a recognised sensor
        the prefix up to and including the underscore before the version is
        removed. A label whose first '_'-separated token matches no known
        sensor is reported on stdout and returned unchanged, so the result
        always has as many entries as the input and stays aligned with the
        ticks it is applied to.
    """
    # number of leading characters to drop, keyed by the text looked for in the
    # first token of the label (checked in this order)
    prefix_lengths = (
        ('active', len('active_')),
        ('passive', len('passive_')),
        ('combined', len('combined_passive_and_active_')),
    )

    ylabels=[]
    for label in lables:
        head = label.split('_')[0]
        for sensor, offset in prefix_lengths:
            if sensor in head:
                ylabels.append(label[offset:])
                break
        else:
            print('unknown ylabel...')
            ylabels.append(label) # keep the label so positions are not shifted
    return ylabels

# Draw the coverage chart: one horizontal bar per row of dfBars.
# dfBars['ylabels'] = dfBars.apply(get_ylabels,axis=1)        
plt.close('all')
fig, ax = plt.subplots(figsize=(9.2, 5))
# reverse the direction of the y axis
ax.invert_yaxis()

for index, row in dfBars.iterrows():
    # ax.barh(y=row['sensversion'],width=row['duration'],left=row['sdate'],color=rec_colors[row['typRec']])
    # short sensor name, used only for the legend entry
    label =row['sensor']
    if label=='combined_passive_and_active': label='combined'
    # 'sdate' and 'duration' are integer positions/lengths on the monthly `dates`
    # axis; colour encodes the sensor, opacity the record type
    ax.barh(
        y=row['sensversion'],
        width=row['duration'],
        left=row['sdate'],
        color=sensor_color[row['sensor']],
        alpha=rec_alpha[row['typRec']],
        label=label+'/'+row['typRec'],
        # tick_label=row['ylabels'] 
        )
# x positions are month offsets into `dates` (which starts in January), so a tick
# every 12 positions marks the start of each year; the range extends past the last date
ax.set_xticks(np.arange(0,len(dates)+12,12))
# year labels, from the first year of `dates` to one year after its last
xticklabels = np.arange(dates.min().year,dates.max().year+2)
ax.set_xticklabels(xticklabels,fontsize=6,rotation=45)

# read back the y tick labels (the 'sensversion' values) and shorten them
# with get_ylabels before re-applying them
labels = [item.get_text() for item in ax.get_yticklabels()]
ylables = get_ylabels(labels)
ax.set_yticklabels(ylables)
# ax.set_yticks(range(len(ylabels)),ylabels)

plt.yticks(fontsize=6)
plt.title('Soil Moisture')
# every barh call registers its own legend entry; going through a dict keeps a
# single handle per distinct 'sensor/record type' label
handles, labels = plt.gca().get_legend_handles_labels()
by_label = dict(zip(labels, handles))
plt.legend(by_label.values(), by_label.keys(),title='Sensor Type/record type')
plt.show()

: 

In [None]:
# Debug aid: print the first y tick label of the figure created in the previous cell.
# Requires that cell to have run (uses its `ax`).
a=ax.get_yticklabels()
print(a[0])

: 