In [1]:
import pandas as pd
import urllib.request
import os
from datetime import datetime,date
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px

# Aerosols - monthly
#
# Reads the latest Lot2 AER manifest, derives which months are covered by each
# algorithm/version/platform combination and draws the coverage as a timeline
# (one row per algorithm_version, coloured by sensor-platform pair).

# Read the manifest file and collect the relevant parts of every file name.
# -- check if this is the correct manifest file...
#
# NOTE: the variable name is not taken into account here, because it is
# repeated for each platform/sensor pair. To retrieve that information use
# varname = line.decode('utf-8').split('/')[5]

fname = 'http://wdc.dlr.de/C3S_312b_Lot2/manifest_C3S_312b_Lot2_AER_L3MONTHLY_latest.txt'

reclist = []
seen_records = set()  # O(1) duplicate detection instead of scanning reclist
with urllib.request.urlopen(fname) as manifest:  # closes the connection when done
    for line in manifest:
        entry = line.decode('utf-8').strip()
        if not entry:
            continue  # tolerate blank lines in the manifest
        recstr = entry.split('/')[-1].split('-')
        rec = {
            'level': recstr[2],
            # 'variable': recstr[3],
            'platform': recstr[5],
            'sensor': recstr[4],
            'algorithm': recstr[6],
            'frequency': recstr[7],
            # The entry is already stripped, so only the 3-character file
            # extension is removed here (the previous slice dropped 4 characters
            # from the raw line: extension plus newline; presumably '.nc' --
            # TODO confirm).
            'version': recstr[8][:-3],
            # Files are binned by month: only year and month of the leading
            # date field are used.
            'date': pd.Timestamp(year=int(recstr[0][0:4]), month=int(recstr[0][4:6]), day=1),
        }
        # Records of the same platform/sensor pair that differ only in the
        # variable collapse to a single record here.
        rec_key = tuple(rec.values())
        if rec_key not in seen_records:
            seen_records.add(rec_key)
            reclist.append(rec)
df = pd.DataFrame(reclist)
if df.empty:
    raise ValueError(f'no records could be read from {fname}')

algorithms = sorted(df['algorithm'].unique())

# The time axis goes from 1970 until the end of the last year with available
# data in the manifest file (month-start timestamps).
lastyear = df['date'].max().year
dates = pd.date_range('1970-01-15', f'{lastyear}-12-1', freq='MS')

# For each algorithm, then each of its versions, then each platform, find the
# uninterrupted runs of covered months. Every run becomes one bar; colours are
# sensor-platform pairs.
barList = []
for algo in algorithms:
    df2 = df[df['algorithm'] == algo]
    for version in sorted(df2['version'].unique()):
        df3 = df2[df2['version'] == version]
        for platform in sorted(df3['platform'].unique()):
            df4 = df3[df3['platform'] == platform]

            # Month -> sensor lookup; the first record of a month wins.
            sensor_by_month = {}
            for month, sensor in zip(df4['date'], df4['sensor']):
                sensor_by_month.setdefault(month, sensor)

            # Run-length encode the time axis. A run ends when the month has
            # no data or when a different sensor-platform pair takes over.
            run_label = None      # label of the open run, None while in a gap
            run_start = None      # first month of the open run
            previous_month = None
            for month in dates:
                sensor = sensor_by_month.get(month)
                label = None if sensor is None else f'{sensor}-{platform}'
                if label != run_label:
                    if run_label is not None:
                        # Close the open run at the last month it covered.
                        barList.append({
                            'algorithm_version': f'{algo}_{version}_{platform}',
                            'platform_sensor': run_label,
                            'startdate': run_start,
                            'enddate': previous_month,
                        })
                    run_label, run_start = label, month
                previous_month = month
            if run_label is not None:
                # A run that is still open at the end of the axis is closed at
                # the last month.
                barList.append({
                    'algorithm_version': f'{algo}_{version}_{platform}',
                    'platform_sensor': run_label,
                    'startdate': run_start,
                    'enddate': previous_month,
                })

df_bars = pd.DataFrame(barList)
print(df_bars)

fig = px.timeline(df_bars, x_start="startdate", x_end="enddate", y='algorithm_version',color='platform_sensor',title='Aerosols Monthly Products')
fig.update_yaxes(autorange="reversed")
fig.update_layout(
    autosize=True,
    width=1200,
    height=2000,
)
fig.show()
today_date = pd.Timestamp.today().strftime('%Y%m%d')
fig.write_image(f'aerosols_coverage_byalgoversion_by mission_{today_date}.png')
# fig.write_html('aerosols_coverage_byalgoversion_by mission.htm')

         algorithm_version platform_sensor  startdate    enddate
0        ADV_v2.30_ENVISAT   AATSR-ENVISAT 2002-05-01 2007-05-01
1        ADV_v2.30_ENVISAT   AATSR-ENVISAT 2007-07-01 2012-04-01
2           ADV_v2.30_ERS2      ATSR2-ERS2 1995-06-01 1995-12-01
3           ADV_v2.30_ERS2      ATSR2-ERS2 1996-07-01 2003-04-01
4        ADV_v3.11_ENVISAT   AATSR-ENVISAT 2002-05-01 2012-04-01
..                     ...             ...        ...        ...
185     ensemble_v3.0_ERS2      ATSR2-ERS2 1995-06-01 1995-12-01
186     ensemble_v3.0_ERS2      ATSR2-ERS2 1996-07-01 2003-06-01
187  ensemble_v3.1_ENVISAT   AATSR-ENVISAT 2002-05-01 2012-04-01
188     ensemble_v3.1_ERS2      ATSR2-ERS2 1995-06-01 1995-12-01
189     ensemble_v3.1_ERS2      ATSR2-ERS2 1996-07-01 2003-06-01

[190 rows x 4 columns]


In [2]:

# Ozone - monthly
#
# Reads the latest Lot2 O3 manifest, derives which months are covered by each
# algorithm/version/platform combination and draws the coverage as a timeline
# (one row per algorithm_version, coloured by the sensor/platform pair).

# Read the manifest file and collect the relevant parts of every file name.
# -- check if this is the correct manifest file...
#
# NOTE: the variable name is not taken into account here, because it is
# repeated for each platform/sensor pair. To retrieve that information use
# varname = line.decode('utf-8').split('/')[5]

fname = 'http://wdc.dlr.de/C3S_312b_Lot2/manifest_C3S_312b_Lot2_O3_MONTHLY_latest.txt'

reclist = []
seen_records = set()  # O(1) duplicate detection instead of scanning reclist
with urllib.request.urlopen(fname) as manifest:  # closes the connection when done
    for line in manifest:
        entry = line.decode('utf-8').strip()
        if not entry:
            continue  # tolerate blank lines in the manifest
        recstr = entry.split('/')[-1].split('-')
        rec = {
            'level': recstr[2],
            # 'variable': recstr[3],
            # NOTE(review): here field 4 is read as platform and field 5 as
            # sensor, the opposite of the aerosol manifest parsing. The labels
            # printed by this cell (e.g. 'NPP-OMPS', built as sensor-platform)
            # suggest field 4 may actually be the instrument -- confirm against
            # the file naming convention before relying on these names.
            'platform': recstr[4],
            'sensor': recstr[5],
            'algorithm': recstr[6],
            'frequency': recstr[7],
            # The entry is already stripped, so only the 3-character file
            # extension is removed here (the previous slice dropped 4 characters
            # from the raw line: extension plus newline; presumably '.nc' --
            # TODO confirm).
            'version': recstr[8][:-3],
            # Files are binned by month: only year and month of the leading
            # date field are used.
            'date': pd.Timestamp(year=int(recstr[0][0:4]), month=int(recstr[0][4:6]), day=1),
        }
        # Records of the same platform/sensor pair that differ only in the
        # variable collapse to a single record here.
        rec_key = tuple(rec.values())
        if rec_key not in seen_records:
            seen_records.add(rec_key)
            reclist.append(rec)
df = pd.DataFrame(reclist)
if df.empty:
    raise ValueError(f'no records could be read from {fname}')

algorithms = sorted(df['algorithm'].unique())

# The time axis goes from 1970 until the end of the last year with available
# data in the manifest file (month-start timestamps).
lastyear = df['date'].max().year
dates = pd.date_range('1970-01-15', f'{lastyear}-12-1', freq='MS')

# For each algorithm, then each of its versions, then each platform, find the
# uninterrupted runs of covered months. Every run becomes one bar; colours are
# the sensor/platform pairs.
barList = []
for algo in algorithms:
    df2 = df[df['algorithm'] == algo]
    for version in sorted(df2['version'].unique()):
        df3 = df2[df2['version'] == version]
        for platform in sorted(df3['platform'].unique()):
            df4 = df3[df3['platform'] == platform]

            # Month -> sensor lookup; the first record of a month wins.
            sensor_by_month = {}
            for month, sensor in zip(df4['date'], df4['sensor']):
                sensor_by_month.setdefault(month, sensor)

            # Run-length encode the time axis. A run ends when the month has
            # no data or when a different sensor/platform pair takes over.
            run_label = None      # label of the open run, None while in a gap
            run_start = None      # first month of the open run
            previous_month = None
            for month in dates:
                sensor = sensor_by_month.get(month)
                label = None if sensor is None else f'{sensor}-{platform}'
                if label != run_label:
                    if run_label is not None:
                        # Close the open run at the last month it covered.
                        barList.append({
                            'algorithm_version': f'{algo}_{version}_{platform}',
                            'platform_sensor': run_label,
                            'startdate': run_start,
                            'enddate': previous_month,
                        })
                    run_label, run_start = label, month
                previous_month = month
            if run_label is not None:
                # A run that is still open at the end of the axis is closed at
                # the last month.
                barList.append({
                    'algorithm_version': f'{algo}_{version}_{platform}',
                    'platform_sensor': run_label,
                    'startdate': run_start,
                    'enddate': previous_month,
                })

df_bars = pd.DataFrame(barList)
print(df_bars)

fig = px.timeline(df_bars, x_start="startdate", x_end="enddate", y='algorithm_version',color='platform_sensor',title='Ozone Monthly Products')
fig.update_yaxes(autorange="reversed")
fig.update_layout(
    autosize=True,
    width=1200,
    height=2000,
)
fig.show()
today_date = pd.Timestamp.today().strftime('%Y%m%d')
fig.write_image(f'ozone_coverage_byalgoversion_by mission_{today_date}.png')
# fig.write_html('ozone_coverage_byalgoversion_by mission.htm')

      algorithm_version  platform_sensor  startdate    enddate
0         ALG_v0001_ACE       SCISAT-ACE 2004-02-01 2011-05-01
1         ALG_v0001_ACE       SCISAT-ACE 2011-07-01 2011-09-01
2         ALG_v0001_ACE       SCISAT-ACE 2012-01-01 2012-02-01
3         ALG_v0001_ACE       SCISAT-ACE 2012-04-01 2020-12-01
4    ALG_v0001_ANOM_LLG  MERGED-ANOM_LLG 2001-11-01 2019-12-01
..                  ...              ...        ...        ...
142      UBR_v0002_OMPS         NPP-OMPS 2012-02-01 2013-11-01
143      UBR_v0002_OMPS         NPP-OMPS 2014-01-01 2022-12-01
144    USASK_v0002_OMPS         NPP-OMPS 2012-02-01 2013-11-01
145    USASK_v0002_OMPS         NPP-OMPS 2014-01-01 2022-12-01
146    USASK_v0100_OMPS         NPP-OMPS 2012-01-01 2023-04-01

[147 rows x 4 columns]


In [3]:

# Greenhouse gases (GHG)
#
# Reads the latest Lot2 GHG L2 manifest, derives which months are covered by
# each algorithm/version/platform combination and draws the coverage as a
# timeline (one row per algorithm_version, coloured by the sensor/platform
# pair). File dates are reduced to their month before the coverage is computed.

# Read the manifest file and collect the relevant parts of every file name.
# -- check if this is the correct manifest file...
#
# NOTE: the variable name is not taken into account here, because it is
# repeated for each platform/sensor pair. To retrieve that information use
# varname = line.decode('utf-8').split('/')[5]

fname = 'http://wdc.dlr.de/C3S_312b_Lot2/manifest_C3S_312b_Lot2_GHG_L2_latest.txt'

reclist = []
seen_records = set()  # O(1) duplicate detection instead of scanning reclist
with urllib.request.urlopen(fname) as manifest:  # closes the connection when done
    for line in manifest:
        entry = line.decode('utf-8').strip()
        if not entry:
            continue  # tolerate blank lines in the manifest
        recstr = entry.split('/')[-1].split('-')
        rec = {
            'level': recstr[2],
            # 'variable': recstr[3],
            # NOTE(review): here field 4 is read as platform and field 5 as
            # sensor, the opposite of the aerosol manifest parsing. The labels
            # printed by this cell (e.g. 'ENVISAT-SCIAMACHY', built as
            # sensor-platform) suggest field 4 may actually be the instrument
            # -- confirm against the file naming convention before relying on
            # these names.
            'platform': recstr[4],
            'sensor': recstr[5],
            'algorithm': recstr[6],
            'frequency': recstr[7],
            # The entry is already stripped, so only the 3-character file
            # extension is removed here (the previous slice dropped 4 characters
            # from the raw line: extension plus newline; presumably '.nc' --
            # TODO confirm).
            'version': recstr[8][:-3],
            # Only year and month of the leading date field are used, so all
            # files of one month map to the same timestamp.
            'date': pd.Timestamp(year=int(recstr[0][0:4]), month=int(recstr[0][4:6]), day=1),
        }
        # Records that end up identical (same pair and month, different
        # variable or day) collapse to a single record here.
        rec_key = tuple(rec.values())
        if rec_key not in seen_records:
            seen_records.add(rec_key)
            reclist.append(rec)
df = pd.DataFrame(reclist)
if df.empty:
    raise ValueError(f'no records could be read from {fname}')

algorithms = sorted(df['algorithm'].unique())

# The time axis goes from 1970 until the end of the last year with available
# data in the manifest file (month-start timestamps).
lastyear = df['date'].max().year
dates = pd.date_range('1970-01-15', f'{lastyear}-12-1', freq='MS')

# For each algorithm, then each of its versions, then each platform, find the
# uninterrupted runs of covered months. Every run becomes one bar; colours are
# the sensor/platform pairs.
barList = []
for algo in algorithms:
    df2 = df[df['algorithm'] == algo]
    for version in sorted(df2['version'].unique()):
        df3 = df2[df2['version'] == version]
        for platform in sorted(df3['platform'].unique()):
            df4 = df3[df3['platform'] == platform]

            # Month -> sensor lookup; the first record of a month wins.
            sensor_by_month = {}
            for month, sensor in zip(df4['date'], df4['sensor']):
                sensor_by_month.setdefault(month, sensor)

            # Run-length encode the time axis. A run ends when the month has
            # no data or when a different sensor/platform pair takes over.
            run_label = None      # label of the open run, None while in a gap
            run_start = None      # first month of the open run
            previous_month = None
            for month in dates:
                sensor = sensor_by_month.get(month)
                label = None if sensor is None else f'{sensor}-{platform}'
                if label != run_label:
                    if run_label is not None:
                        # Close the open run at the last month it covered.
                        barList.append({
                            'algorithm_version': f'{algo}_{version}_{platform}',
                            'platform_sensor': run_label,
                            'startdate': run_start,
                            'enddate': previous_month,
                        })
                    run_label, run_start = label, month
                previous_month = month
            if run_label is not None:
                # A run that is still open at the end of the axis is closed at
                # the last month.
                barList.append({
                    'algorithm_version': f'{algo}_{version}_{platform}',
                    'platform_sensor': run_label,
                    'startdate': run_start,
                    'enddate': previous_month,
                })

df_bars = pd.DataFrame(barList)
print(df_bars)

fig = px.timeline(df_bars, x_start="startdate", x_end="enddate", y='algorithm_version',color='platform_sensor',title='GHG Monthly Products')
fig.update_yaxes(autorange="reversed")
fig.update_layout(
    autosize=True,
    width=1200,
    height=1000,
)
fig.show()
today_date = pd.Timestamp.today().strftime('%Y%m%d')
fig.write_image(f'GHG_coverage_byalgoversion_by mission_{today_date}.png')
# fig.write_html('GHG_coverage_byalgoversion_by mission.htm')

           algorithm_version    platform_sensor  startdate    enddate
0   BESD_v02.01.02_SCIAMACHY  ENVISAT-SCIAMACHY 2003-01-01 2012-03-01
1           EMMA_v3.0_MERGED      MERGED-MERGED 2003-01-01 2014-12-01
2           EMMA_v3.0_MERGED      MERGED-MERGED 2015-02-01 2016-12-01
3           EMMA_v3.1_MERGED      MERGED-MERGED 2003-01-01 2014-12-01
4           EMMA_v3.1_MERGED      MERGED-MERGED 2015-02-01 2017-12-01
5           EMMA_v4.1_MERGED      MERGED-MERGED 2003-01-01 2014-12-01
6           EMMA_v4.1_MERGED      MERGED-MERGED 2015-02-01 2018-12-01
7           EMMA_v4.2_MERGED      MERGED-MERGED 2003-01-01 2014-12-01
8           EMMA_v4.2_MERGED      MERGED-MERGED 2015-02-01 2019-12-01
9           EMMA_v4.3_MERGED      MERGED-MERGED 2003-01-01 2014-12-01
10          EMMA_v4.3_MERGED      MERGED-MERGED 2015-02-01 2020-06-01
11          EMMA_v4.4_MERGED      MERGED-MERGED 2003-01-01 2014-12-01
12          EMMA_v4.4_MERGED      MERGED-MERGED 2015-02-01 2021-12-01
13          EMMA_v4.