
#### purpose

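Determine the time coverage of the data in the Cloudnet database: for each site, the first and last measurement date of the classification product, the fraction of days with data, and the long gaps in between.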



#### useful links

- https://docs.cloudnet.fmi.fi/api/data-portal.html
- https://cloudnet.fmi.fi/api/sites
- https://cloudnet.fmi.fi/api/raw-files?site=norunda&updatedAtFrom=2021-09-01
- https://cloudnet.fmi.fi/api/files?site=norunda&dateFrom=2019-03-01&product=classification


In [1]:
import datetime
# import copy
# import scipy.stats
import numpy as np
# import cartopy
# import cartopy.crs as ccrs
# import matplotlib
# import matplotlib.pyplot as plt
# import toml

import requests
import pprint

from IPython.display import JSON

In [2]:
# Render matplotlib figures inline in the notebook output.
# NOTE(review): no visible cell plots anything and the matplotlib imports in
# the import cell are commented out -- this magic may be removable.
%matplotlib inline

In [3]:
def dt_to_ts(dt):
    """Convert a datetime to a Unix timestamp (seconds since 1970-01-01).

    Args:
        dt: ``datetime.datetime``. A naive value is measured against a naive
            epoch, i.e. it is interpreted as UTC. A timezone-aware value is
            measured against the UTC epoch.

    Returns:
        float: seconds elapsed since the epoch (negative before 1970).
    """
    epoch = datetime.datetime(1970, 1, 1)
    if dt.tzinfo is not None and dt.utcoffset() is not None:
        # Subtracting a naive epoch from an aware datetime raises TypeError,
        # so give the epoch an explicit UTC zone in that case.
        epoch = epoch.replace(tzinfo=datetime.timezone.utc)
    return (dt - epoch).total_seconds()


In [4]:


# Query the file metadata of the classification product for one measurement
# date. No site filter is passed, so the result is not restricted to one site.
url = 'https://cloudnet.fmi.fi/api/files'
payload = {
    'date': '2020-10-01',
    'product': 'classification'
}
# The timeout keeps the cell from hanging on a stalled connection;
# raise_for_status() surfaces an HTTP error instead of looping over an
# error body as if it were file metadata.
response = requests.get(url, params=payload, timeout=60)
response.raise_for_status()
metadata = response.json()
for row in metadata:
    print(row)
    # To download the file itself, enable the following lines:
    #res = requests.get(row['downloadUrl'])
    #with open(row['filename'], 'wb') as f:
    #    f.write(res.content)


{'uuid': '75702787-dad6-466b-ad1e-f8093544eb74', 'version': 'exq3GmTLZpl2nnLZby-iVTEQU2TcECb', 'pid': 'https://hdl.handle.net/21.12132/1.75702787dad6466b', 'volatile': False, 'legacy': False, 'quality': 'nrt', 'measurementDate': '2020-10-01', 'history': '2021-08-14 05:18:31 - classification file created\n2021-08-14 05:18:11 - categorize file created\n2021-08-11 01:40:14 - radar file created\n2021-08-11 01:40:24 - ceilometer file created', 'checksum': '02541d889b1567a8f7a6690fab5b3cb2362671cace54d57d2b13197e8b945738', 'size': '207190', 'format': 'HDF5 (NetCDF4)', 'qualityScore': None, 'createdAt': '2021-08-14T05:18:33.736Z', 'updatedAt': '2021-08-14T05:18:33.736Z', 'sourceFileIds': ['316310b02af8495d867300b29618a7cc'], 'cloudnetpyVersion': '1.18.0', 'site': {'id': 'munich', 'humanReadableName': 'Münich', 'type': ['cloudnet'], 'latitude': 48.148, 'longitude': 11.573, 'altitude': 538, 'gaw': 'Unknown', 'dvasId': 'MUN', 'country': 'Germany'}, 'product': {'id': 'classification', 'humanReada

In [5]:
# Fetch the list of all sites known to the data portal and print an overview.
url = 'https://cloudnet.fmi.fi/api/sites'

# Fail fast on a stalled connection or an HTTP error rather than trying to
# format an error body as a list of sites.
response = requests.get(url, timeout=60)
response.raise_for_status()
sites = response.json()
print(len(sites))

# Show the full structure of the first two entries ...
pprint.pprint(sites[:2])

# ... and one aligned summary line per site.
for s in sites:
    print(f" {s['id']:20s} {s['humanReadableName']:25s} {s['latitude']:8.3f} {s['longitude']:8.3f} {s['altitude']:6.1f} {s['type']}")
# JSON(sites)

138
[{'altitude': 167,
  'country': 'United Arab Emirates',
  'dvasId': None,
  'gaw': 'Unknown',
  'humanReadableName': 'Al Dhaid',
  'id': 'al-dhaid',
  'latitude': 25.234,
  'longitude': 55.976,
  'type': ['hidden']},
 {'altitude': 380,
  'country': 'Norway',
  'dvasId': None,
  'gaw': 'Unknown',
  'humanReadableName': 'Alomar',
  'id': 'alomar',
  'latitude': 62.278,
  'longitude': 16.008,
  'type': ['hidden']}]
 al-dhaid             Al Dhaid                    25.234   55.976  167.0 ['hidden']
 alomar               Alomar                      62.278   16.008  380.0 ['hidden']
 amapola              Amapola                     37.911   -3.228  370.0 ['hidden']
 antikythera          Antikythera                 35.860   23.310  193.0 ['hidden']
 apukka               Apukka                      66.579   26.011  106.0 ['hidden']
 are                  Åre                         63.402   13.076  400.0 ['hidden']
 arm-andoya           Andøya                      69.141   15.684    2.0 ['a

In [6]:
# Drop every site whose type list carries the 'hidden' flag; the remaining
# sites are the ones analysed further down.
sites_to_scrape = []
for candidate in sites:
    if 'hidden' in candidate['type']:
        continue
    sites_to_scrape.append(candidate)

print(len(sites_to_scrape))
for s in sites_to_scrape:
    summary_line = f" {s['id']:20s} {s['humanReadableName']:25s} {s['latitude']:8.3f} {s['longitude']:8.3f} {s['altitude']:6.1f} {s['type']}"
    print(summary_line)

44
 arm-andoya           Andøya                      69.141   15.684    2.0 ['arm']
 arm-ascension        Ascension Island            -7.967  -14.350  341.0 ['arm']
 arm-aware            West Antarctic             -77.850  166.730   76.0 ['arm']
 arm-cape-cod         Cape Cod                    42.031  -70.049   15.0 ['arm']
 arm-darwin           Darwin                     -12.426  130.898   30.0 ['arm']
 arm-graciosa         Graciosa                    39.092  -28.031   30.0 ['arm']
 arm-maldives         Maldives                     1.978   73.396    1.0 ['arm']
 arm-murgtal          Murgtal                     48.540    8.397  511.0 ['arm']
 arm-niamey           Niamey                      13.513    2.049  223.0 ['arm']
 arm-sgp              Southern Great Plains       36.608  -97.488  316.0 ['arm']
 arm-yacanto          Villa Yacanto              -32.126  -64.728 1141.0 ['arm']
 barbados             Barbados                    13.164  -59.432   20.0 ['campaign']
 bucharest          

In [7]:
# check for classification file coverage
#
# For every site in sites_to_scrape: fetch the metadata of all classification
# files (legacy files included), then print the first/last measurement date,
# the fraction of days that have data, and every long interruption.

# equivalent raw query for a single site:
#url = 'https://cloudnet.fmi.fi/api/files?site=norunda&dateFrom=2019-03-01&product=classification'

url = 'https://cloudnet.fmi.fi/api/files'

SECONDS_PER_DAY = 24 * 3600
# Consecutive measurement dates more than this many days apart are reported
# as a long gap (i.e. at least this many days without data).
LONG_GAP_DAYS = 14


for s in sites_to_scrape:

    payload = {
        'dateFrom': '1990-01-01',
        'site': s['id'],
        'showLegacy': True,
        'product': 'classification'
    }

    # Fail fast on a stalled connection or an HTTP error instead of treating
    # an error body as a list of files.
    response = requests.get(url, params=payload, timeout=120)
    response.raise_for_status()
    files = response.json()

    print('===================================================================')
    print(s['id'], ' no files ', len(files))
    #pprint.pprint(files[:2])

    if len(files) == 0:
        continue

    # Use unique dates: if more than one file shares a measurement date, the
    # coverage must still count that day only once.
    dates_classification = sorted({f['measurementDate'] for f in files})
    print(dates_classification[:10], dates_classification[-10:])
    # ISO date strings sort chronologically, so the ends of the sorted list
    # are the first and last measurement date.
    beginend = dates_classification[0], dates_classification[-1]

    ts_classification = [dt_to_ts(datetime.datetime.strptime(d, "%Y-%m-%d")) for d in dates_classification]
    # +1 day because both the first and the last date are covered.
    total_duration = (ts_classification[-1] - ts_classification[0] + SECONDS_PER_DAY)/SECONDS_PER_DAY
    print('total period ', beginend, ' -> ', total_duration, 'days coverage ', len(dates_classification)/total_duration)

    ts_diff = np.diff(ts_classification)
    i_gap = np.where(ts_diff > LONG_GAP_DAYS*SECONDS_PER_DAY)[0]

    long_gaps = []
    print('gaps larger 2 weeks                 duration [days]')
    for i in i_gap:
        # Last date before and first date after the interruption.
        interruption_beginend = dates_classification[i], dates_classification[i+1]
        # -1 day: only the days strictly between the two dates are missing.
        gap_duration = (ts_classification[i+1] - ts_classification[i] - SECONDS_PER_DAY)/SECONDS_PER_DAY
        long_gaps.append([interruption_beginend, gap_duration])
        print('   ', interruption_beginend, ' -> ', gap_duration)
    print('duration long gaps', sum(e[1] for e in long_gaps))

arm-andoya  no files  112
['2019-12-01', '2019-12-02', '2019-12-03', '2019-12-04', '2019-12-05', '2019-12-06', '2019-12-07', '2019-12-08', '2019-12-09', '2019-12-10'] ['2020-03-12', '2020-03-13', '2020-03-14', '2020-03-15', '2020-03-16', '2020-03-17', '2020-03-18', '2020-03-19', '2020-03-20', '2020-03-21']
total period  ('2019-12-01', '2020-03-21')  ->  112.0 days coverage  1.0
gaps larger 2 weeks                 duration [days]
duration long gaps 0
arm-ascension  no files  136
['2016-10-13', '2016-10-14', '2016-10-15', '2016-10-16', '2016-10-17', '2016-10-18', '2016-10-19', '2016-10-20', '2016-10-21', '2016-10-22'] ['2017-02-17', '2017-02-18', '2017-02-19', '2017-02-20', '2017-02-21', '2017-02-22', '2017-02-23', '2017-02-24', '2017-02-25', '2017-02-27']
total period  ('2016-10-13', '2017-02-27')  ->  138.0 days coverage  0.9855072463768116
gaps larger 2 weeks                 duration [days]
duration long gaps 0
arm-aware  no files  395
['2015-12-01', '2015-12-02', '2015-12-03', '2015-

hyytiala  no files  1294
['2014-02-02', '2014-02-03', '2014-02-04', '2014-02-05', '2014-02-06', '2014-02-07', '2014-02-08', '2014-02-09', '2014-02-10', '2014-02-11'] ['2021-05-01', '2021-05-02', '2021-05-03', '2021-05-04', '2021-05-05', '2021-05-06', '2021-05-07', '2021-05-08', '2021-05-09', '2021-05-10']
total period  ('2014-02-02', '2021-05-10')  ->  2655.0 days coverage  0.4873822975517891
gaps larger 2 weeks                 duration [days]
    ('2014-09-09', '2016-11-26')  ->  808.0
    ('2017-04-27', '2018-04-01')  ->  338.0
    ('2018-04-29', '2018-06-01')  ->  32.0
    ('2019-02-07', '2019-03-12')  ->  32.0
    ('2020-09-09', '2020-11-24')  ->  75.0
duration long gaps 1285.0
iquique  no files  246
['2018-03-26', '2018-03-27', '2018-03-28', '2018-03-29', '2018-03-30', '2018-03-31', '2018-04-01', '2018-04-02', '2018-04-05', '2018-04-06'] ['2019-01-11', '2019-01-12', '2019-01-13', '2019-01-14', '2019-01-15', '2019-01-19', '2019-01-20', '2019-01-21', '2019-01-22', '2019-01-23']
tota

norunda  no files  765
['2019-04-01', '2019-04-03', '2019-04-05', '2019-04-06', '2019-04-07', '2019-04-08', '2019-04-09', '2019-04-10', '2019-04-11', '2019-04-12'] ['2021-10-29', '2021-10-30', '2021-10-31', '2021-11-04', '2021-11-05', '2021-11-06', '2021-11-07', '2021-11-08', '2021-11-09', '2021-11-10']
total period  ('2019-04-01', '2021-11-10')  ->  955.0 days coverage  0.8010471204188482
gaps larger 2 weeks                 duration [days]
    ('2020-06-09', '2020-08-19')  ->  70.0
    ('2020-08-20', '2020-09-08')  ->  18.0
duration long gaps 88.0
ny-alesund  no files  1623
['2016-06-10', '2016-06-11', '2016-06-12', '2016-06-13', '2016-06-14', '2016-06-15', '2016-06-16', '2016-06-17', '2016-06-18', '2016-06-19'] ['2021-11-01', '2021-11-02', '2021-11-03', '2021-11-04', '2021-11-05', '2021-11-06', '2021-11-07', '2021-11-08', '2021-11-09', '2021-11-10']
total period  ('2016-06-10', '2021-11-10')  ->  1980.0 days coverage  0.8196969696969697
gaps larger 2 weeks                 duration [d

In [8]:
# Detailed coverage report for a single site, listing every interruption of
# at least one day. Reuses `url` (the files endpoint) from the cell above.
site_id = 'punta-arenas'
SECONDS_PER_DAY = 24 * 3600

payload = {
    'dateFrom': '1990-01-01',
    'site': site_id,
    'showLegacy': True,
    'product': 'classification'
}

# Fail fast on a stalled connection or an HTTP error instead of treating an
# error body as a list of files.
response = requests.get(url, params=payload, timeout=120)
response.raise_for_status()
files = response.json()

print('===================================================================')
print(site_id, ' no files ', len(files))
#pprint.pprint(files[:2])

# Use unique dates: if more than one file shares a measurement date, the
# coverage must still count that day only once.
dates_classification = sorted({f['measurementDate'] for f in files})
print(dates_classification[:10], dates_classification[-10:])

# Without any file there is no period to analyse; the unguarded version
# failed here with a ValueError from min() on an empty list.
if dates_classification:
    # ISO date strings sort chronologically, so the ends of the sorted list
    # are the first and last measurement date.
    beginend = dates_classification[0], dates_classification[-1]

    ts_classification = [dt_to_ts(datetime.datetime.strptime(d, "%Y-%m-%d")) for d in dates_classification]
    # +1 day because both the first and the last date are covered.
    total_duration = (ts_classification[-1] - ts_classification[0] + SECONDS_PER_DAY)/SECONDS_PER_DAY
    print('total period ', beginend, ' -> ', total_duration, 'days coverage ', len(dates_classification)/total_duration)

    ts_diff = np.diff(ts_classification)
    # Consecutive dates more than one day apart, i.e. at least one missing day.
    i_gap = np.where(ts_diff > 1*SECONDS_PER_DAY)[0]

    long_gaps = []
    print('gaps longer 1 day                 duration [days]')
    for i in i_gap:
        # Last date before and first date after the interruption.
        interruption_beginend = dates_classification[i], dates_classification[i+1]
        # -1 day: only the days strictly between the two dates are missing.
        gap_duration = (ts_classification[i+1] - ts_classification[i] - SECONDS_PER_DAY)/SECONDS_PER_DAY
        long_gaps.append([interruption_beginend, gap_duration])
        print('   ', interruption_beginend, ' -> ', gap_duration)
    print('duration long gaps', sum(e[1] for e in long_gaps))

punta-arenas  no files  674
['2018-11-27', '2018-11-28', '2018-11-29', '2018-11-30', '2018-12-01', '2018-12-02', '2018-12-03', '2018-12-04', '2018-12-05', '2018-12-06'] ['2020-12-22', '2020-12-23', '2020-12-24', '2020-12-25', '2020-12-26', '2020-12-27', '2020-12-28', '2020-12-29', '2020-12-30', '2020-12-31']
total period  ('2018-11-27', '2020-12-31')  ->  766.0 days coverage  0.8798955613577023
gaps longer 1 day                 duration [days]
    ('2019-05-20', '2019-05-22')  ->  1.0
    ('2019-11-05', '2019-12-06')  ->  30.0
    ('2019-12-25', '2020-01-05')  ->  10.0
    ('2020-03-02', '2020-03-04')  ->  1.0
    ('2020-03-23', '2020-03-31')  ->  7.0
    ('2020-05-02', '2020-05-04')  ->  1.0
    ('2020-05-04', '2020-05-06')  ->  1.0
    ('2020-06-22', '2020-06-29')  ->  6.0
    ('2020-08-22', '2020-09-03')  ->  11.0
    ('2020-09-24', '2020-09-30')  ->  5.0
    ('2020-11-24', '2020-12-14')  ->  19.0
duration long gaps 92.0
