In [1]:
import json
import jwt
import time
import requests
import pandas as pd
from datetime import datetime
import urllib.request
from pathlib import Path

In [2]:
# TODO
# Embed the grant/token process into a function so the token can easily be refreshed when it expires while the request is still queued
# (the queue can sometimes take longer than the token's expiration time)

In [3]:
# Load saved key
# Parse the JSON key file; the context manager closes the file handle as soon as
# parsing is done instead of leaving it open until garbage collection.
with open('D:/ERA5/my_saved_key.json', 'rb') as key_file:
    service_key = json.load(key_file)

In [4]:
# request grant
# Build an RS256-signed JWT from the service key; it is sent as the `assertion`
# of the OAuth2 token request.
private_key = service_key['private_key'].encode('utf-8')

# Read the clock once so "exp" is always exactly one lifetime after "iat"
# (two separate time.time() calls can straddle a second boundary).
GRANT_LIFETIME_SECONDS = 60 * 60
issued_at = int(time.time())

claim_set = {
    "iss": service_key['client_id'],
    "sub": service_key['user_id'],
    "aud": service_key['token_uri'],
    "iat": issued_at,
    "exp": issued_at + GRANT_LIFETIME_SECONDS,
}
grant = jwt.encode(claim_set, private_key, algorithm='RS256')

In [5]:
# define url where dataset sits
url = 'https://land.copernicus.eu'

# Make access token request: exchange the signed JWT grant for a bearer token
r = requests.post(
    f'{url}/@@oauth2-token',
    headers={'Accept': 'application/json', 'Content-Type': 'application/x-www-form-urlencoded'},
    data={'grant_type': 'urn:ietf:params:oauth:grant-type:jwt-bearer', 'assertion': grant},
    # requests has no default timeout; without one a stalled connection hangs the cell forever
    timeout=60,
)
# Surface a rejected grant as an HTTP error instead of a KeyError on 'access_token' below
r.raise_for_status()

access_token = r.json()['access_token']

In [6]:
# test: use access token to authenticate request
# The response object is the cell's last expression, so its status code is displayed.
auth_headers = {
    'Accept': 'application/json',
    'Authorization': f'Bearer {access_token}',
}
r = requests.get(f'{url}/api/@search?portal_type=DataSet', headers=auth_headers)
r

<Response [200]>

In [7]:
# search for dataset to download
# if we're always going to download the same dataset, no need to keep this in the automated process, just need the UIDs

# increase batch size to see more datasets in one request
b_size = '300'

# send request for list of datasets available for download
# Passing `params` lets requests build and encode the query string (a list value is sent
# as a repeated key), which avoids hand-assembly mistakes such as a stray '&&'.
r = requests.get(
    f'{url}/api/@search',
    params={
        'portal_type': 'DataSet',
        'metadata_fields': ['UID', 'dataset_full_format', 'dataset_download_information'],
        'b_size': b_size,
    },
    headers={'Accept': 'application/json', 'Authorization': f'Bearer {access_token}'},
    timeout=60,
)
# Fail here with the HTTP error rather than with a KeyError on 'items' below
r.raise_for_status()

# parse response to dataframe
df = pd.json_normalize(r.json()['items'])

# find uid and download uid corresponding to dataset we want
search = 'water-bodies-global-v2-0-300m'
# regex=False: the slug is a literal substring, not a regular expression
t = df[df['@id'].str.contains(search, regex=False)]
if t.empty:
    raise ValueError(f'No dataset whose @id contains {search!r} was returned by the search')
dataset_uid = t['UID'].iloc[0]

# Merge the download-information entries of the first match into one dict.
# NOTE: if there are several entries, later ones overwrite earlier keys, so '@id'
# comes from the last entry.
temp = t['dataset_download_information.items'].iloc[0]
temp_dict = {k: v for d in temp for k, v in d.items()}
download_uid = temp_dict['@id']


In [8]:
# Laos approximate bounding box from Rica, can refine later using the shapefile we use
# NOTE(review): values look like [west lon, north lat, east lon, south lat] in degrees;
# confirm this is the corner order the API expects.
bounding_box = [
    99.9050343141071551,
    22.6742105539222258,
    107.8147256667023299,
    13.7353817230269417,
]

In [30]:
# make download request
# Temporal filter bounds as 'YYYYMMDD' strings
start, end = '20230101', '20231231'

def date_to_ms(date):
    """Convert a 'YYYYMMDD' date string to milliseconds since the Unix epoch.

    The date is interpreted as midnight UTC, so the result is the same on every
    machine regardless of its local time zone.

    Parameters
    ----------
    date : str
        Calendar date formatted as '%Y%m%d', e.g. '20230101'.

    Returns
    -------
    int
        Milliseconds elapsed between 1970-01-01T00:00:00Z and midnight UTC of `date`.

    Raises
    ------
    ValueError
        If `date` does not match the '%Y%m%d' format.
    """
    dt_obj = datetime.strptime(date, '%Y%m%d')
    # A naive datetime's .timestamp() assumes the machine's local time zone, which
    # shifts the result by the local UTC offset; subtract the epoch explicitly instead.
    epoch = datetime(1970, 1, 1)
    millisec = int((dt_obj - epoch).total_seconds() * 1000)
    return millisec
    

# Submit the data request for the selected dataset, restricted to the bounding box
# and the start/end dates defined above.
request_headers = {
    'Accept': 'application/json',
    'Content-Type': 'application/json',
    'Authorization': f'Bearer {access_token}',
}
dataset_request = {
    'DatasetID': dataset_uid,
    'DatasetDownloadInformationID': download_uid,
    'OutputFormat': 'Netcdf',
    'OutputGCS': 'EPSG:4326',
    'BoundingBox': bounding_box,
    'TemporalFilter': {
        'StartDate': date_to_ms(start),
        'EndDate': date_to_ms(end),
    },
    'Layer': 'ALL BANDS',
}

r = requests.post(
    f'{url}/api/@datarequest_post',
    headers=request_headers,
    json={'Datasets': [dataset_request]},
)

# Display the parsed JSON reply as the cell output
r.json()


{'ErrorTaskIds': [], 'TaskIds': [{'TaskID': '3126130908'}]}

In [9]:
# status update
# Fetch the data requests known to the API and tabulate them, newest first
r = requests.get(
    f'{url}/api/@datarequest_search?',
    headers={
        'Accept': 'application/json',
        'Authorization': f'Bearer {access_token}',
    },
    # requests has no default timeout; without one a stalled connection hangs the cell forever
    timeout=60,
)
# An expired token or server error should surface as an HTTP error here rather than
# as a confusing failure inside the DataFrame construction below
r.raise_for_status()

# The response is transposed so each request becomes a row; the former index
# (the keys of the JSON object) is exposed as the 'TaskID' column.
df = (
    pd.DataFrame(r.json())
    .T
    .sort_values(by='RegistrationDateTime', ascending=False)
    .reset_index(names='TaskID')
)
df

Unnamed: 0,TaskID,Datasets,FMETaskId,FinalizationDateTime,Message,RegistrationDateTime,Status,UserID,DownloadURL,FileSize
0,3126130908,"[{'BoundingBox': [99.90503431410716, 22.674210...",98365,,,2024-08-28T15:36:41.778626,In_progress,n00gpr0h,,
1,64895917556,"[{'BoundingBox': [99.90503431410716, 22.674210...",98363,2024-08-28T15:30:38.331873,Download process finished successfully,2024-08-28T15:29:45.152270,Finished_ok,n00gpr0h,https://copernicus-fme.eea.europa.eu/clmsdatad...,553424.0
2,18370004362,"[{'BoundingBox': [99.90503431410716, 22.674210...",98362,2024-08-28T15:23:28.080541,Download process finished successfully,2024-08-28T15:22:32.891227,Finished_ok,n00gpr0h,https://copernicus-fme.eea.europa.eu/clmsdatad...,553424.0
3,48916126757,"[{'BoundingBox': [99.90503431410716, 22.674210...",98353,2024-08-28T14:35:31.542125,Download process finished successfully,2024-08-28T14:34:41.535432,Finished_ok,n00gpr0h,https://copernicus-fme.eea.europa.eu/clmsdatad...,553424.0
4,19779535516,"[{'BoundingBox': [99.90503431410716, 22.674210...",98350,2024-08-28T14:12:34.917912,Download process finished successfully,2024-08-28T14:11:44.271221,Finished_ok,n00gpr0h,https://copernicus-fme.eea.europa.eu/clmsdatad...,553424.0
5,76357526870,"[{'BoundingBox': [99.90503431410716, 22.674210...",98349,2024-08-28T14:11:01.127970,Download process finished successfully,2024-08-28T14:09:44.646511,Finished_ok,n00gpr0h,https://copernicus-fme.eea.europa.eu/clmsdatad...,553424.0
6,53999870204,"[{'BoundingBox': [99.90503431410716, 22.674210...",98348,2024-08-28T14:09:03.406059,Download process finished successfully,2024-08-28T14:07:52.571777,Finished_ok,n00gpr0h,https://copernicus-fme.eea.europa.eu/clmsdatad...,553424.0
7,96231943363,"[{'BoundingBox': [99.90503431410716, 22.674210...",98347,2024-08-28T14:05:50.447116,Download process finished successfully,2024-08-28T14:05:02.613241,Finished_ok,n00gpr0h,https://copernicus-fme.eea.europa.eu/clmsdatad...,553424.0
8,8236258023,"[{'BoundingBox': [99.90503431410716, 22.674210...",98345,2024-08-28T14:03:40.707954,Download process finished successfully,2024-08-28T14:02:57.681081,Finished_ok,n00gpr0h,https://copernicus-fme.eea.europa.eu/clmsdatad...,553424.0
9,24396708482,"[{'BoundingBox': [99.90503431410716, 22.674210...",97359,2024-08-26T06:19:11.959817,Code:L-002. Process rejected. Incorrect band n...,2024-08-26T06:18:15.586107,Rejected,n00gpr0h,,


In [37]:
# download approved requests
# (this is very DIY for now, need to talk to Rica about automating and making sure we only download once)

# Ensure the destination directory exists
download_dir = Path('D:/ERA5/')
download_dir.mkdir(parents=True, exist_ok=True)

# Keep only the requests whose status is 'Finished_ok' and walk their URLs directly
# (no need to re-filter the frame by TaskID for every row).
finished = df[df.Status == 'Finished_ok']

for download_url in finished.DownloadURL:
    download_filename = download_url.split('/')[-1]
    download_filepath = download_dir / download_filename
    # Download into a temporary ".part" file and rename only on success, so an
    # interrupted transfer is never mistaken for a completed download on the next run.
    partial_filepath = download_dir / f'{download_filename}.part'

    print(f'Download URL: {download_url}')
    print(f'Download path: {download_filepath}')

    # Check if file exists
    if download_filepath.is_file():
        print(f'{download_filename} has already been downloaded')
        continue

    try:
        # Download the data
        urllib.request.urlretrieve(download_url, partial_filepath)
        partial_filepath.replace(download_filepath)
        print(f'Successfully downloaded {download_filename}')
    except Exception as e:
        # Best effort: report and carry on with the next file, but remove any partial download
        partial_filepath.unlink(missing_ok=True)
        print(f'Failed to download {download_filename}. Error: {e}')

Download URL: https://copernicus-fme.eea.europa.eu/clmsdatadownload/results/98363.zip
Download path: D:\ERA5\98363.zip
98363.zip has already been downloaded
Download URL: https://copernicus-fme.eea.europa.eu/clmsdatadownload/results/98362.zip
Download path: D:\ERA5\98362.zip
98362.zip has already been downloaded
Download URL: https://copernicus-fme.eea.europa.eu/clmsdatadownload/results/98353.zip
Download path: D:\ERA5\98353.zip
98353.zip has already been downloaded
Download URL: https://copernicus-fme.eea.europa.eu/clmsdatadownload/results/98350.zip
Download path: D:\ERA5\98350.zip
98350.zip has already been downloaded
Download URL: https://copernicus-fme.eea.europa.eu/clmsdatadownload/results/98349.zip
Download path: D:\ERA5\98349.zip
98349.zip has already been downloaded
Download URL: https://copernicus-fme.eea.europa.eu/clmsdatadownload/results/98348.zip
Download path: D:\ERA5\98348.zip
98348.zip has already been downloaded
Download URL: https://copernicus-fme.eea.europa.eu/clmsdat