In [15]:
#%matplotlib inline

import os
import subprocess
import itertools
import numpy as np
import requests
import pytz
import datetime
import netCDF4
from osgeo import gdal
from os import path
from osgeo.gdalconst import *
from tqdm import tqdm
from bs4 import BeautifulSoup


In [16]:
# Year whose surveys are extracted in this run; change it for every update.
yearupdate = 2022

# HTML catalog page that is scraped for dataset links, and the OPeNDAP prefix
# that is combined with each file name to form the dataset URL.
url_catalog = 'https://opendap.deltares.nl/thredds/catalog/opendap/rijkswaterstaat/jarkus/grids/catalog.html'
url_base = 'http://opendap.deltares.nl/thredds/dodsC/opendap/rijkswaterstaat/jarkus/grids'

# Only links ending in this suffix are collected.
ext = 'nc'

# Dataset URLs; filled by the loop further down in this cell.
urls = []

def listFD(url, ext=''):
    """Return the links on an HTML page whose ``href`` ends with ``ext``.

    Parameters
    ----------
    url : str
        Address of the page to download and parse.
    ext : str, optional
        Suffix an ``href`` must end with to be kept. The default ``''``
        keeps every link that has an ``href``.

    Returns
    -------
    list of str
        ``url + '/' + href`` for each matching anchor, in document order.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    response = requests.get(url)
    # Fail loudly on 4xx/5xx instead of scraping links out of an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    links = []
    for node in soup.find_all('a'):
        href = node.get('href')
        # Anchors without an href attribute yield None; skip them instead of
        # crashing on None.endswith(...).
        if href is not None and href.endswith(ext):
            links.append(url + '/' + href)
    return links


# Turn every catalog link into an OPeNDAP URL: keep only the last path
# component of the href part and prepend the OPeNDAP base address.
for link in listFD(url_catalog, ext):
    href_part = link.split('/catalog.html/')[1]
    filename = href_part.rsplit('/', 1)[-1]
    # The catalog's own entry is not a data file.
    if filename != 'catalog.nc':
        urls.append(url_base + '/' + filename)

In [17]:
# Display the collected dataset URLs.
list(urls)


['http://opendap.deltares.nl/thredds/dodsC/opendap/rijkswaterstaat/jarkus/grids/jarkusKB134_1110.nc',
 'http://opendap.deltares.nl/thredds/dodsC/opendap/rijkswaterstaat/jarkus/grids/jarkusKB133_1312.nc',
 'http://opendap.deltares.nl/thredds/dodsC/opendap/rijkswaterstaat/jarkus/grids/jarkusKB133_1110.nc',
 'http://opendap.deltares.nl/thredds/dodsC/opendap/rijkswaterstaat/jarkus/grids/jarkusKB132_1312.nc',
 'http://opendap.deltares.nl/thredds/dodsC/opendap/rijkswaterstaat/jarkus/grids/jarkusKB132_1110.nc',
 'http://opendap.deltares.nl/thredds/dodsC/opendap/rijkswaterstaat/jarkus/grids/jarkusKB131_1312.nc',
 'http://opendap.deltares.nl/thredds/dodsC/opendap/rijkswaterstaat/jarkus/grids/jarkusKB131_1110.nc',
 'http://opendap.deltares.nl/thredds/dodsC/opendap/rijkswaterstaat/jarkus/grids/jarkusKB130_1312.nc',
 'http://opendap.deltares.nl/thredds/dodsC/opendap/rijkswaterstaat/jarkus/grids/jarkusKB130_1110.nc',
 'http://opendap.deltares.nl/thredds/dodsC/opendap/rijkswaterstaat/jarkus/grids/ja

In [18]:
# Download, for every dataset, the time steps that fall in `yearupdate`.
# Each entry of `grids` holds the dataset URL, its x and y coordinate arrays,
# the selected z slices and the matching timezone-aware time stamps.
grids = []
for url in tqdm(urls):
    # The context manager closes the dataset on every path, including the
    # early `continue` below (the explicit close() used to be skipped there).
    with netCDF4.Dataset(url) as ds:
        time_var = ds.variables['time']
        all_times = netCDF4.num2date(time_var[:], time_var.units, calendar='julian')

        # Positions along the time axis whose year matches the update year.
        idtime = np.where([t.year == yearupdate for t in all_times])[0]
        if len(idtime) == 0:
            continue

        # Re-express the selected time stamps as UTC-aware datetimes.
        times = [
            datetime.datetime.strptime(t.isoformat(), "%Y-%m-%dT%H:%M:%S").replace(tzinfo=pytz.utc)
            for t in all_times
            if t.year == yearupdate
        ]

        # Slicing reads the data, so everything needed is fetched before the
        # dataset is closed at the end of the `with` block.
        grids.append({
            "url": url,
            "x": ds.variables['x'][:],
            "y": ds.variables['y'][:],
            "z": ds.variables['z'][idtime, :, :],
            "times": times
        })


100%|██████████████████████████████████████████████████████████████████████████████████| 62/62 [03:14<00:00,  3.13s/it]


In [27]:
# Total number of selected time steps over all downloaded grids.
count = sum(len(grid['times']) for grid in grids)
count

57

In [28]:
# Sanity check: show the first selected time slice of the first grid
# (masked cells are printed as `--`).
print(grids[0]['z'][0])

[[-- -- -- ... -- -- --]
 [-- -- -- ... -- -- --]
 [-- -- -- ... -- -- --]
 ...
 [-- -- -- ... -- -- --]
 [-- -- -- ... -- -- --]
 [-- -- -- ... -- -- --]]


In [29]:

#cmd
#subprocess.call('gsutil cp '../output/bathymetry_1985_0001.tif' gs://eo-bathymetry-rws/vaklodingen/bathymetry_1985_0001.tif', shell=True)
#ccc=r"dir"
#ccc
#subprocess.call(ccc)

In [30]:
# Make sure the image collection folder has been created in Google Earth Engine before running the cells below.

In [31]:
# Earth Engine path under which the uploaded images are stored; each asset id
# is built as `ee_collection_path + '/' + filename`.
ee_collection_path = 'projects/deltares-rws/eo-bathymetry/jarkus'

In [32]:
def run(cmd, shell=True):
    """Execute an external command and report its exit status.

    Parameters
    ----------
    cmd : str or list
        Command passed unchanged to ``subprocess.call``.
    shell : bool, optional
        Forwarded to ``subprocess.call``; defaults to ``True``.

    Returns
    -------
    int
        Exit code of the command. A non-zero code is also reported with a
        printed warning so that a failed step does not go unnoticed; no
        exception is raised, so callers that ignore the result keep working.
    """
    returncode = subprocess.call(cmd, shell=shell)
    if returncode != 0:
        print('WARNING: command exited with status {0}: {1}'.format(returncode, cmd))
    return returncode

In [33]:
#for g in tqdm(grids):
#    print(g['times'])

In [34]:
# Write every selected time slice to an ESRI ASCII grid and convert it to a
# Cloud Optimized GeoTIFF tile with gdal_translate.
start_index = 0  # time slices with a per-grid index below this are skipped
dirbathy = r'../../output_jarkusgrids/'
j = 0    # running tile number, used in the file names
ts = []  # time stamp of every slice visited (used by the yearly merge)
nodata_value = -32767
os.makedirs(dirbathy, exist_ok=True)

for g in tqdm(grids):
    ncols = len(g['x'])
    nrows = len(g['y'])
    cellsize = g['x'][1] - g['x'][0]
    # Lower-left corner of the grid: shift the smallest coordinates by half a
    # cell. The shift was hard-coded as 10, which equals cellsize / 2 only for
    # a cell size of 20.
    # NOTE(review): assumes x/y hold cell-centre coordinates - confirm.
    xllcorner = np.min(g['x']) - cellsize / 2
    yllcorner = np.min(g['y']) - cellsize / 2
    z = g['z']

    for i, t in enumerate(g['times']):
        ts.append(t)
        if i < start_index:
            continue
        j += 1
        filename = 'jarkusgrids_{0}_{1:04d}'.format(t.year, j)
        filepath = dirbathy + filename
        filepath_asc = filepath + '.asc'
        filepath_tif = filepath + '.tif'

        # Fill masked cells numerically rather than substituting the text
        # '--' in the formatted row; this also works for unmasked arrays.
        zi = np.ma.filled(z[i], nodata_value)

        with open(filepath_asc, 'w') as f:
            f.write('ncols {0}\n'.format(ncols))
            f.write('nrows {0}\n'.format(nrows))
            f.write('cellsize {0}\n'.format(cellsize))
            f.write('xllcorner {0}\n'.format(xllcorner))
            f.write('yllcorner {0}\n'.format(yllcorner))
            f.write('nodata_value {0}\n'.format(nodata_value))
            # Rows are written from the last y index to the first.
            for row_values in zi[::-1]:
                f.write(' '.join(map(str, row_values)))
                f.write('\n')

        # per tile
        cmd = 'gdal_translate -ot Float32 -a_srs EPSG:28992 -of COG {0} {1}'\
            .format(filepath_asc, filepath_tif)
        run(cmd)


100%|██████████████████████████████████████████████████████████████████████████████████| 57/57 [02:10<00:00,  2.28s/it]


In [35]:
# Value written for masked cells in the exported grids; it is also passed to
# `earthengine upload image` through --nodata_value.
nodata_value = -32767

In [36]:
# merge per year
# Mosaic all tiles of one year into a single COG, copy it to the storage
# bucket, ingest it into Earth Engine, set its start time and make it public.
tzinfo = ts[0].tzinfo if ts else None  # nothing exported -> loop below is a no-op
uyears = list(dict.fromkeys(t.year for t in ts))  # unique years, first-seen order
uts = [datetime.datetime(year=y, month=1, day=1, tzinfo=tzinfo) for y in uyears]  # 1 January of each year

for year, year_start in zip(tqdm(uyears), uts):
    filename = 'jarkusgrids_{0}'.format(year)
    filepath_year_tif = dirbathy + filename + '.tif'

    # Select only the per-tile files `jarkusgrids_<year>_<nnnn>.tif`. Requiring
    # the trailing underscore keeps a yearly mosaic left over from a previous
    # run (`jarkusgrids_<year>.tif`) out of its own input list.
    tile_prefix = filename + '_'
    files_to_mosaic = sorted(
        dirbathy + name
        for name in os.listdir(dirbathy)
        if name.startswith(tile_prefix) and name.endswith('.tif')
    )
    if not files_to_mosaic:
        print('No tiles found for {0}; skipping.'.format(year))
        continue

    # per year
    # Driver creation options go through `creationOptions`; strings given via
    # `options` are treated as raw gdalwarp arguments. TILED is not passed
    # because it is not a COG creation option.
    mosaic = gdal.Warp(filepath_year_tif, files_to_mosaic, dstSRS='EPSG:28992',
                       outputType=gdal.GDT_Float32, format="COG",
                       creationOptions=["COMPRESS=LZW"])
    if mosaic is None:
        # Do not upload a file that was never written.
        print('gdal.Warp failed for {0}; skipping upload.'.format(year))
        continue
    mosaic = None  # drop the reference so GDAL flushes and closes the file

    filepath_gs = 'gs://eo-bathymetry-rws/jarkusgrids/' + filename  # temporary file system in storage bucket
    cmd = 'gsutil cp {0} {1}' \
        .format(filepath_year_tif, filepath_gs)
    run(cmd, shell=True)

    filepath_ee = ee_collection_path + '/' + filename
    cmd = 'earthengine upload image --wait --asset_id={0} --nodata_value={1} {2}' \
        .format(filepath_ee, nodata_value, filepath_gs)
    run(cmd, shell=True)

    # Start time in milliseconds since the epoch.
    time_start = int(year_start.timestamp() * 1000)
    cmd = 'earthengine asset set --time_start {0} {1}' \
        .format(time_start, filepath_ee)
    run(cmd, shell=True)

    cmd = 'earthengine acl set public {0}' \
        .format(filepath_ee)
    run(cmd, shell=True)


1it [02:09, 129.58s/it]


In [None]:
# The following cell is just for testing.

In [None]:
        # NOTE(review): scratch fragment for testing a single upload. It is indented
        # as if cut from a loop body, and `filename_tif` is not assigned in any cell
        # shown here, so it presumably cannot run as-is - confirm before use.
        # Destination object in the storage bucket.
        filepath_gs = 'gs://eo-bathymetry-rws/jarkus/' + filename_tif
        
        #gsutil = 'D:/src/google-cloud-sdk/bin/gsutil.cmd' # relative path is not defined on Windows
        gsutil = 'gsutil'
        # Copy the local GeoTIFF to the bucket.
        cmd = gsutil + ' cp {0} {1}'\
            .format(filepath_tif, filepath_gs)
        run(cmd)
        
        # Upload the bucket object as an Earth Engine asset and wait for completion.
        filepath_ee = ee_collection_path + '/' + filename        
        cmd = 'earthengine upload image --wait --asset_id={0} --nodata_value={1} {2}'\
            .format(filepath_ee, nodata_value, filepath_gs)        
        run(cmd)
        
        # Start time in milliseconds since the epoch, taken from the first time
        # stamp of the first grid.
        time_start = int(grids[0]['times'][0].timestamp() * 1000)
        cmd = 'earthengine asset set --time_start {0} {1}'\
            .format(time_start, filepath_ee)
        run(cmd)

        # Make the asset publicly readable.
        cmd = 'earthengine acl set public {0}'\
            .format(filepath_ee)
        run(cmd)
