In [33]:
import datetime
import re


In [41]:
# check which files are already uploaded
uploaded = ! earthengine ls users/fbaart/ssh_grids_v1609
uploaded = set(uploaded)

In [42]:
# number of distinct lines returned by the `earthengine ls` listing
len(uploaded)

1878

In [43]:
# check which files are available in cloud storage
urls = ! ~/.local/google-cloud-sdk/bin/gsutil ls gs://slr/**/*.tiff
len(urls), urls[-1]

(1913, 'gs://slr/ssh_grids/ssh_grids_v1609_2018061812_i.nc-final.tiff')

In [52]:
# extract information from the path of every listed tiff
pattern = re.compile(r'''
    ^                            # start of the string
    (?P<path>                    # this is the path
        gs://slr/ssh_grids/      # start of the path
        (?P<base>ssh_grids_      # this is the base path
            (?P<version>\w+)     # the version number/text (\w already covers digits)
            _                    # some _
            (?P<year>\d{4})      # year
            (?P<month>\d{2})     # month
            (?P<day>\d{2})       # day
            (?P<hour>\d{2})      # hour
        )                        # base path ends here
        (_i)?                    # sometimes a _i
        \.nc-final\.tiff         # end of the filename
    )                            # path ends here
    $                            # string ends here
''', re.VERBOSE)

# Match every listed line once. A line that does not fit the pattern (a tiff
# elsewhere in the bucket, a warning printed by the command, ...) is set aside
# and reported instead of raising AttributeError on `None.groupdict()`.
candidates = [(url, pattern.match(url)) for url in urls]
matches = [found.groupdict() for _, found in candidates if found is not None]
unmatched = [url for url, found in candidates if found is None]
if unmatched:
    print("skipped {} line(s) that do not match the pattern, first one: {!r}".format(
        len(unmatched), unmatched[0]
    ))

# number of matched files and number of distinct base names; the optional `_i`
# suffix sits outside the `base` group, so two files can share one base
len(matches), len({match['base'] for match in matches})

(1913, 1878)

In [53]:
# make sure the image collection that receives the uploads exists;
# when it is already there the command just reports that (see the output below)
! earthengine create collection users/fbaart/ssh_grids_v1609

Asset users/fbaart/ssh_grids_v1609 already exists


In [47]:
# spot check: is the asset for the first matched file already in the listing?
collection_prefix = "users/fbaart/ssh_grids_v1609/"
match = matches[0]
# the asset id is the collection path followed by the file's base name
asset_id = collection_prefix + match['base']
asset_id in uploaded

True

In [48]:
# upload all files that are not uploaded
for match in matches:
    date = datetime.datetime(
        year=int(match['year']), 
        month=int(match['month']),
        day=int(match['day']),
        hour=int(match['hour'])
    )

    date_str = date.isoformat()
    path = match['path']
    asset_id = "users/fbaart/ssh_grids_v1609/" + match['base']
    if asset_id in uploaded:
        continue
    ! earthengine upload image --asset_id $asset_id --time_start $date_str $path

In [13]:
# make sure the date is set correct
for match in matches:
    date = datetime.datetime(
        year=int(match['year']), 
        month=int(match['month']),
        day=int(match['day']),
        hour=int(match['hour'])
    )

    date_str = date.isoformat()
    path = match['path']
    asset_id = "users/fbaart/ssh_grids_v1609/" + match['base']
    if asset_id in uploaded:
        ! earthengine asset set --time_start $date_str $asset_id