In [1]:
import os
import sys
import re
import glob
import subprocess
import time
import datetime
import pytz
import exifread
import zipfile
import pandas as pd

import ee
import ee.cli
import ee.cli.commands
import ee.cli.utils

In [2]:
def run(cmd):
    """Echo a command, execute it, and return its exit status.

    Args:
        cmd: Command handed unchanged to ``subprocess.call`` (a command
            string or a sequence of program arguments).

    Returns:
        int: The exit status reported by ``subprocess.call``.
    """
    print(cmd)
    # Hand the exit status back instead of discarding it, so callers can
    # tell whether the command succeeded.
    return subprocess.call(cmd)
    
def extractAll(zipName):
    """Extract every member of a zip archive into the current working directory.

    Directory entries are created when missing and reused when they already
    exist, so the function can be run again on the same archive.

    Args:
        zipName: Path of the zip archive to unpack.
    """
    # The context manager closes the archive handle as soon as extraction is
    # done, so the archive file can be deleted right afterwards.
    with zipfile.ZipFile(zipName) as z:
        # extractall() handles both file and directory members.
        z.extractall()

In [3]:
# Start the Earth Engine session; a later cell queries task state through ee.data.*.
ee.Initialize()
# Configuration object from the Earth Engine command-line utilities.
# None of the cells visible here read it again.
ee_config = ee.cli.utils.CommandLineConfig()

In [4]:
# Work from the data root so the relative glob patterns below resolve against it.
input_dir = r'P:/11202200.005-kpp-cip2018/data/Satellitedataportaal'
os.chdir(input_dir)

# Collect the GeoTIFF images and the still-zipped downloads below RapidEye/.
tif_pattern = 'RapidEye/**/**/*.tif'
zip_pattern = 'RapidEye/**/**/*.zip'
files = glob.glob(tif_pattern, recursive=True)
zippedfiles = glob.glob(zip_pattern, recursive=True)

# Report how much work there is, one line per category.
for label, found in (('Number of files to upload as assets = ', files),
                     ('Number of zipped files = ', zippedfiles)):
    print([label + str(len(found))])

# Timezone applied further down to the timezone-less timestamps read from the images.
local = pytz.timezone("Europe/Amsterdam")

['Number of files to upload as assets = 69']
['Number of zipped files = 0']


In [6]:
# All images must sit in one folder for the Google Earth Engine Batch Asset Manager,
# but the ftp.satellietdataportaal.nl transfer for RapidEye was organised in monthly/daily folders.

# Unzip every archive that has not been unpacked yet, then delete the archive.
for j in zippedfiles:
    filename = os.path.basename(j)
    filename_no_ext = os.path.splitext(j)[0]
    filename_unzipped = os.path.join(input_dir, filename_no_ext + '.tif')
    if not os.path.lexists(filename_unzipped):
        # NOTE(review): extractAll() unpacks relative to the current working directory -
        # confirm the .tif really ends up next to its archive, as the check above expects.
        extractAll(j)
        print(['Unzipped .tif file from ' + filename])
    # The archive is no longer needed whether it was just unpacked or already had been.
    os.remove(os.path.join(input_dir, j))

# The image list was built before unzipping; refresh it so freshly
# extracted images are moved as well.
if zippedfiles:
    files = glob.glob('RapidEye/**/**/*.tif', recursive=True)

dst = 'RapidEye/'
# Move all files to destination path
for k in files:
    filename = os.path.basename(k)
    path_move_file = os.path.join(input_dir, dst, filename)
    if os.path.lexists(path_move_file):
        # Already in place, or a file with the same name got there first: never overwrite.
        continue
    os.rename(os.path.join(input_dir, k), path_move_file)

# Delete the now-empty sub-directories. Walking bottom-up lists children before
# their parents, so nested monthly/daily folders are removed in one pass.
empty_dir = [os.path.join(root, d)
             for root, dirs, _ in os.walk(os.path.join(input_dir, dst), topdown=False)
             for d in dirs]

for e in empty_dir:
    try:
        os.rmdir(e)
    except OSError:
        # Still holds something (e.g. a skipped duplicate) or cannot be removed:
        # leave it alone instead of aborting the whole cell.
        print(['Kept non-empty directory ' + e])

In [7]:
# Build the metadata table (one row per image: file name without extension and
# acquisition time as a POSIX timestamp) and save it next to the images.
files = glob.glob('RapidEye/*.tif')

print(['Number of files to upload as assets = '+str(len(files))])

# Collect plain records and build the DataFrame once after the loop,
# instead of growing it row by row.
records = []
for i, f in enumerate(files):
    print('Processing file ' + f + ', file index: ' + str(i))
    filename = os.path.splitext(os.path.basename(f))[0]

    # Read the "Image DateTime" tag; the context manager closes the image
    # handle again so handles do not pile up across the loop.
    with open(f, 'rb') as fh:
        tags = exifread.process_file(fh, stop_tag="Image DateTime")
    datestring = str(tags["Image DateTime"].values)
    t = datetime.datetime.strptime(datestring, '%Y:%m:%d %H:%M:%S')

    # Interpret the timezone-less tag in the same zone as the Cloud-upload cell,
    # so the result does not depend on the timezone of the machine running this.
    # NOTE(review): assumes the tag holds Europe/Amsterdam local time - confirm.
    # NOTE(review): Earth Engine's system:time_start is in milliseconds since the
    # epoch, while this writes seconds - confirm what geebam expects.
    time_start = local.localize(t, is_dst=None).timestamp()

    records.append((filename, time_start))

# save dataframe as csv for metadata
df = pd.DataFrame(records, columns=['filename','system:time_start']).set_index('filename')
meta = os.path.join(dst,'metadata.csv')
df.to_csv(meta)
df.head(n=5)

['Number of files to upload as assets = 69']
Processing file RapidEye\20170511_3163722_RE_5m_8bit_RGB_RD.tif, file index: 0
Processing file RapidEye\20170409_3162817_RE_5m_8bit_RGB_RD.tif, file index: 1
Processing file RapidEye\20170409_3163721_RE_5m_8bit_RGB_RD.tif, file index: 2
Processing file RapidEye\20170331_3263706_RE_5m_8bit_RGB_RD.tif, file index: 3
Processing file RapidEye\20170425_3163722_RE_5m_8bit_RGB_RD.tif, file index: 4
Processing file RapidEye\20170321_3163721_RE_5m_8bit_RGB_RD.tif, file index: 5
Processing file RapidEye\20170321_3162817_RE_5m_8bit_RGB_RD.tif, file index: 6
Processing file RapidEye\20170404_3162816_RE_5m_8bit_RGB_RD.tif, file index: 7
Processing file RapidEye\20170829_3263706_RE_5m_8bit_RGB_RD.tif, file index: 8
Processing file RapidEye\20170327_3163721_RE_5m_8bit_RGB_RD.tif, file index: 9
Processing file RapidEye\20170420_3163721_RE_5m_8bit_RGB_RD.tif, file index: 10
Processing file RapidEye\20170420_3162817_RE_5m_8bit_RGB_RD.tif, file index: 11
Proce

Unnamed: 0_level_0,system:time_start
filename,Unnamed: 1_level_1
20170511_3163722_RE_5m_8bit_RGB_RD,1494493000.0
20170409_3162817_RE_5m_8bit_RGB_RD,1491730000.0
20170409_3163721_RE_5m_8bit_RGB_RD,1491729000.0
20170331_3263706_RE_5m_8bit_RGB_RD,1490952000.0
20170425_3163722_RE_5m_8bit_RGB_RD,1493111000.0


In [8]:
# Upload through the Google Earth Engine Batch Asset Manager (geebam).
# Project page: https://github.com/tracek/gee_asset_manager

user = "rogersckw9@gmail.com"

# geebam asks for a password interactively, which does not work from inside the
# notebook. Print the full command instead so it can be pasted into a terminal.
source_dir = os.path.join(input_dir, dst)
metadata_csv = os.path.join(input_dir, meta)
geebam_cmd = "geebam upload --source {0} --dest users/rogersckw9/eo-bathymetry/rapideye -m {1} -u {2}".format(source_dir, metadata_csv, user)
print(geebam_cmd)

geebam upload --source P:/11202200.005-kpp-cip2018/data/Satellitedataportaal\RapidEye/ --dest users/rogersckw9/eo-bathymetry/rapideye -m P:/11202200.005-kpp-cip2018/data/Satellitedataportaal\RapidEye/metadata.csv -u rogersckw9@gmail.com


In [None]:
# if uploading to Google Cloud first, follow this protocol:
# per image: copy it to the bucket, ingest it as an Earth Engine asset
# (retrying failed ingestions), then stamp the asset with its start time.

for i, f in enumerate(files):
    print('Processing file ' + f + ', file index: ' + str(i))

    # extract time in UTC; the context manager closes the image handle again
    with open(f, 'rb') as fh:
        tags = exifread.process_file(fh, stop_tag="Image DateTime")
    datestring = str(tags["Image DateTime"].values)
    t = datetime.datetime.strptime(datestring, '%Y:%m:%d %H:%M:%S')
    local_t = local.localize(t, is_dst=None)
    utc_t = local_t.astimezone(pytz.utc)
    time_start = utc_t.strftime('%Y-%m-%dT%H:%M:%S')

    # parse file names
    filename = os.path.basename(f)
    filename_no_ext = os.path.splitext(filename)[0]

    # get nodata value: scan the file as text for a "nodata_value <value>" line
    # and fall back to the default when none is found (crude, but kept as is)
    nodata_value = -99999999
    with open(f, encoding="latin_1") as asc:
        for line in asc:
            if "nodata_value" in line.lower():
                parts = line.split()
                # Only accept the line when a value actually follows the keyword.
                if len(parts) > 1:
                    nodata_value = parts[1]
                    break

    run(r"C:\Users\Wilson\AppData\Local\Google\Cloud SDK\google-cloud-sdk\bin\gsutil.cmd cp {0} gs://eo-bathymetry/rapideye/{1}".format(os.path.join(input_dir,f),filename))

    # upload to GEE
    retry_count = 0

    while True:
        # The asset id is built from the name WITHOUT the extension so that it is
        # the same id the `asset set` call below addresses; only the bucket
        # object keeps the .tif suffix.
        run("earthengine upload image --wait --asset_id=users/rogersckw9/rapideye/{1} --nodata_value={0} gs://eo-bathymetry/rapideye/{2}".format(nodata_value, filename_no_ext, filename))

        # check last task status (only the first task of the list is inspected)
        tasks = ee.data.getTaskList()
        task_state = None
        if tasks:
            task_status = ee.data.getTaskStatus([tasks[0]['id']])
            task_state = task_status[0]['state']
            print(task_status)

        if task_state != 'FAILED':
            break # done

        retry_count += 1
        if retry_count > 10:
            print('Maximum number of retry reached, exiting ...')
            # Fail loudly: exiting with status 0 would report success.
            raise RuntimeError('Upload of ' + filename + ' kept failing after ' + str(retry_count - 1) + ' retries')
        print('Retrying upload ' + str(retry_count) + ' ...')

    # set time
    run("earthengine asset set --time_start {0} users/rogersckw9/rapideye/{1}".format(time_start, filename_no_ext))
