In [1]:
import os
import sys
import re
import glob
import subprocess
import time
import datetime
import pytz
import exifread
import zipfile
import gdal
import pandas as pd

import ee
import ee.cli
import ee.cli.commands
import ee.cli.utils

In [2]:
def run(cmd):
    """Echo a command, execute it, and return its exit status.

    Args:
        cmd: Command handed unchanged to ``subprocess.call`` (a command
            string or a sequence of arguments).

    Returns:
        int: Exit status reported by ``subprocess.call``. Callers that ignore
        the return value behave exactly as before.
    """
    print(cmd)
    # Return the status instead of discarding it so failures can be detected.
    return subprocess.call(cmd)
    
def extractAll(zipName):
    """Extract every member of a zip archive into the current working directory.

    Args:
        zipName: Path of the zip archive to read.

    The archive is opened in a ``with`` block so its file handle is closed
    even if extraction fails. ``extractall`` creates directory entries itself
    and does not fail when they already exist, so the function can be re-run
    on an archive that was extracted before.
    """
    with zipfile.ZipFile(zipName) as archive:
        archive.extractall()

In [3]:
# Initialise the Earth Engine client library; the ee.data.* calls further down
# in this notebook are made after this call.
ee.Initialize()
# Configuration object from the Earth Engine command-line tooling.
# NOTE(review): ee_config is not referenced by any other cell visible in this
# notebook -- confirm whether it is still needed.
ee_config = ee.cli.utils.CommandLineConfig()

In [6]:
# Europe/Amsterdam time zone object, used where acquisition times are localised.
local = pytz.timezone("Europe/Amsterdam")
input_dir = r'P:/11202200.005-kpp-cip2018/data/Satellitedataportaal'
os.chdir(input_dir)

# Choose the satellite source to process; rapideye takes precedence if both are True.
rapideye = False
triplesat = True

if rapideye:
    # RapidEye locations searched: 3162816, 3162817, 3162818, 3163721, 3163722, 3263706
    # find all .tif and .zip files (paths are relative to input_dir)
    files = glob.glob('RapidEye/**/**/*.tif', recursive=True)
    zippedfiles = glob.glob('RapidEye/**/**/*.zip', recursive=True)
    dst = 'RapidEye/'
elif triplesat:
    # TripleSat locations searched: Ameland, Dokkum, Haamstede, Hulst, Terschelling, Westkapelle
    files = glob.glob('TripleSat/Tri_RD_8bit_RGB_80cm/*.tif')
    # Without recursive=True each "**" behaves like "*", so only archives
    # exactly three folder levels below the product folder are matched.
    zippedfiles = glob.glob('TripleSat/Tri_RD_8bit_RGB_80cm/**/**/**/*.zip')
    dst = 'TripleSat/Tri_RD_8bit_RGB_80cm/'
else:
    # Previously this branch only printed a message and the cell then failed
    # with a NameError on the undefined `dst`; fail with a clear error instead.
    raise ValueError("No satellite selected.")

print(['Number of files to upload as assets = '+str(len(files))])
print(['Number of zipped files = '+str(len(zippedfiles))])

# Work from the destination folder from here on; relative paths used after
# this point are resolved against input_dir/dst.
os.chdir(dst)

['Number of files to upload as assets = 8']
['Number of zipped files = 1']


In [8]:
# Files must all be in the same folder for Google Earth Engine Batch Asset Manager,
# but the ftp.satellietdataportaal.nl transfer for RapidEye was organised in monthly/daily folders.

# Unzip every archive that has not been extracted yet, then delete the archive.
for zip_rel in zippedfiles:
    zip_path = os.path.join(input_dir, zip_rel)
    filename = os.path.basename(zip_rel)
    # A .tif with the same stem next to the archive marks it as already extracted.
    # NOTE(review): extractAll writes into the current working directory, not next
    # to the archive -- confirm this check looks in the intended place.
    filename_unzipped = os.path.join(input_dir, os.path.splitext(zip_rel)[0] + '.tif')
    if not os.path.lexists(filename_unzipped):
        extractAll(zip_path)
        print(['Unzipped .tif file from ' + filename])
    # The archive is removed in both cases.
    os.remove(zip_path)

# Move all files to the destination path (files already there are left alone).
for tif_rel in files:
    path_move_file = os.path.join(input_dir, dst, os.path.basename(tif_rel))
    if not os.path.lexists(path_move_file):
        os.rename(os.path.join(input_dir, tif_rel), path_move_file)

# Delete empty directories below the destination folder.
# The previous glob patterns were relative to the current working directory,
# which an earlier cell already changed to the destination folder, so they
# matched nothing; os.rmdir would also have raised on any non-empty match.
# Walk bottom-up from an absolute root so a parent emptied by removing its
# children is removed too, and only directories that are really empty are touched.
dst_root = os.path.normpath(os.path.join(input_dir, dst))
for dirpath, _dirnames, _filenames in os.walk(dst_root, topdown=False):
    if os.path.normpath(dirpath) == dst_root:
        continue  # never remove the destination folder itself
    if not os.listdir(dirpath):
        os.rmdir(dirpath)

['Unzipped .tif file from 20180224_093618_Tri_80cm_RD_8bit_RGB_Hulst.zip']


In [11]:
# Build the metadata table (one row per .tif in the current working directory)
# and save it as metadata.csv.
records = []

if rapideye or triplesat:
    files = glob.glob('*.tif')
    print(['Number of files to upload as assets = '+str(len(files))])

    for f in files:
        filename = os.path.splitext(os.path.basename(f))[0]

        if rapideye:
            # RapidEye: acquisition time comes from the "Image DateTime" EXIF tag.
            # The with-block closes the file handle, which was previously leaked.
            with open(f, 'rb') as fh:
                tags = exifread.process_file(fh, stop_tag="Image DateTime")
            datestring = str(tags["Image DateTime"].values)
            t = datetime.datetime.strptime(datestring, '%Y:%m:%d %H:%M:%S')
        else:
            # TripleSat: acquisition time is the YYYYMMDD_HHMMSS stamp in the file name.
            datestring = re.findall(r"\d{8}_\d{6}", filename)[0]
            t = datetime.datetime.strptime(datestring, '%Y%m%d_%H%M%S')

        # NOTE(review): `t` is a naive datetime, so timestamp() interprets it in the
        # time zone of the machine running the notebook -- confirm this is intended.
        # NOTE(review): the value is in seconds; Earth Engine documents
        # system:time_start in milliseconds -- confirm what the uploader expects.
        time_start = t.timestamp()

        # add filename and time to the table
        records.append((filename, time_start))
else:
    print("No satellite selected.")

# Create the frame once from the collected rows instead of growing it row by row.
df = pd.DataFrame(records, columns=['filename','system:time_start'])

# save dataframe as csv for metadata
df = df.set_index('filename')
meta = 'metadata.csv'
df.to_csv(meta)
df.head(n=5)

['Number of files to upload as assets = 9']


Unnamed: 0_level_0,system:time_start
filename,Unnamed: 1_level_1
20180318_093534_Tri_80cm_RD_8bit_RGB_Ameland,1521362000.0
20170323_091028_Tri_80cm_RD_8bit_RGB_Terschelling,1490257000.0
20170527_091129_Tri_80cm_RD_8bit_RGB_Dokkum,1495869000.0
20180319_100130_Tri_80cm_RD_8bit_RGB_Westkapelle,1521450000.0
20180224_093618_Tri_80cm_RD_8bit_RGB_Hulst,1519461000.0


In [13]:
# Upload with the Google Earth Engine Batch Asset Manager (geebam).
# Details at https://github.com/tracek/gee_asset_manager

user = "rogersckw9@gmail.com"

# geebam asks for a password, so it cannot be run from inside the notebook.
# The command is only printed here; copy and paste it into a command line.
if rapideye:
    upload_template = "geebam upload --source {0} --dest users/rogersckw9/eo-bathymetry/rapideye -m {1} -u {2}"
elif triplesat:
    upload_template = "geebam upload --source {0} --dest users/rogersckw9/eo-bathymetry/triplesat -m {1} -u {2}"
else:
    upload_template = None

if upload_template is None:
    print("No satellite selected.")
else:
    source_dir = os.path.join(input_dir,dst)
    metadata_path = os.path.join(input_dir,dst,meta)
    print(upload_template.format(source_dir, metadata_path, user))

geebam upload --source P:/11202200.005-kpp-cip2018/data/Satellitedataportaal\TripleSat/Tri_RD_8bit_RGB_80cm/ --dest users/rogersckw9/eo-bathymetry/triplesat -m P:/11202200.005-kpp-cip2018/data/Satellitedataportaal\TripleSat/Tri_RD_8bit_RGB_80cm/metadata.csv -u rogersckw9@gmail.com


In [None]:
# If uploading to Google Cloud Storage first, follow this protocol (this is for RapidEye).
# For each file: read the acquisition time, copy the file to the bucket, ingest it
# into Earth Engine (retrying failed ingestions) and finally set the asset start time.

MAX_UPLOAD_RETRIES = 10

if rapideye:
    for i, f in enumerate(files):
        print('Processing file ' + f + ', file index: ' + str(i))

        # extract time in UTC; the with-block closes the handle that was previously leaked
        with open(f, 'rb') as fh:
            tags = exifread.process_file(fh, stop_tag="Image DateTime")
        datestring = str(tags["Image DateTime"].values)
        t = datetime.datetime.strptime(datestring, '%Y:%m:%d %H:%M:%S')
        # is_dst=None makes pytz raise on ambiguous or non-existent local times
        local_t = local.localize(t, is_dst=None)
        utc_t = local_t.astimezone(pytz.utc)
        time_start = utc_t.strftime('%Y-%m-%dT%H:%M:%S')

        # parse file names
        filename = os.path.basename(f)
        filename_no_ext = os.path.splitext(filename)[0]

        # get nodata value: scan the file as latin-1 text for a "nodata_value" line
        # and take its second whitespace-separated token; fall back to the default.
        nodata_value = -99999999
        with open(f, encoding="latin_1") as asc:
            for line in asc:
                if "nodata_value" in line.lower():
                    nodata_value = line.split()[1]
                    break

        # Resolve the path the same way open(f) above does (against the current
        # working directory) so the copy and the reads refer to the same file.
        run(r"C:\Users\Wilson\AppData\Local\Google\Cloud SDK\google-cloud-sdk\bin\gsutil.cmd cp {0} gs://eo-bathymetry/rapideye/{1}".format(os.path.abspath(f), filename))

        # upload to GEE
        retry_count = 0

        while True:
            # The asset ID uses the name without extension so that it matches the
            # asset addressed by the "asset set" command below.
            run("earthengine upload image --wait --asset_id=users/rogersckw9/rapideye/{2} --nodata_value={0} gs://eo-bathymetry/rapideye/{1}".format(nodata_value, filename, filename_no_ext))

            # check last task status
            # NOTE(review): assumes the first entry of the task list is the upload
            # that was just started -- confirm the ordering of getTaskList().
            tasks = ee.data.getTaskList()
            task_state = None
            if tasks:
                task_status = ee.data.getTaskStatus([tasks[0]['id']])
                task_state = task_status[0]['state']
                print(task_status)

            if task_state != 'FAILED':
                break # done

            retry_count += 1
            if retry_count > MAX_UPLOAD_RETRIES:
                # Raise instead of sys.exit(0), which reported success on failure.
                raise RuntimeError('Maximum number of retry reached, exiting ...')
            print('Retrying upload ' + str(retry_count) + ' ...')

        # set time
        run("earthengine asset set --time_start {0} users/rogersckw9/rapideye/{1}".format(time_start, filename_no_ext))

elif triplesat:
    print("Upload via Google Cloud Storage is only implemented for RapidEye.")
else:
    print("No satellite selected.")

In [None]:
# If TripleSat images are larger than 10GB (RGBI tend to be) must split images first to upload to GEE.
input_dir = r'P:\11202200.005-kpp-cip2018\data\Satellitedataportaal'
os.chdir(input_dir)

# find all .tif files
files = glob.glob('TripleSat\Tri_RD_8bit_RGBI_80cm\*.tif')#, recursive=True)
print(['Number of files to upload as assets = '+str(len(files))])

for f in files:
    filename = os.path.basename(f)
    filename_no_ext = os.path.splitext(filename)[0]    
    in_path = os.path.join(input_dir,f)
    out_path = os.path.join(input_dir,filename_no_ext)
    ds = gdal.Open(in_path)
    band = ds.GetRasterBand(1)
    xsize = band.XSize
    ysize = band.YSize
    
    tile_size_x = int(math.ceil(xsize/2))
    tile_size_y = int(math.ceil(ysize/2))

    for i in range(0, xsize, tile_size_x):
        for j in range(0, ysize, tile_size_y):
            com_string = "gdal_translate -of GTIFF -srcwin " + str(i)+ ", " + str(j) + ", " + str(tile_size_x) + ", " + str(tile_size_y) + " " + str(in_path) + " " + str(out_path) + str(i) + "_" + str(j) + ".tif"
            os.system(com_string)

In [None]:
# Inspect a .tif file: print the GDAL info report of the first match.
# The pattern is relative to the current working directory; os.path.join avoids
# the invalid "\*" escape sequences of the hand-written pattern.
files = glob.glob(os.path.join('TripleSat', '**', '*.tif'))
if files:
    print(gdal.Info(files[0]))
else:
    # Previously an empty match list raised IndexError on files[0].
    print('No .tif files found under TripleSat.')