In [1]:
import datetime
import glob
import math
import os
import re
import subprocess
import sys
import time
import zipfile

import exifread
import gdal
import pandas as pd
import pytz

import ee
import ee.cli
import ee.cli.commands
import ee.cli.utils

In [2]:
def run(cmd):
    """Echo a command, execute it and return its exit status.

    Parameters
    ----------
    cmd : str or sequence of str
        Command passed unchanged to ``subprocess.call``.

    Returns
    -------
    int
        Exit status reported by ``subprocess.call`` so that callers can
        detect a failed command.
    """
    print(cmd)
    return subprocess.call(cmd)
    
def extractAll(zipName):
    """Extract every member of a zip archive into the current working directory.

    Parameters
    ----------
    zipName : str
        Path of the zip archive to unpack.

    Directory entries that already exist on disk are tolerated, so the
    function can be re-run on the same archive, and the archive is closed
    again even when extraction fails.
    """
    with zipfile.ZipFile(zipName) as z:
        z.extractall()

In [3]:
# Initialise the Earth Engine client library for this kernel session.
# NOTE(review): presumably relies on credentials stored beforehand — confirm.
ee.Initialize()
# Configuration object from the Earth Engine command-line helpers.
# No cell visible in this notebook reads `ee_config` afterwards.
ee_config = ee.cli.utils.CommandLineConfig()

In [31]:
# Configuration: local time zone, data root and the satellite product to process.
local = pytz.timezone("Europe/Amsterdam")
input_dir = r'P:/11202200.005-kpp-cip2018/data/Satellitedataportaal'
os.chdir(input_dir)

# Processing switches; RapidEye takes precedence when both are True.
rapideye = False
triplesat = True

# Defaults, so that these names exist even when no satellite is selected.
files = []
zippedfiles = []
dst = None

if rapideye:
    files = glob.glob('RapidEye/RE_RD_12bit_RGBEI_5m_jp2000/**/**/*.jp2000', recursive=True)
    zippedfiles = glob.glob('RapidEye/RE_RD_12bit_RGBEI_5m_jp2000/**/**/*.zip', recursive=True)
    dst = 'RapidEye/RE_RD_12bit_RGBEI_5m_jp2000/'
    print(['Number of files to upload as assets = '+str(len(files))])
    print(['Number of zipped files = '+str(len(zippedfiles))])
elif triplesat:
    # TripleSat locations searched: Ameland, Dokkum, Haamstede, Hulst, Schiermonnikoog, Tweede_Maasvlakte, Terschelling, Westkapelle, Yerseke, Zierikzee
    files = glob.glob('TripleSat/Tri_RD_12bit_RGBI_80cm/*.tif')
    # Without recursive=True each '**' matches exactly one path level,
    # so only archives three folders below the product folder are found.
    zippedfiles = glob.glob('TripleSat/Tri_RD_12bit_RGBI_80cm/**/**/**/*.zip')#, recursive=True)
    dst = 'TripleSat/Tri_RD_12bit_RGBI_80cm/'
    print(['Number of files to upload as assets = '+str(len(files))])
    print(['Number of zipped files = '+str(len(zippedfiles))])
else:
    print("No satellite selected.")

# Later cells use paths relative to the product folder; only switch to it
# when a product was actually selected (otherwise `dst` has no value).
if dst is not None:
    os.chdir(dst)

['Number of files to upload as assets = 3']
['Number of zipped files = 7']


In [None]:
# Preview how the first archive path is split into the name parts used below.
# Raises IndexError when `zippedfiles` is empty.
print(zippedfiles[0])
# File name without its directory part
filename = os.path.basename(zippedfiles[0])
print(filename)
# File name without the '.zip' extension
filename_no_ext = os.path.splitext(filename)[0]
print(filename_no_ext)
# NOTE(review): the '.jp2000' extension is hard-coded here, while the TripleSat
# branch of the unzip cell looks for '.tif' — confirm which one is wanted.
filename_unzipped = os.path.join(input_dir,dst,filename_no_ext+'.jp2000')
print(filename_unzipped)

In [None]:
# The Google Earth Engine Batch Asset Manager needs all images in the same folder,
# but the RapidEye transfer from ftp.satellietdataportaal.nl was sorted into
# monthly/daily folders.

# Unpack every archive whose image is not on disk yet; the archive itself is
# deleted in either case.
if rapideye:
    for j in zippedfiles:
        zip_path = os.path.join(input_dir,j)
        filename = os.path.basename(j)
        filename_no_ext = os.path.splitext(filename)[0]
        # expected image location: directly inside the product folder
        filename_unzipped = os.path.join(input_dir,dst,filename_no_ext+'.jp2000')
        is_unzipped = os.path.lexists(filename_unzipped)
        if not is_unzipped:
            extractAll(zip_path)
            print(['Unzipped .jp2000 file from ' + filename])
        os.remove(zip_path)
elif triplesat:
    for j in zippedfiles:
        zip_path = os.path.join(input_dir,j)
        filename = os.path.basename(j)
        # here the archive's own folder is kept in the name: the image is
        # looked for next to the archive
        filename_no_ext = os.path.splitext(j)[0]
        filename_unzipped = os.path.join(input_dir,filename_no_ext+'.tif')
        is_unzipped = os.path.lexists(filename_unzipped)
        if not is_unzipped:
            extractAll(zip_path)
            print(['Unzipped .tif file from ' + filename])
        os.remove(zip_path)
else:
    print('No satellite selected')

['Unzipped .tif file from 20180302_093358_Tri_80cm_RD_12bit_RGBI_Schiermonnikoog.zip']
['Unzipped .tif file from 20180306_094211_Tri_80cm_RD_12bit_RGBI_Westkapelle.zip']


In [None]:

# # Move all files to destination path
# for k in files:
#     filename = os.path.basename(k)
#     path_move_file = os.path.join(input_dir,dst,filename)
#     if os.path.lexists(path_move_file):
#         pass
#     else:
#         os.rename(os.path.join(input_dir,k), path_move_file)

# Delete the directories that are left empty after unzipping.
if rapideye:
    empty_dir_pattern = 'RapidEye/RE_RD_12bit_RGBEI_5m_jp2000/**/**'
elif triplesat:
    empty_dir_pattern = 'TripleSat/Tri_RD_12bit_RGBI_80cm/**/**/**'
else:
    empty_dir_pattern = None
    print("No satellite selected.")

# The patterns are relative to input_dir, but the configuration cell has
# changed the working directory to the product folder, so the pattern is
# anchored explicitly. Without recursive=True each '**' matches one level.
empty_dir = []
if empty_dir_pattern is not None:
    empty_dir = glob.glob(os.path.join(input_dir, empty_dir_pattern))

# Longest paths first; only directories that are really empty are removed,
# so files and folders that still hold data are left alone.
for e in sorted(empty_dir, key=len, reverse=True):
    if os.path.isdir(e) and not os.listdir(e):
        os.rmdir(e)

In [13]:
# files = glob.glob('*.jp2')
# print(len(files))
# Count the GeoTIFF files in the current working directory.
files = glob.glob('*.tif')
print(len(files))

105


In [14]:
# Convert every RapidEye .jp2 image in the working directory to GeoTIFF and
# read the key/value pairs from its companion <name>.txt metadata file.
if rapideye:
    files = glob.glob('*.jp2')
    for i, f in enumerate(files):
        print(str(i)+' of '+str(len(files)))
        filename = os.path.splitext(os.path.basename(f))[0]
        metafile = filename+'.txt'

        # Write <name>.tif; setting the handle to None releases the dataset
        ds = gdal.Open(f)
        ds = gdal.Translate(filename+'.tif', ds)
        ds = None

        # Parse "key = value" lines, skipping lines that contain "GROUP";
        # the file is closed again as soon as parsing is done.
        pvl_dict = {}
        with open(metafile) as pvl_file:
            for line in pvl_file:
                line = line.strip()
                if "GROUP" in line:
                    continue
                vals = line.replace('"', '').split(' = ')
                if len(vals) > 1:
                    pvl_dict[vals[0]] = vals[1]

        # Set metadata in datasource
        # NOTE(review): this reopens the source .jp2 (gdal.Open is read-only by
        # default), not the .tif written above — confirm the intended target.
        ds = gdal.Open(f)
        ds.SetMetadata(pvl_dict)
        ds = None

['RE3_20170420_3163017_RD_12bit_RGBREI_5m.jp2', 'RE1_20170404_3162917_RD_12bit_RGBREI_5m.jp2', 'RE5_20170707_3162917_RD_12bit_RGBREI_5m.jp2', 'RE1_20170829_3163721_RD_12bit_RGBREI_5m.jp2', 'RE1_20170404_3162817_RD_12bit_RGBREI_5m.jp2', 'RE5_20170707_3162817_RD_12bit_RGBREI_5m.jp2', 'RE2_20170331_3163722_RD_12bit_RGBREI_5m.jp2', 'RE4_20170602_3163722_RD_12bit_RGBREI_5m.jp2', 'RE3_20170420_3162818_RD_12bit_RGBREI_5m.jp2', 'RE3_20170601_3163722_RD_12bit_RGBREI_5m.jp2', 'RE4_20170510_3162817_RD_12bit_RGBREI_5m.jp2', 'RE5_20170525_3163721_RD_12bit_RGBREI_5m.jp2', 'RE4_20170510_3162917_RD_12bit_RGBREI_5m.jp2', 'RE2_20170522_3163721_RD_12bit_RGBREI_5m.jp2', 'RE4_20170425_3163721_RD_12bit_RGBREI_5m.jp2', 'RE5_20170902_3163017_RD_12bit_RGBREI_5m.jp2', 'RE4_20170510_3162916_RD_12bit_RGBREI_5m.jp2', 'RE5_20170809_3162916_RD_12bit_RGBREI_5m.jp2', 'RE5_20170525_3162817_RD_12bit_RGBREI_5m.jp2', 'RE5_20170525_3162917_RD_12bit_RGBREI_5m.jp2', 'RE1_20170330_3263706_RD_12bit_RGBREI_5m.jp2', 'RE4_2017051

In [29]:
# Build metadata.csv for the upload: one row per GeoTIFF in the working
# directory with its acquisition time as milliseconds since the Unix epoch,
# interpreting the recorded date/time as UTC.
files = glob.glob('*.tif')
print(['Number of files to upload as assets = '+str(len(files))])

# Rows are collected here and turned into a DataFrame once at the end.
records = []

if rapideye:
    for f in files:
        filename = os.path.splitext(os.path.basename(f))[0]
        metafile = filename+'.txt'

        # Parse "key = value" lines of the companion metadata file,
        # skipping lines that contain "GROUP".
        pvl_dict = {}
        with open(metafile) as pvl_file:
            for line in pvl_file:
                line = line.strip()
                if "GROUP" in line:
                    continue
                vals = line.replace('"', '').split(' = ')
                if len(vals) > 1:
                    pvl_dict[vals[0]] = vals[1]

        datestring = pvl_dict['Acquisition Date']
        timestring = pvl_dict['Acquisition Time']
        t = datetime.datetime.strptime(datestring+timestring, '%Y-%m-%d%H:%M:%S')
        time_start = t.replace(tzinfo=datetime.timezone.utc).timestamp()*1000

        records.append([filename, time_start])

elif triplesat:
    for f in files:
        filesize = os.path.getsize(f)
        filename = os.path.splitext(os.path.basename(f))[0]

        if (filesize > 10e9):
            # Images above 10 GB are cut into a 2 x 2 grid of tiles.
            # `f` is relative to the working directory, so resolve it there.
            in_path = os.path.abspath(f)
            # Tiles are written to input_dir, named after the current image.
            # NOTE(review): the tiles are not added to the metadata table and
            # are not placed in the product folder — confirm this is intended.
            out_path = os.path.join(input_dir, filename)

            ds = gdal.Open(in_path)
            if ds is None:
                print('Could not open ' + in_path + ', skipping split')
            else:
                band = ds.GetRasterBand(1)
                xsize = band.XSize
                ysize = band.YSize
                band = None
                ds = None

                tile_size_x = int(math.ceil(xsize/2))
                tile_size_y = int(math.ceil(ysize/2))

                # Separate loop names keep the tile offsets from clobbering
                # any other variable of this cell.
                for x_off in range(0, xsize, tile_size_x):
                    for y_off in range(0, ysize, tile_size_y):
                        # Argument list instead of a shell string: paths with
                        # spaces survive and -srcwin gets four clean integers,
                        # clamped so the window never leaves the raster.
                        tile_cmd = [
                            "gdal_translate", "-of", "GTIFF", "-srcwin",
                            str(x_off), str(y_off),
                            str(min(tile_size_x, xsize - x_off)),
                            str(min(tile_size_y, ysize - y_off)),
                            in_path,
                            out_path + str(x_off) + "_" + str(y_off) + ".tif",
                        ]
                        if subprocess.call(tile_cmd) != 0:
                            print('gdal_translate failed: ' + ' '.join(tile_cmd))

        # The acquisition time is encoded in the file name as YYYYMMDD_HHMMSS
        match = re.search(r"\d{8}_\d{6}", filename)
        if match is None:
            print('No acquisition timestamp in file name ' + filename + ', skipping')
            continue
        datestring = match.group(0)
        t = datetime.datetime.strptime(datestring, '%Y%m%d_%H%M%S')
        time_start = t.replace(tzinfo=datetime.timezone.utc).timestamp()*1000

        # add filename and time to the table
        records.append([filename, time_start])

else:
    print("No satellite selected.")

# save dataframe as csv for metadata
df = pd.DataFrame(records, columns=['filename','system:time_start']).set_index('filename')
meta = 'metadata.csv'
df.to_csv(meta)
df.head(n=5)

['Number of files to upload as assets = 105']


Unnamed: 0_level_0,system:time_start
filename,Unnamed: 1_level_1
RE3_20170904_3163721_RD_12bit_RGBREI_5m,1504523000000.0
RE5_20170902_3162816_RD_12bit_RGBREI_5m,1504351000000.0
RE5_20170525_3163017_RD_12bit_RGBREI_5m,1495711000000.0
RE5_20170902_3162916_RD_12bit_RGBREI_5m,1504351000000.0
RE2_20170409_3163721_RD_12bit_RGBREI_5m,1491736000000.0


In [30]:
# The upload itself is done with the Google Earth Engine Batch Asset Manager (geebam),
# see https://github.com/tracek/gee_asset_manager

user = "rogersckw9@gmail.com"

# The command needs a password to be typed in, which does not work from inside
# the notebook: it is only printed here and has to be pasted into a terminal.
if rapideye:
    geebam_template = "geebam upload --source {0} --dest users/rogersckw9/eo-bathymetry/rapideye-rgbrei -m {1} -u {2}"
elif triplesat:
    geebam_template = "geebam upload --source {0} --dest users/rogersckw9/eo-bathymetry/triplesat-rgbi -m {1} --large -u {2}"
else:
    geebam_template = None

if geebam_template is None:
    print("No satellite selected.")
else:
    # {0}: folder with the images, {1}: metadata csv inside it, {2}: account
    print(geebam_template.format(os.path.join(input_dir,dst), os.path.join(input_dir,dst,meta), user))

geebam upload --source P:/11202200.005-kpp-cip2018/data/Satellitedataportaal\RapidEye/RE_RD_12bit_RGBEI_5m_jp2000/ --dest users/rogersckw9/eo-bathymetry/rapideye -m P:/11202200.005-kpp-cip2018/data/Satellitedataportaal\RapidEye/RE_RD_12bit_RGBEI_5m_jp2000/metadata.csv -u rogersckw9@gmail.com


In [None]:
# Alternative route: copy each image to Google Cloud Storage first and ingest
# it into Earth Engine from there (this is for RapidEye).

for i, f in enumerate(files):
    print('Processing file ' + f + ', file index: ' + str(i))

    # extract time in UTC: the EXIF time stamp is read as local (Amsterdam)
    # time and converted; the file is closed again right after reading
    with open(f, 'rb') as fh:
        tags = exifread.process_file(fh, stop_tag="Image DateTime")
    datestring = str(tags["Image DateTime"].values)
    t = datetime.datetime.strptime(datestring, '%Y:%m:%d %H:%M:%S')
    local_t = local.localize(t, is_dst=None)
    utc_t = local_t.astimezone(pytz.utc)
    time_start = utc_t.strftime('%Y-%m-%dT%H:%M:%S')

    # parse file names
    filename = os.path.basename(f)
    filename_no_ext = os.path.splitext(filename)[0]

    # get nodata value ... UGLY, UGLY code!
    # The file is scanned as latin-1 text for a line containing "nodata_value";
    # the default below is used when no such line exists.
    nodata_value = -99999999
    with open(f, encoding="latin_1") as asc:
        for line in asc:
            if "nodata_value" in line.lower():
                nodata_value = line.split()[1]
                break

    run(r"C:\Users\Wilson\AppData\Local\Google\Cloud SDK\google-cloud-sdk\bin\gsutil.cmd cp {0} gs://eo-bathymetry/rapideye/{1}".format(os.path.join(input_dir,f),filename))

    # upload to GEE
    retry_count = 0

    while True:
        run("earthengine upload image --wait --asset_id=users/rogersckw9/rapideye/{1} --nodata_value={0} gs://eo-bathymetry/rapideye/{1}".format(nodata_value, filename))

        # check last task status: only the first entry of the task list is inspected
        tasks = ee.data.getTaskList()
        task_state = None
        for task in tasks:
            task_status = ee.data.getTaskStatus([task['id']])
            task_state = task_status[0]['state']
            print(task_status)
            break

        if task_state != 'FAILED':
            break # done
        else:
            retry_count += 1
            print('Retrying upload ' + str(retry_count) + ' ...')

        if retry_count > 10:
            print('Maximum number of retry reached, exiting ...')
            # non-zero status: giving up after repeated failures is an error
            sys.exit(1)

    # set time
    run("earthengine asset set --time_start {0} users/rogersckw9/rapideye/{1}".format(time_start, filename_no_ext))

# TODO: the satellite-specific handling that followed here was left unfinished
# (empty branches); only the fallback message is kept.
if not (rapideye or triplesat):
    print("No satellite selected.")

In [None]:
# If TripleSat images are larger than 10GB (RGBI tend to be) must split images first to upload to GEE.
# Every .tif found below is cut into a 2 x 2 grid of tiles, regardless of its size.
input_dir = r'P:\11202200.005-kpp-cip2018\data\Satellitedataportaal'
os.chdir(input_dir)

# find all .tif files (raw string: the backslashes are path separators, not escapes)
files = glob.glob(r'TripleSat\Tri_RD_8bit_RGBI_80cm\*.tif')#, recursive=True)
print(['Number of files to upload as assets = '+str(len(files))])

for f in files:
    filename = os.path.basename(f)
    filename_no_ext = os.path.splitext(filename)[0]
    in_path = os.path.join(input_dir,f)
    # tiles are written to input_dir as <name><xoff>_<yoff>.tif
    out_path = os.path.join(input_dir,filename_no_ext)

    ds = gdal.Open(in_path)
    if ds is None:
        print('Could not open ' + in_path + ', skipping')
        continue
    band = ds.GetRasterBand(1)
    xsize = band.XSize
    ysize = band.YSize
    band = None
    ds = None

    tile_size_x = int(math.ceil(xsize/2))
    tile_size_y = int(math.ceil(ysize/2))

    for i in range(0, xsize, tile_size_x):
        for j in range(0, ysize, tile_size_y):
            # Argument list instead of a shell string: paths with spaces
            # survive and -srcwin gets four clean integers, clamped so the
            # window never leaves the raster.
            tile_cmd = [
                "gdal_translate", "-of", "GTIFF", "-srcwin",
                str(i), str(j),
                str(min(tile_size_x, xsize - i)),
                str(min(tile_size_y, ysize - j)),
                in_path,
                out_path + str(i) + "_" + str(j) + ".tif",
            ]
            if subprocess.call(tile_cmd) != 0:
                print('gdal_translate failed: ' + ' '.join(tile_cmd))

In [None]:
# Inspect a .tif file
# Raw string: the backslashes are path separators, not escape sequences.
files = glob.glob(r'TripleSat\**\*.tif')
if files:
    print(gdal.Info(files[0]))
else:
    print('No .tif files found below TripleSat')

In [None]:
# Scratch check: show the UTC tzinfo object that the metadata cell passes to
# datetime.replace().
tzinfo=datetime.timezone.utc
print(tzinfo)