In [None]:
#@title Copyright 2022 E.R. Maure { display-mode: "form" }

<table class="ee-notebook-buttons" align="left"><td>
<a target="_blank"  href="http://colab.research.google.com/github/google/earthengine-api/blob/master/python/examples/ipynb/Earth_Engine_asset_from_cloud_geotiff.ipynb">
    <img  width=60px src="https://upload.wikimedia.org/wikipedia/commons/d/d0/Google_Colaboratory_SVG_Logo.svg" /> Run in Google Colab</a>
</td><td>
<a target="_blank"  href="https://github.com/google/earthengine-api/blob/master/python/examples/ipynb/Earth_Engine_asset_from_cloud_geotiff.ipynb"><img width=36px src="https://github.githubassets.com/images/modules/logos_page/GitHub-Mark.png" /> View source on GitHub</a></td></table>

# Table of contents
- [Purpose](#Purpose)
- [Library import](#Library-import)
- [Input params](#Input-params)
- [Function definitions](#Function-definitions)
- [GCloud and manifest file](#GCloud-and-manifest-file)
- [Upload the file to GEE](#Upload-the-file-to-GEE)
***

# Purpose
[Return to the "Table of contents"](#Table-of-contents)

This notebook reproduces the steps in Figure 1 of https://doi.org/10.3390/rsxxxx

***

# Library import   

[Return to the "Table of contents"](#Table-of-contents)

Import all required modules below for the Level-2 resampling.  
Since we run the notebook in Google Colab, we can also install these modules prior to import.

---

In [9]:
!pip install netcdf4 pyresample gdal h5py pyproj

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [14]:
import json
import os
import subprocess
import time
from pathlib import Path

import numpy as np
from osgeo import osr, gdal

Clone the resampling repository.  
It contains sample data and Python scripts for file IO

In [15]:
# Create the /content/data directory (mounted onto in a later cell).
# exist_ok=True makes the cell safe to re-run and avoids the check-then-create race.
Path('/content/data').mkdir(parents=True, exist_ok=True)

In [17]:
# Colab-only helper: mount the Colab drive at the /content/data mount point.
from google.colab import drive
drive.mount('/content/data')

Mounted at /content/data


In [23]:
# Fetch the data-ingestion repository over HTTPS (SSH alternative kept below, commented out).
# NOTE(review): the saved output shows this clone failing with a username prompt —
# presumably the repository is private or the URL is wrong; confirm before sharing.
!git clone https://github.com/npec/ee-oc-data-ingestion.git
# !git clone git@github.com:npec/ee-oc-data-ingestion.git

Cloning into 'ee-oc-data-ingestion'...
fatal: could not read Username for 'https://github.com': No such device or address


In [21]:
# Work from inside the cloned repository; the relative paths used later
# (sample_data/, result/) are presumably resolved from here — requires a successful clone.
%cd ee-oc-data-ingestion

[Errno 2] No such file or directory: 'ee-oc-data-ingestion'
/content


Local imports

In [None]:
# from . import SwathResample

# Input params
[Return to the "Table of contents"](#Table-of-contents)

Define the input file (netCDF4 or HDF5), output projection for the GeoTIFF file and the area id.  
We can also define here the [Google cloud bucket](https://cloud.google.com/storage/docs/gsutil) and the [Earth Engine Asset](https://developers.google.com/earth-engine/guides/asset_manager).  The EE asset is where the GeoTIFF file is uploaded.

---

1. The case of MODIS/Aqua sample file

In [None]:
# Sample MODIS/Aqua file
INPUT_FILE = Path('sample_data/A2022125035500.L2_LAC_OC.nc')
# Target projection
PROJ_NAME = 'laea'
# Output path (created on demand; exist_ok keeps the cell re-runnable)
OPATH = Path('result').absolute()
OPATH.mkdir(parents=True, exist_ok=True)
# GeoTIFF output
TRG_TIFF = OPATH.joinpath('A2022125035500.L2_LAC_OC.tif')
# Projection area ID
AREA_ID = 'custom'
# Gcloud bucket
BUCKET = 'gs://bucket_name'
# Your user folder name and new asset name.
# Giving the name of an existing asset will result in an error
ASSET_ID = '/users/name/folder-or-collection-id/new-asset'
# Asset name should not contain dots (.)
ASSET_NAME = 'A2022125035500_L2_LAC_OC'

2. The case of SGLI/GCOM-C sample files

In [None]:
# # Sample SGLI/GCOM-C files
# INPUT_FILE = [Path('sample_data/GC1SG1_202205030152F05810_L2SG_IWPRQ_3000.h5'),
#               Path('sample_data/GC1SG1_202205030152F05810_L2SG_NWLRQ_3000.h5')]
              
# # Target projection
# PROJ_NAME = 'laea'
# # Projection area ID
# # Output Path
# OPATH = Path('result').absolute()
# if not OPATH.is_dir():
#     OPATH.mkdir(parents=True)
# # GeoTIFF output
# TRG_TIFF = OPATH.joinpath('GC1SG1_202205030152F05810_L2SG_OC_3000.tif')
# # projection id
# AREA_ID = 'custom'
# # Gcloud bucket 
# BUCKET = 'gs://bucket_name'
# # Your user folder name and new asset name.
# # Giving a name of an existing asset will result in error
# ASSET_ID = '/users/name/folder-or-collection-id/new-asset'
# # Asset name should not contain dots (.)
# ASSET_NAME = 'GC1SG1_202205030152F05810_L2SG_OC_3000'

# Function definitions
[Return to the "Table of contents"](#Table-of-contents)

Operations with Level-2 file  
`SwathResample` contains the following helper functions
1. `SwathResample.resample` - for swath resampling; accepts data, cutoff and sequence number
2. `SwathResample.scale` - for scaling data into integers; accepts data and varname
3. `SwathResample.open` - opens a new GeoTIFF file in write mode; accepts the number of bands
4. `SwathResample.append` - appends var(i) to the opened GeoTIFF; accepts the band sequence number and varname
5. `SwathResample.translate` - GDAL-translates the created GeoTIFF
6. `SwathResample.close` - closes the files, both netCDF4/HDF5 and GeoTIFF

In [None]:
# Resample the Level-2 swath onto the target grid and write one GeoTIFF band per variable.
# NOTE(review): `SwathResample` is not imported in any visible cell — presumably it comes
# from the cloned repository; confirm the import before running this cell.
with SwathResample(INPUT_FILE, trg_tif=TRG_TIFF, srs=PROJ_NAME, area_id=AREA_ID) as fid:

    # get the spatial resolution from file metadata
    roi = fid.spatial_resolution()

    # variables to map (in the case of SGLI two files are passed,
    # since CHL and Rrs are in separate files)
    keys = fid.get_keys()
    joined = '\n\t'.join(keys)
    print(f'Variables being mapped\n\t{joined}')

    # open GeoTIFF file
    fid.open(bands=len(keys))

    # to avoid memory errors, especially with SGLI 250 m, process one variable at a time
    for i, key in enumerate(keys):
        # Read data from file
        sds = fid.get_data(key=key)
        # Pyresample, use twice the resolution
        # to compensate for the decrease in resolution towards the swath edge
        res = fid.resample(data=sds, i=i, roi=roi*2)
        # Scale data to int
        fid.data = fid.scale(key=key, data=res)
        # output result in GeoTIFF
        fid.append(band=i, key=key)

    # translate the tif
    fid.translate()
    # Argument list instead of a shell string: paths with spaces or shell
    # metacharacters are passed to gdalinfo verbatim.
    subprocess.check_call(['gdalinfo', str(TRG_TIFF)])
    # A temporary file is created internally; remember it for clean-up
    tmp_file = fid.tmp_tif
# Remove the temporary file only after the context manager has exited
tmp_file.unlink(missing_ok=True)

# GCloud and manifest file
[Return to the "Table of contents"](#Table-of-contents)

1. Send file to Google Cloud bucket (or file can also be [uploaded](https://developers.google.com/earth-engine/guides/image_upload) directly into GEE)
2. Build ingestion manifest file 
3. Send to GEE using Earth Engine [command-line tools](https://developers.google.com/earth-engine/guides/command_line#upload) 

`build_manifest` accepts the GeoTIFF file, asset_id, asset_name and uri (location of the file in the Gcloud bucket), and builds the manifest file.

In [None]:
def build_manifest(file, asset_id, asset_name, uri):
    """
    Write an Earth Engine ingestion manifest (JSON) for a GeoTIFF.

    The GeoTIFF is opened read-only with GDAL. Its dataset-level metadata,
    merged with the metadata of every band, becomes the manifest
    ``properties``; one ``bands`` entry is emitted per raster band.

    Parameters
    ----------
    file : str or pathlib.Path
        GeoTIFF to describe. Its stem is used to name the manifest file.
    asset_id : str
        Value written to the manifest ``name`` field.
    asset_name : str
        Tileset id, referenced by every band entry.
    uri : str
        Location of the GeoTIFF, written to the tileset ``uris`` list.

    Returns
    -------
    pathlib.Path
        Absolute path of the written ``result/<stem>_manifest.json`` file.

    Raises
    ------
    RuntimeError
        If GDAL cannot open ``file``.
    KeyError
        If the dataset metadata lacks ``time_coverage_start`` or
        ``time_coverage_end``.
    """
    dataset = gdal.Open(str(file), gdal.GA_ReadOnly)
    if dataset is None:
        # Without gdal.UseExceptions() a failed open returns None
        raise RuntimeError(f'GDAL could not open {file}')

    # Work on a copy so popping the time stamps never touches GDAL's own dict
    attrs = dict(dataset.GetMetadata())

    data = {
        "name": asset_id,
        "tilesets": [
            {'id': asset_name,
             "sources": [
                 {"uris": [uri]}
             ]}
        ],
        "start_time": attrs.pop("time_coverage_start"),
        "end_time": attrs.pop("time_coverage_end"),
        'bands': [],
        # same dict object: band metadata merged below ends up in the manifest
        "properties": attrs
    }

    for count in range(dataset.RasterCount):
        # GDAL band numbers are 1-based, manifest band indices are 0-based
        band = dataset.GetRasterBand(count + 1)
        varname = band.GetDescription()
        attrs.update(band.GetMetadata())

        entry = {"id": varname,
                 'tileset_id': asset_name,
                 "tileset_band_index": count}
        if varname == 'l2_flags':
            # flags are sampled rather than averaged when building pyramids
            entry["pyramidingPolicy"] = "SAMPLE"
        else:
            entry["pyramidingPolicy"] = "MEAN"
            novalue = band.GetNoDataValue()
            # a band without a nodata value has nothing to declare as missing
            if novalue is not None:
                entry["missing_data"] = {"values": [float(novalue)]}
        data['bands'].append(entry)

    # Drop the GDAL references so the file handle is released
    band = dataset = None

    manifest = Path(f'result/{Path(file).stem}_manifest.json').absolute()
    manifest.parent.mkdir(parents=True, exist_ok=True)
    with open(manifest, 'w') as mfs:
        json.dump(data, mfs)
    return manifest

In [None]:
# -------------
# GCloud upload
# -------------
# uncomment below if bucket is defined
# uri = f'{BUCKET}/{Path(TRG_TIFF).name}'
# cmd = f'gsutil cp {TRG_TIFF} {BUCKET}'
# info = f'{"-" * len(cmd)}'
# print(f'{info}\n{cmd}\n{info}')
# subprocess.check_call(cmd, shell=True)

In [None]:
# --------------
# build manifest
# --------------
# uncomment below if file is being uploaded through GCloud
# manifest = build_manifest(file=TRG_TIFF, 
#                           asset_id=ASSET_ID, 
#                           asset_name=ASSET_NAME, 
#                           uri=uri)

# Upload the file to GEE

[Return to the "Table of contents"](#Table-of-contents)

## Start an authorized session

To be able to upload the file into your Earth Engine asset, you need to authenticate as yourself when you make the request.  You can use credentials from the Earth Engine authenticator to start an [`AuthorizedSession`](https://google-auth.readthedocs.io/en/master/reference/google.auth.transport.requests.html#google.auth.transport.requests.AuthorizedSession).  You can then use the `AuthorizedSession` to send requests to Earth Engine.

In [None]:
# import ee
# from google.auth.transport.requests import AuthorizedSession

# ee.Authenticate()  #  or !earthengine authenticate --auth_mode=gcloud
# session = AuthorizedSession(ee.data.get_persistent_credentials())

In [None]:
# ----------------
# Send file to GEE
# ----------------  
# cmd = f'earthengine upload image --manifest {manifest}'
# task = subprocess.check_output(cmd, shell=True)
# print(task)