In [1]:
import os
import sys
import boto3
from time import time
from time import sleep
import rasterio
import xarray as xr
import rioxarray

In [2]:
#!pip install rioxarray --user

In [3]:
def _xr_open_rasterio_retry(s3_file_name, retries=20, sleeptime=6):
    """Open a raster with ``xr.open_rasterio``, retrying on rasterio I/O errors.

    Parameters
    ----------
    s3_file_name : str
        Path or URL handed straight to ``xr.open_rasterio``.
    retries : int, optional
        Maximum number of open attempts (default 20, the previously
        hard-coded value).
    sleeptime : float, optional
        Seconds to wait between failed attempts (default 6).

    Returns
    -------
    object or None
        Whatever ``xr.open_rasterio`` returned on the first successful
        attempt, or ``None`` when every attempt failed. ``None`` (rather
        than an exception) is kept for backward compatibility with callers
        that already cope with it.

    Notes
    -----
    Only ``rasterio.errors.RasterioIOError`` is retried; any other exception
    propagates immediately.
    NOTE(review): ``xr.open_rasterio`` is deprecated in newer xarray releases
    in favour of ``rioxarray.open_rasterio`` -- confirm the pinned version.
    """
    cnt = retries
    while cnt > 0:
        try:
            da = xr.open_rasterio(s3_file_name)
            print('SUCCESS _xr_open_rasterio_retry', s3_file_name, flush=True)
            return da
        except rasterio.errors.RasterioIOError as err:
            print("Unexpected error:", type(err))
            print('oops', cnt)
            print('oops', s3_file_name, flush=True)
            cnt -= 1
            # Do not waste a sleep after the final failed attempt.
            if cnt > 0:
                sleep(sleeptime)

    # All attempts exhausted: say so explicitly instead of silently
    # falling off the end of the function.
    print('FAILED _xr_open_rasterio_retry', s3_file_name, flush=True)
    return None

In [4]:
def xr_build_mosaic_ds(bucket, product, tifs):
    """Open every tile from S3 and merge them into a single object.

    Parameters
    ----------
    bucket : str
        Bucket name; each tile is opened as ``s3://{bucket}/{tif}``.
    product : str
        Name assigned to each tile's array before merging.
    tifs : iterable of str
        Tile keys relative to ``bucket``.

    Returns
    -------
    object or None
        The result of ``xr.merge`` over the tiles that loaded successfully,
        or ``None`` if the merge itself failed. Tiles that fail to open or
        to squeeze are reported and skipped (best effort).
    """
    tifs = list(tifs)  # allow any iterable and make it printable below
    start = time()
    my_da_list = []
    for tif in tifs:
        try:
            da = _xr_open_rasterio_retry(f's3://{bucket}/' + tif)
        except Exception:
            print('error on ', tif, flush=True)
            print('FATAL error on ', tif, flush=True)
            # Skip this tile. Without this, `da` from the previous
            # iteration would be squeezed and appended a second time.
            continue

        if da is None:
            # The retry helper gave up on this tile.
            print('error on ', tif, flush=True)
            print('FATAL error on ', tif, flush=True)
            continue

        try:
            da = da.squeeze().drop(labels='band')
            da.name = product
            my_da_list.append(da)
            elapsed = time() - start
            print(tif, elapsed)
        except Exception:
            print('FATAL SQUEEZE error on ', tif, flush=True)

    try:
        return xr.merge(my_da_list)
    except Exception:
        # Report the whole tile list: a merge failure is not specific to
        # the last tile, and the loop variable is unbound for empty input.
        print('FATAL MERGE error on ', tifs, flush=True)
        return None

In [5]:
def s3_push_delete_local(local_file, bucket, bucket_filepath):
    """Upload ``local_file`` to S3, then delete the local copy.

    The destination bucket is taken from the first '/'-separated component
    of ``bucket_filepath`` (via ``return_my_bucket``); everything after the
    first '/' is used as the object key. The ``bucket`` argument is accepted
    but not used by this function.

    The local file is removed only after the upload call returns.
    """
    target_bucket = return_my_bucket(bucket_filepath)
    # Drop the leading bucket component to obtain the object key.
    _, _, object_key = bucket_filepath.partition('/')
    print('PUSH', target_bucket, object_key)
    client = boto3.client('s3')
    with open(local_file, "rb") as fh:
        client.upload_fileobj(fh, target_bucket, object_key)
    os.remove(local_file)

In [6]:
def _run_command(cmd, verbose=False):
    if verbose:
        print(cmd)
    result = os.system(cmd)
    if result != 0:
        raise Exception('command "%s" failed with code %d.' % (cmd, result))

In [7]:
def cog_create_from_tif(src_tif, dst_cog):
    """Convert ``src_tif`` into ``dst_cog`` by running ``rio cogeo create``.

    Both paths are shell-quoted so names containing spaces or shell
    metacharacters are passed to the command as single arguments. Names made
    only of safe characters are left unchanged by the quoting.

    Raises whatever ``_run_command`` raises when the command fails.
    """
    from shlex import quote  # local import keeps this cell self-contained

    command = f'rio cogeo create {quote(src_tif)} {quote(dst_cog)}'
    _run_command(command)

In [8]:
def return_my_bucket(prefix_with_slash):
    """Return the first '/'-separated component of ``prefix_with_slash``.

    Also prints the split components and the selected bucket name.
    """
    components = prefix_with_slash.split('/')
    print('a=', components)
    bucket_name = components[0]
    print("the BUCKET=", bucket_name)
    return bucket_name

In [9]:
def xr_write_geotiff_from_ds(DS, primary_name, out_prefix_path):
    """Write ``DS`` to a local GeoTIFF, convert it to a COG and upload it.

    Parameters
    ----------
    DS : object exposing ``.rio.to_raster``
        The data to write (the notebook passes the merged mosaic).
    primary_name : str
        '/'-separated name of a source tile. Its last two components
        (or the single component, if there is no '/') are appended to
        ``out_prefix_path`` to build the destination path.
    out_prefix_path : str
        Destination prefix of the form ``bucket/some/prefix/``.

    Notes
    -----
    The intermediate GeoTIFF and the ``<cog>.aux.xml`` sidecar are removed
    even when a step fails, and a missing sidecar is tolerated. The COG
    itself is removed by ``s3_push_delete_local`` after a successful upload.
    """
    print(DS)  # DS is the xarray
    print(primary_name)  # first tif
    print(out_prefix_path)

    parts = primary_name.split('/')
    local_tif = parts[-1]
    # Last two components; slicing avoids an IndexError for a bare file name.
    just_tif = '/'.join(parts[-2:])
    local_cog = 'COG_' + local_tif
    local_xml = local_cog + '.aux.xml'

    output = out_prefix_path + just_tif
    # Derived from the output path instead of being hard-coded.
    # s3_push_delete_local derives the bucket from the path as well.
    bucket = output.split('/')[0]
    print(f'OUTPUT=={output}')
    try:
        DS.rio.to_raster(local_tif)
        cog_create_from_tif(local_tif, local_cog)
        s3_push_delete_local(local_cog, bucket, output)
    finally:
        # Always clean up intermediates; either may be absent.
        for leftover in (local_tif, local_xml):
            if os.path.exists(leftover):
                os.remove(leftover)

In [10]:
# Inputs for the mosaic build.
# `tifs` holds keys relative to `bucket`; xr_build_mosaic_ds prepends
# 's3://{bucket}/' to each entry, and `product` becomes each array's name.
bucket = 'ws-enduser'
product = 'etasw'
tifs = ['USA/r37.0_tile0/2000/etasw_200001.tif',
        'USA/r37.0_tile1/2000/etasw_200001.tif'] 

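# Additional tiles, currently disabled. NOTE: these entries are full s3:// URLs,
# while `tifs` expects bucket-relative keys; strip the 's3://ws-enduser/' prefix
# before enabling them.
# ,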
#         's3://ws-enduser/USA/r37.0_tile2/2000/etasw_200001.tif',
#         's3://ws-enduser/USA/r37.0_tile3/2000/etasw_200001.tif',
#         's3://ws-enduser/USA/r50.0_tile5/2000/etasw_200001.tif',
#         's3://ws-enduser/USA/r50.0_tile6/2000/etasw_200001.tif',
#         's3://ws-enduser/USA/r50.0_tile7/2000/etasw_200001.tif',
#         's3://ws-enduser/USA/r50.0_tile8/2000/etasw_200001.tif',
#         's3://ws-enduser/USA/r50.0_tile9/2000/etasw_200001.tif',]

#output_mos_file = 's3://ws-enduser/USA/conus_mos/2020/etasw_200001.tif'

In [11]:
# Sanity check: open the first tile directly (no retry wrapper) and display it.
s3_file_name = 's3://ws-enduser/USA/r37.0_tile0/2000/etasw_200001.tif'
da = xr.open_rasterio(s3_file_name)
da

In [12]:
# Build the mosaic from the configured tiles (result may be None if the merge failed).
DS = xr_build_mosaic_ds(bucket, product, tifs)

SUCCESS _xr_open_rasterio_retry s3://ws-enduser/USA/r37.0_tile0/2000/etasw_200001.tif
USA/r37.0_tile0/2000/etasw_200001.tif 0.003519773483276367
SUCCESS _xr_open_rasterio_retry s3://ws-enduser/USA/r37.0_tile1/2000/etasw_200001.tif
USA/r37.0_tile1/2000/etasw_200001.tif 0.07576584815979004


In [13]:
# Display the merged result.
DS

In [15]:
# Write the mosaic built above and upload it as a COG.
# (Re-run `DS = xr_build_mosaic_ds(bucket, product, tifs)` first on a fresh kernel.)
primary_name = tifs[0]  # first tif in list; its last two path components name the output
out_prefix_path = 'ws-enduser/USA/conus_mos/2000/'  # '<bucket>/<prefix>/'

xr_write_geotiff_from_ds(DS, primary_name, out_prefix_path)

<xarray.Dataset>
Dimensions:  (x: 12499, y: 6729)
Coordinates:
  * x        (x) float64 -125.0 -125.0 -125.0 -125.0 ... -99.0 -99.0 -99.0
  * y        (y) float64 23.0 23.0 23.0 23.01 23.01 ... 36.99 37.0 37.0 37.0
Data variables:
    etasw    (y, x) float64 -7.846e+39 -7.846e+39 -7.846e+39 ... 8.264 8.289
Attributes:
    transform:           (0.0020803384719955188, 0.0, -125.00034453048596, 0....
    crs:                 +init=epsg:4326
    res:                 (0.0020803384719955188, -0.0020810045000000006)
    is_tiled:            1
    nodatavals:          (nan,)
    scales:              (1.0,)
    offsets:             (0.0,)
    descriptions:        ('etasw_',)
    AREA_OR_POINT:       Area
    OVR_RESAMPLING_ALG:  NEAREST
USA/r37.0_tile0/2000/etasw_200001.tif
ws-enduser/USA/conus_mos/2000/
OUTPUT==ws-enduser/USA/conus_mos/2000/2000/etasw_200001.tif
a= ['ws-enduser', 'USA', 'conus_mos', '2000', '2000', 'etasw_200001.tif']
the BUCKET= ws-enduser
PUSH ws-enduser USA/conus_mos/2000/2

In [16]:
! aws s3 ls s3://ws-enduser/USA/conus_mos/2000/2000/

2021-08-26 20:52:23  413988954 etasw_200001.tif


In [17]:
! rio info s3://ws-enduser/USA/conus_mos/2000/2000/etasw_200001.tif

{"blockxsize": 512, "blockysize": 512, "bounds": [-125.00034453048596, 37.00204819041505, -98.99819396901397, 22.99896890991505], "colorinterp": ["gray"], "compress": "deflate", "count": 1, "crs": "EPSG:4326", "descriptions": ["etasw"], "driver": "GTiff", "dtype": "float64", "height": 6729, "indexes": [1], "interleave": "band", "lnglat": [-111.99926924974997, 30.00050855016505], "mask_flags": [["nodata"]], "nodata": NaN, "res": [0.0020803384719955188, -0.0020810045000000006], "shape": [6729, 12499], "tiled": true, "transform": [0.0020803384719955188, 0.0, -125.00034453048596, 0.0, 0.0020810045000000006, 22.99896890991505, 0.0, 0.0, 1.0], "units": [null], "width": 12499}


In [18]:
! rio info s3://ws-enduser/USA/conus_mos/2000/2000/etasw_200001.tif | python -m json.tool

{
    "blockxsize": 512,
    "blockysize": 512,
    "bounds": [
        -125.00034453048596,
        37.00204819041505,
        -98.99819396901397,
        22.99896890991505
    ],
    "colorinterp": [
        "gray"
    ],
    "compress": "deflate",
    "count": 1,
    "crs": "EPSG:4326",
    "descriptions": [
        "etasw"
    ],
    "driver": "GTiff",
    "dtype": "float64",
    "height": 6729,
    "indexes": [
        1
    ],
    "interleave": "band",
    "lnglat": [
        -111.99926924974997,
        30.00050855016505
    ],
    "mask_flags": [
        [
            "nodata"
        ]
    ],
    "nodata": NaN,
    "res": [
        0.0020803384719955188,
        -0.0020810045000000006
    ],
    "shape": [
        6729,
        12499
    ],
    "tiled": true,
    "transform": [
        0.0020803384719955188,
        0.0,
        -125.00034453048596,
        0.0,
        0.0020810045000000006,
        22.99896890991505,
       

In [20]:
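# TODO: not working yet -- GDAL's /vsis3/ handler reports that AWS_SECRET_ACCESS_KEY
# is not defined (see the error output below).
#! gdalinfo /vsis3/ws-enduser/USA/conus_mos/2000/2000/etasw_200001.tif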

ERROR 15: AWS_SECRET_ACCESS_KEY configuration option not defined
gdalinfo failed - unable to open '/vsis3/ws-enduser/USA/conus_mos/2000/2000/etasw_200001.tif'.
