Cumulative Thermal Stress on Coral Reefs	bio.029	https://coralreefwatch.noaa.gov/satellite/bleaching5km/index.php

In [None]:
import os
import shutil
import sys
import threading

import boto3
import numpy as np
import pandas as pd
import rasterio
import requests as req
from matplotlib import pyplot as plt

%matplotlib inline

Establish the S3 location

In [None]:
# Destination bucket and object keys on S3. Replace the <...> placeholders
# before running.
s3_bucket = "wri-public-data"
# NOTE(review): the prefix previously read "resoucewatch", which looks like a
# typo for "resourcewatch" - confirm against the bucket layout.
s3_folder = "resourcewatch/<tif_folder_name>"
s3_file = "<tif_file_name[s]>.tif"

# Join folder and file with exactly one "/" (plain concatenation glued the
# file name onto the folder name, producing a malformed key).
s3_key_orig = s3_folder.rstrip("/") + "/" + s3_file
# Key of the edited (reprojected / compressed) copy. splitext is used instead
# of slicing off four characters so the suffix lands correctly for any
# extension length.
s3_key_edit = os.path.splitext(s3_key_orig)[0] + "_edit.tif"

Create local staging folder for holding data

In [None]:
# Create the staging folder under the current directory (no complaint if it is
# already there), step into it, and remember its absolute path.
os.makedirs("staging", exist_ok=True)
os.chdir("staging")
staging_folder = os.getcwd()
# Exported so the shell commands further down can refer to the folder.
os.environ["Z_STAGING_FOLDER"] = staging_folder

If the data is already on S3, create a staging key and download it to the staging folder

In [None]:
# Local paths (inside the staging folder) for the original raster and for its
# edited copy. Replace the <...> placeholder before running.
staging_file = "/<staging_tif_name>.tif"
staging_key_orig = staging_folder + staging_file
staging_key_edit = os.path.splitext(staging_key_orig)[0] + "_edit.tif"

# Bind `s3` to a client rather than a resource: the upload cells call
# s3.upload_file, which only exists on a client, so re-running this cell must
# not leave `s3` holding a different kind of object.
s3 = boto3.client("s3")
s3.download_file(s3_bucket, s3_key_orig, staging_key_orig)
# NOTE(review): this assumes an edited copy already exists on S3 - confirm.
s3.download_file(s3_bucket, s3_key_edit, staging_key_edit)

If the data is in local storage, move it to the staging folder

In [None]:
# NOTE(review): machine-specific absolute path - point this at your own copy
# of the RW_Data folder before running.
local_folder = "/Users/nathansuberi/Desktop/WRI_Programming/RW_Data"
rw_data_type = "/<data_topic>"
# Topics include: [Society, Food, Forests, Water, Energy, Climate, Cities, Biodiversity, Commerce, Disasters]
local_file = "/<file_name>.tif"
local_key = local_folder + rw_data_type + local_file

# Paths inside the staging folder for the original raster and its edited copy.
staging_key_orig = staging_folder + local_file
staging_key_edit = os.path.splitext(staging_key_orig)[0] + "_edit.tif"

# shutil.move also works when the source and the staging folder live on
# different filesystems, where os.rename fails.
shutil.move(local_key, staging_key_orig)

<b>Regardless of any needed edits, upload original file</b>

<i>Upload tif to S3 folder</i>

http://boto3.readthedocs.io/en/latest/guide/s3-example-creating-buckets.html

<i>Monitor Progress of Upload</i>

http://boto3.readthedocs.io/en/latest/_modules/boto3/s3/transfer.html
https://boto3.readthedocs.io/en/latest/guide/s3.html#using-the-transfer-manager

In [None]:
# S3 client used by the upload cells below (they call s3.upload_file).
s3 = boto3.client("s3")

class ProgressPercentage(object):
    """Callable that prints a running progress line for one local file.

    An instance is passed as the ``Callback`` argument of the ``upload_file``
    calls in this notebook. Each call adds a byte count to a running total and
    rewrites a single status line on stdout.
    """

    def __init__(self, filename):
        """Record the file's name and on-disk size.

        Args:
            filename: Path of the file being transferred; it must exist,
                because its size is read immediately.

        Raises:
            OSError: If ``filename`` cannot be stat'ed.
        """
        self._filename = filename
        # Kept as a float so the percentage is a true division on Python 2 too.
        self._size = float(os.path.getsize(filename))
        self._seen_so_far = 0
        # Serialises updates in case the callback fires from several threads.
        self._lock = threading.Lock()

    def __call__(self, bytes_amount):
        """Add ``bytes_amount`` to the running total and redraw the status line.

        Args:
            bytes_amount: Number of bytes transferred since the previous call.
        """
        # To simplify we'll assume this is hooked up
        # to a single filename.
        with self._lock:
            self._seen_so_far += bytes_amount
            if self._size:
                percentage = (self._seen_so_far / self._size) * 100
            else:
                # A zero-byte file has nothing left to send; report it as
                # complete instead of dividing by zero.
                percentage = 100.0
            # The leading "\r" rewinds to the start of the line so each update
            # overwrites the previous one.
            sys.stdout.write(
                "\r%s  %s / %s  (%.2f%%)" % (
                    self._filename, self._seen_so_far, self._size,
                    percentage))
            sys.stdout.flush()

In [None]:
# Names this cell relies on (all set in earlier cells):
#   s3_bucket, s3_key_orig, s3_key_edit, staging_key_orig, staging_key_edit

# Push the untouched raster to S3, reporting progress as it goes.
s3.upload_file(
    Filename=staging_key_orig,
    Bucket=s3_bucket,
    Key=s3_key_orig,
    Callback=ProgressPercentage(staging_key_orig),
)

Check for compression, projection

Create edit file if necessary

In [None]:
# Check Compression, Projection
os.environ["Z_FILE_LOC"] = staging_key_orig

# Use GDAL command line tools... 
# can replace with rasterio eventually
!gdalinfo $Z_FILE_LOC

In [None]:
# Project & Compress

# Project with gdalwarp, compress with gdal_translate
# https://gis.stackexchange.com/questions/89444/file-size-inflation-normal-with-gdalwarp
# http://www.gdal.org/gdalwarp.html
# http://www.perrygeo.com/lazy-raster-processing-with-gdal-vrts.html

staging_temp_file = staging_folder + "/temp.vrt"
os.environ["Z_FILE_TEMP"] = staging_temp_file
!gdalwarp -t_srs EPSG:4326 -of vrt $ZFILELOC $Z_FILE_TEMP

# Compress
# http://www.gdal.org/gdal_translate.html

os.environ["Z_FILE_DEST"] = staging_key_edit
!gdal_translate -of GTiff -co COMPRESS=LZW $Z_FILE_TEMP $Z_FILE_DEST

In [None]:
!gdalinfo $Z_FILE_DEST

Examine data


In [None]:
# Load the edited raster. To inspect the untouched file instead, open
# staging_key_orig here.
with rasterio.open(staging_key_edit) as src:
    profile = src.profile
    print(profile)
    # Only the first band is pulled into memory.
    data = src.read(indexes=1)

In [None]:
# Echo the band-1 array loaded in the previous cell.
data

In [None]:
# Tally how many pixels carry each raster value.
# This works only if all the raster values are non-negative whole numbers,
# because np.bincount rejects anything else.
counts = {}
# Go row by row so only one row at a time is handed to np.bincount.
for row_values in data:
    for val, ct in enumerate(np.bincount(row_values)):
        # dict.get supplies the starting 0 for a value seen for the first
        # time; the previous bare `except:` also swallowed unrelated errors
        # (including KeyboardInterrupt) and silently reset the tally.
        counts[val] = counts.get(val, 0) + ct

In [None]:
# Show the value -> pixel-count tally built in the previous cell.
counts

Upload edited files to S3

In [None]:
# Names this cell relies on (all set in earlier cells):
#   s3_bucket, s3_key_orig, s3_key_edit, staging_key_orig, staging_key_edit

# Push the edited raster to S3, reporting progress as it goes.
s3.upload_file(
    Filename=staging_key_edit,
    Bucket=s3_bucket,
    Key=s3_key_edit,
    Callback=ProgressPercentage(staging_key_edit),
)

Layer definition

https://github.com/resource-watch/notebooks/blob/master/ResourceWatch/Api_definition/layer_definition.ipynb

Upload to server destination

In [None]:
# Too big for ArcGIS Online to upload using their web interface... 1 GB limit

Remove data from computer / instance

In [None]:
# Step out of the staging folder first, then delete it and everything in it.
os.chdir("..")
# shutil.rmtree receives the path as one string, so a folder whose path
# contains spaces cannot be split into several `rm` targets the way the
# unquoted shell expansion could.
shutil.rmtree(os.environ["Z_STAGING_FOLDER"])