Threat to Coral Reefs from Watershed-Based Pollution	bio.024.3	http://www.wri.org/publication/reefs-risk-revisited

In [1]:
import numpy as np
import pandas as pd
import rasterio

import boto3
import requests as req

from matplotlib import pyplot as plt
%matplotlib inline
import os
import sys
import threading

Establish s3 location

In [2]:
s3_bucket = "wri-public-data"
s3_folder = "resourcewatch/bio_026_coral_reef_threat_from_watershed_pollution/"
s3_file = "bio_026_coral_reef_threat_from_watershed_pollution.tif"

s3_key_orig = s3_folder + s3_file
s3_key_edit = s3_key_orig[0:-4] + "_edit.tif"

Create local staging folder for holding data

If data already on s3, create a staging key and download to staging folder

In [None]:
staging_file = "/<staging_tif_name>.tif"
staging_key_orig = staging_folder + staging_file
staging_key_edit = staging_key_orig[0:-4] + "_edit.tif"

s3 = boto3.resource("s3")
s3.meta.client.download_file(s3_bucket, s3_key_orig, staging_key_orig)
s3.meta.client.download_file(s3_bucket, s3_key_edit, staging_key_edit)

If data in local storage, move to staging folder

In [4]:
local_folder = "/Users/nathansuberi/Desktop/WRI_Programming/RW_Data"
rw_data_type = "/Biodiversity"
# Topics include: [Society, Food, Forests, Water, Energy, Climate, Cities, Biodiversity, Commerce, Disasters]
local_file = "/rf_sed1.tif"
local_key = local_folder + rw_data_type + local_file

staging_key_orig = local_folder + rw_data_type + local_file
staging_key_edit = staging_key_orig[0:-4] + "_edit.tif"

#os.rename(local_key, staging_key_orig)

<b>Regardless of any needed edits, upload original file</b>

<i>Upload tif to S3 folder</i>

http://boto3.readthedocs.io/en/latest/guide/s3-example-creating-buckets.html

<i>Monitor Progress of Upload</i>

http://boto3.readthedocs.io/en/latest/_modules/boto3/s3/transfer.html
https://boto3.readthedocs.io/en/latest/guide/s3.html#using-the-transfer-manager

In [6]:
s3 = boto3.client("s3")

class ProgressPercentage(object):
        def __init__(self, filename):
            self._filename = filename
            self._size = float(os.path.getsize(filename))
            self._seen_so_far = 0
            self._lock = threading.Lock()

        def __call__(self, bytes_amount):
            # To simplify we'll assume this is hooked up
            # to a single filename.
            with self._lock:
                self._seen_so_far += bytes_amount
                percentage = (self._seen_so_far / self._size) * 100
                sys.stdout.write(
                    "\r%s  %s / %s  (%.2f%%)" % (
                        self._filename, self._seen_so_far, self._size,
                        percentage))
                sys.stdout.flush()

In [7]:
# Defined above:
# s3_bucket
# s3_key_orig
# s3_key_edit
# staging_key_orig
# staging_key_edit

s3.upload_file(local_key, s3_bucket, s3_key_orig,
                         Callback=ProgressPercentage(local_key))

/Users/nathansuberi/Desktop/WRI_Programming/RW_Data/Biodiversity/rf_sed1.tif  55635478 / 55635478.0  (100.00%)

Check for compression, projection

Create edit file if necessary

In [8]:
with rasterio.open(local_key) as src:
    print(src.profile)
    data = src.read(1)

{'driver': 'GTiff', 'dtype': 'int16', 'nodata': -32768.0, 'width': 80150, 'height': 25456, 'count': 1, 'crs': CRS({'proj': 'cea', 'lon_0': -160, 'lat_ts': 0, 'x_0': 0, 'y_0': 0, 'datum': 'WGS84', 'units': 'm', 'no_defs': True}), 'transform': (-20037507.067162, 500.0, 0.0, 6364114.668074458, 0.0, -500.0), 'affine': Affine(500.0, 0.0, -20037507.067162,
       0.0, -500.0, 6364114.668074458), 'blockxsize': 128, 'blockysize': 128, 'tiled': True, 'compress': 'lzw', 'interleave': 'band'}


In [9]:
outFile = staging_key_edit

with rasterio.open(local_key) as src:
    profile = src.profile
    print(profile)
    data = src.read(1)
    
    # Return lat info
    south_lat = -90
    north_lat = 90

    # Return lon info
    west_lon = -180
    east_lon = 180
    # Transformation function
    transform = rasterio.transform.from_bounds(west_lon, south_lat, east_lon, north_lat, data.shape[1], data.shape[0])
    # Profile
    profile = {
        'driver':'GTiff', 
        'height':data.shape[0], 
        'width':data.shape[1], 
        'count':1, 
        'dtype':'int16', 
        'transform':transform,
        'crs':'EPSG:4326', 
        'compress':'lzw', 
        'nodata': -9999
    }
    
    np.putmask(data, data==-32768, -9999)
    
    with rasterio.open(outFile, 'w', **profile) as dst:
        dst.write(data.astype(profile['dtype']), 1)

{'driver': 'GTiff', 'dtype': 'int16', 'nodata': -32768.0, 'width': 80150, 'height': 25456, 'count': 1, 'crs': CRS({'proj': 'cea', 'lon_0': -160, 'lat_ts': 0, 'x_0': 0, 'y_0': 0, 'datum': 'WGS84', 'units': 'm', 'no_defs': True}), 'transform': (-20037507.067162, 500.0, 0.0, 6364114.668074458, 0.0, -500.0), 'affine': Affine(500.0, 0.0, -20037507.067162,
       0.0, -500.0, 6364114.668074458), 'blockxsize': 128, 'blockysize': 128, 'tiled': True, 'compress': 'lzw', 'interleave': 'band'}


In [12]:
with rasterio.open(staging_key_edit) as src:
    print(src.profile)

{'driver': 'GTiff', 'dtype': 'int16', 'nodata': -9999.0, 'width': 80150, 'height': 25456, 'count': 1, 'crs': CRS({'init': 'epsg:4326'}), 'transform': (-180.0, 0.004491578290704928, 0.0, 90.0, 0.0, -0.007071024512884978), 'affine': Affine(0.004491578290704928, 0.0, -180.0,
       0.0, -0.007071024512884978, 90.0), 'compress': 'lzw', 'tiled': False, 'interleave': 'band'}


Upload edited files to S3

In [11]:
# Defined above:
# s3_bucket
# s3_key_orig
# s3_key_edit
# staging_key_orig
# staging_key_edit

s3.upload_file(staging_key_edit, s3_bucket, s3_key_edit,
                         Callback=ProgressPercentage(staging_key_edit))

/Users/nathansuberi/Desktop/WRI_Programming/RW_Data/Biodiversity/rf_sed1_edit.tif  26540340 / 26540340.0  (100.00%)

Layer definition

https://github.com/resource-watch/notebooks/blob/master/ResourceWatch/Api_definition/layer_definition.ipynb

Upload to server destination

In [None]:
# Too big for ArcGIS Online to upload using their web interface... 1 GB limit

Remove data from computer / instance

In [None]:
os.chdir("..")
!rm -r $Z_STAGING_FOLDER