Amphibian and Mammal Habitats Under Threat, Merged	bio.012

In [1]:
import boto3

import rasterio as rio
import numpy as np

import os
import sys
import threading

Establish local and S3 file locations

In [5]:
# Local storage
# The working folder defaults to the original author's path; set the
# RW_DATA_DIR environment variable to run the notebook on another machine.
# os.path.join(..., "") guarantees the trailing separator that the string
# concatenations below (and in later cells) rely on.
local_tmp_folder = os.path.join(
    os.environ.get("RW_DATA_DIR", "/Users/nathansuberi/Desktop/RW_Data/"), "")

# Input rasters, in the order they are merged
tmp1 = local_tmp_folder + "all_amphibians.tif"
tmp2 = local_tmp_folder + "all_mammals.tif"
merge_files = [tmp1, tmp2]

# Local path of the merged raster
tmp_merge = local_tmp_folder + "bio_012_merge.tif"

# S3 storage
s3_bucket = "wri-public-data"
s3_folder = "resourcewatch/raster/bio_012_amphibian_mammalian_species_under_threat_merge/"

s3_file1 = "resourcewatch/raster/bio_012_1_amphibian_species_under_threat/bio_012_1_amphibian_species_under_threat_edit.tif"
s3_file2 = "resourcewatch/raster/bio_012_2_mammalian_species_under_threat/bio_012_2_mammalian_species_under_threat_edit.tif"

# Make sure these match the order of the merge_files above
s3_files_to_merge = [s3_file1, s3_file2]
band_names = ["amphibian species under threat", "mammalian species under threat"]

# Object key of the merged raster inside s3_bucket
s3_key_merge = s3_folder + "bio_012_amphibian_mammalian_species_under_threat_merge.tif"

# S3 services
# Two handles onto the same service: the client is used for uploads, and the
# resource's underlying client (s3_download.meta.client) for downloads.
_S3_SERVICE = "s3"
s3_upload = boto3.client(_S3_SERVICE)
s3_download = boto3.resource(_S3_SERVICE)

# Helper function to view upload progress
class ProgressPercentage(object):
    """Callable that writes a running transfer percentage for one file to stdout.

    An instance is passed as the ``Callback`` argument of an upload call and is
    invoked with the number of bytes moved since the previous invocation.
    """

    def __init__(self, filename):
        """Record the file name and its on-disk size.

        Raises OSError (from os.path.getsize) if ``filename`` cannot be read.
        """
        self._filename = filename
        self._size = float(os.path.getsize(filename))
        self._seen_so_far = 0
        # The callback may be invoked from several threads; the lock keeps the
        # counter update and the progress line consistent.
        self._lock = threading.Lock()

    def __call__(self, bytes_amount):
        """Add ``bytes_amount`` to the running total and rewrite the progress line."""
        # To simplify we'll assume this is hooked up
        # to a single filename.
        with self._lock:
            self._seen_so_far += bytes_amount
            if self._size > 0:
                percentage = (self._seen_so_far / self._size) * 100
            else:
                # An empty file has nothing left to transfer; report it as
                # complete instead of dividing by zero.
                percentage = 100.0
            # "\r" returns to the start of the line so each update overwrites
            # the previous one.
            sys.stdout.write(
                "\r%s  %s / %s  (%.2f%%)" % (
                    self._filename, self._seen_so_far, self._size,
                    percentage))
            sys.stdout.flush()

In [4]:
# Downloading the individual source rasters is left disabled; only the
# existing merged raster is fetched here.
#for ix, s3_file in enumerate(s3_files_to_merge):
#    s3_download.meta.client.download_file(s3_bucket, s3_file, merge_files[ix])

# Build the download target from local_tmp_folder instead of repeating the
# absolute path, so the working directory is defined in one place.
tmp_file = local_tmp_folder + "bio_012_merge.tif"
s3_download.meta.client.download_file(s3_bucket, s3_key_merge, tmp_file)

In [13]:
# Bring in new species richness grids
# for file in merge_files:
#     with rio.open(file, 'r') as src:
#         print(src.profile)

for file in merge_files:
    os.environ["src"] = file
    os.environ["dst"] = file[:-4] + "_edit.tif"
    
    !gdal_translate -co COMPRESS=LZW -ot int32 -a_nodata 0 $src $dst
    
for file in merge_files:
    with rio.open(file[:-4] + "_edit.tif", 'r') as src:
        print(src.profile)
        
with rio.open(tmp_file, 'r') as src:
        print(src.profile)

Input file size is 43200, 21600
0...10...20...30...40...50...60...70...80...90...100 - done.
Input file size is 43200, 21600
0...10...20...30...40...50...60...70...80...90...100 - done.
{'driver': 'GTiff', 'dtype': 'int32', 'nodata': 0.0, 'width': 43200, 'height': 21600, 'count': 1, 'crs': CRS({'init': 'epsg:4326'}), 'transform': Affine(0.00833330000000387, 0.0, -180.0,
       0.0, -0.00833330000000387, 89.99928000008359), 'tiled': False, 'compress': 'lzw', 'interleave': 'band'}
{'driver': 'GTiff', 'dtype': 'int32', 'nodata': 0.0, 'width': 43200, 'height': 21600, 'count': 1, 'crs': CRS({'init': 'epsg:4326'}), 'transform': Affine(0.008333300000003874, 0.0, -180.0,
       0.0, -0.008333300000003874, 89.99928000008367), 'tiled': False, 'compress': 'lzw', 'interleave': 'band'}
{'driver': 'GTiff', 'dtype': 'int32', 'nodata': 0.0, 'width': 43200, 'height': 21600, 'count': 2, 'crs': CRS({'init': 'epsg:4326'}), 'transform': Affine(0.00833330000000387, 0.0, -180.0,
       0.0, -0.00833330000000

<b>Regardless of any needed edits, upload original file</b>

<i>Upload tif to S3 folder</i>

http://boto3.readthedocs.io/en/latest/guide/s3-example-creating-buckets.html

<i>Monitor Progress of Upload</i>

http://boto3.readthedocs.io/en/latest/_modules/boto3/s3/transfer.html
https://boto3.readthedocs.io/en/latest/guide/s3.html#using-the-transfer-manager

Create a merged GeoTIFF containing both the amphibian and mammal threat data, then upload it to S3

In [7]:
# Print the profile of the first input raster
first_input = merge_files[0]
with rio.open(first_input) as src:
    print(src.profile)

{'driver': 'GTiff', 'dtype': 'int32', 'nodata': 0.0, 'width': 43200, 'height': 21600, 'count': 1, 'crs': CRS({'init': 'epsg:4326'}), 'transform': (-180.0, 0.00833330000000387, 0.0, 89.99928000008359, 0.0, -0.00833330000000387), 'affine': Affine(0.00833330000000387, 0.0, -180.0,
       0.0, -0.00833330000000387, 89.99928000008359), 'bigtiff': 'no', 'compress': 'lzw', 'interleave': 'band', 'old_nodata': '2147483647.0', 'tiled': False}


In [15]:
band_names = ["amphibian species under threat", "mammalian species under threat",
              "number amphibian species", "number mammalian species"]


def _copy_bands(src_path, dst, band_pairs):
    """Copy bands block by block from the raster at src_path into dst.

    band_pairs is a sequence of (source band, destination band) index pairs.
    For every block window of the source, all listed bands are read first and
    then written to the same window of dst.
    """
    with rio.open(src_path) as src:
        for _, window in src.block_windows():
            blocks = [(dst_band, src.read(src_band, window=window))
                      for src_band, dst_band in band_pairs]
            for dst_band, block in blocks:
                dst.write_band(dst_band, block, window=window)


# Start from the existing merged raster's profile, widened to four bands
with rio.open(tmp_file, 'r') as src:
    src_profile = src.profile
    src_profile.update(band_names=band_names, count=4)

tmp_merge = local_tmp_folder + "bio_012_merge2.tif"

with rio.open(tmp_merge, 'w', **src_profile) as dst:
    # Bands 1-2 come from the existing merged raster, bands 3-4 from the
    # two converted "_edit" rasters.
    _copy_bands(tmp_file, dst, [(1, 1), (2, 2)])
    _copy_bands(tmp1[:-4] + "_edit.tif", dst, [(1, 3)])
    _copy_bands(tmp2[:-4] + "_edit.tif", dst, [(1, 4)])

            

In [16]:
# Print the profile of the newly written merged raster
merged_path = tmp_merge
with rio.open(merged_path) as src:
    print(src.profile)

{'driver': 'GTiff', 'dtype': 'int32', 'nodata': 0.0, 'width': 43200, 'height': 21600, 'count': 4, 'crs': CRS({'init': 'epsg:4326'}), 'transform': Affine(0.00833330000000387, 0.0, -180.0,
       0.0, -0.00833330000000387, 89.99928000008359), 'band_names': "['amphibian species under threat', 'mammalian species under threat', 'number amphibian species', 'number mammalian species']", 'bigtiff': 'no', 'compress': 'lzw', 'interleave': 'band', 'old_nodata': '2147483647.0', 'tiled': False}


Upload the edited file to S3

In [17]:
# Push the merged raster to S3, reporting progress on stdout as it goes
upload_progress = ProgressPercentage(tmp_merge)
s3_upload.upload_file(
    Filename=tmp_merge,
    Bucket=s3_bucket,
    Key=s3_key_merge,
    Callback=upload_progress,
)

/Users/nathansuberi/Desktop/RW_Data/bio_012_merge2.tif  416278195 / 416278195.0  (100.00%)

In [18]:
os.environ["s3_key"] = "s3://wri-public-data/" + s3_key_merge
os.environ["gs_key"] = "gs://resource-watch-public/" + s3_key_merge

!gsutil cp $s3_key $gs_key



Updates are available for some Cloud SDK components.  To install them,
please run:
  $ gcloud components update

Non-MD5 etag ("909690879cab9497a717144b97152621-50") present for key <Key: wri-public-data,resourcewatch/raster/bio_012_amphibian_mammalian_species_under_threat_merge/bio_012_amphibian_mammalian_species_under_threat_merge.tif>, data integrity checks are not possible.
Copying s3://wri-public-data/resourcewatch/raster/bio_012_amphibian_mammalian_species_under_threat_merge/bio_012_amphibian_mammalian_species_under_threat_merge.tif [Content-Type=binary/octet-stream]...
\ [1 files][397.0 MiB/397.0 MiB]   10.9 MiB/s                                   
Operation completed over 1 objects/397.0 MiB.                                    


In [19]:
# Earth Engine asset ID that the upload below targets; exported so the shell
# commands in this and later cells can reference it as $asset_id.
os.environ["asset_id"] = "users/resourcewatch/bio_012_species_richness_grids_merged"

# Start the upload. $gs_key is the environment variable set in the gsutil
# cell, so that cell must have run in this kernel first.
!earthengine upload image --asset_id=$asset_id $gs_key

Started upload task with ID: YAHLI5DAQH7CHVLF65NJVZVG


In [28]:
# Query the status of the upload task. The task ID is copied by hand from the
# output of the upload cell and has to be replaced after every new upload.
!earthengine task info YAHLI5DAQH7CHVLF65NJVZVG

YAHLI5DAQH7CHVLF65NJVZVG:
  State: RUNNING
  Type: Upload
  Description: Asset ingestion: users/resourcewatch/bio_012_species_richness_grids_merged
  Created: 2017-10-11 15:18:17.593000
  Started: 2017-10-11 15:31:00.672000
  Updated: 2017-10-11 15:41:00.789000


In [29]:
# Set the asset's ACL to public. $asset_id is the environment variable
# exported in the upload cell.
!earthengine acl set public $asset_id

In [30]:
# Export the band names as a single string (the repr of the Python list) so
# the shell command below can read them.
os.environ["band_names"] = str(band_names)

# Attach that string to the asset as a "band_names" string property.
# NOTE(review): this appears to rely on $asset_id not being a Python variable,
# so the whole line is left for the shell to expand - confirm before renaming.
!earthengine asset set -p '(string)band_names='"$band_names" $asset_id