High Density Areas of Urban Development	cit.014	http://data.jrc.ec.europa.eu/dataset/jrc-ghsl-ghs_smod_pop_globe_r2016a

In [4]:
import numpy as np
import pandas as pd
import rasterio

import boto3
import requests as req

from matplotlib import pyplot as plt
%matplotlib inline
import os
import sys
import threading

Establish s3 location

In [5]:
# Investigate what the data in these rasters means, and whether we can 
# Display high and low density clusters separately as is
s3_bucket = "wri-public-data"
s3_folder = "resourcewatch/cit_014_areas_of_urban_development/"

s3_files = ["cit_014_areas_of_urban_development_1975.tif",
            "cit_014_areas_of_urban_development_1990.tif",
            "cit_014_areas_of_urban_development_2000.tif",
            "cit_014_areas_of_urban_development_2015.tif",
            "cit_014_areas_of_urban_development_2015_HDC.tif",
            "cit_014_areas_of_urban_development_2015_LDC.tif"]
s3_file_merge = "cit_014_areas_of_urban_development_merge.tif"

s3_key_origs = []
s3_key_edits = []

for file in s3_files:
    orig = s3_folder + file
    s3_key_origs.append(orig)
    s3_key_edits.append(orig[0:-4] + "_edit.tif")
    
s3_key_merge = s3_folder + s3_file_merge

In [3]:
s3_key_edits

['resourcewatch/cit_014_areas_of_urban_development/cit_014_areas_of_urban_development_1975_edit.tif',
 'resourcewatch/cit_014_areas_of_urban_development/cit_014_areas_of_urban_development_1990_edit.tif',
 'resourcewatch/cit_014_areas_of_urban_development/cit_014_areas_of_urban_development_2000_edit.tif',
 'resourcewatch/cit_014_areas_of_urban_development/cit_014_areas_of_urban_development_2015_edit.tif']

Create local staging folder for holding data

In [None]:
!mkdir staging
os.chdir("staging")
staging_folder = os.getcwd()
os.environ["Z_STAGING_FOLDER"] = staging_folder

Local files

In [12]:
local_folder = "/Users/nathansuberi/Desktop/WRI_Programming/RW_Data"
rw_data_type = "/Cities/"
# Topics include: [Society, Food, Forests, Water, Energy, Climate, Cities, Biodiversity, Commerce, Disasters]

local_files = [
    "GHS_SMOD_POP1975_GLOBE_R2016A_54009_1k_v1_0/GHS_SMOD_POP1975_GLOBE_R2016A_54009_1k_v1_0.tif",
    "GHS_SMOD_POP1990_GLOBE_R2016A_54009_1k_v1_0/GHS_SMOD_POP1990_GLOBE_R2016A_54009_1k_v1_0.tif",
    "GHS_SMOD_POP2000_GLOBE_R2016A_54009_1k_v1_0/GHS_SMOD_POP2000_GLOBE_R2016A_54009_1k_v1_0.tif",
    "GHS_SMOD_POP2015_GLOBE_R2016A_54009_1k_v1_0/GHS_SMOD_POP2015_GLOBE_R2016A_54009_1k_v1_0.tif",
    "GHS_SMOD_POP2015HDC_GLOBE_R2016A_54009_1k_v1_0/GHS_SMOD_POP2015HDC_GLOBE_R2016A_54009_1k_v1_0.tif",
    "GHS_SMOD_POP2015LDC_GLOBE_R2016A_54009_1k_v1_0/GHS_SMOD_POP2015LDC_GLOBE_R2016A_54009_1k_v1_0.tif"
]

local_orig_keys = []
local_edit_keys = []
for file in local_files:
    local_orig_keys.append(local_folder + rw_data_type + file)
    local_edit_keys.append(local_folder + rw_data_type + file[0:-4] + "_edit.tif")

In [24]:
local_orig_keys

['/Users/nathansuberi/Desktop/WRI_Programming/RW_Data/Cities/GHS_SMOD_POP1975_GLOBE_R2016A_54009_1k_v1_0/GHS_SMOD_POP1975_GLOBE_R2016A_54009_1k_v1_0.tif',
 '/Users/nathansuberi/Desktop/WRI_Programming/RW_Data/Cities/GHS_SMOD_POP1990_GLOBE_R2016A_54009_1k_v1_0/GHS_SMOD_POP1990_GLOBE_R2016A_54009_1k_v1_0.tif',
 '/Users/nathansuberi/Desktop/WRI_Programming/RW_Data/Cities/GHS_SMOD_POP2000_GLOBE_R2016A_54009_1k_v1_0/GHS_SMOD_POP2000_GLOBE_R2016A_54009_1k_v1_0.tif',
 '/Users/nathansuberi/Desktop/WRI_Programming/RW_Data/Cities/GHS_SMOD_POP2015_GLOBE_R2016A_54009_1k_v1_0/GHS_SMOD_POP2015_GLOBE_R2016A_54009_1k_v1_0.tif',
 '/Users/nathansuberi/Desktop/WRI_Programming/RW_Data/Cities/GHS_SMOD_POP2015HDC_GLOBE_R2016A_54009_1k_v1_0/GHS_SMOD_POP2015HDC_GLOBE_R2016A_54009_1k_v1_0.tif',
 '/Users/nathansuberi/Desktop/WRI_Programming/RW_Data/Cities/GHS_SMOD_POP2015LDC_GLOBE_R2016A_54009_1k_v1_0/GHS_SMOD_POP2015LDC_GLOBE_R2016A_54009_1k_v1_0.tif']

<b>Regardless of any needed edits, upload original file</b>

<i>Upload tif to S3 folder</i>

http://boto3.readthedocs.io/en/latest/guide/s3-example-creating-buckets.html

<i>Monitor Progress of Upload</i>

http://boto3.readthedocs.io/en/latest/_modules/boto3/s3/transfer.html
https://boto3.readthedocs.io/en/latest/guide/s3.html#using-the-transfer-manager

In [13]:
s3 = boto3.client("s3")

class ProgressPercentage(object):
        def __init__(self, filename):
            self._filename = filename
            self._size = float(os.path.getsize(filename))
            self._seen_so_far = 0
            self._lock = threading.Lock()

        def __call__(self, bytes_amount):
            # To simplify we'll assume this is hooked up
            # to a single filename.
            with self._lock:
                self._seen_so_far += bytes_amount
                percentage = (self._seen_so_far / self._size) * 100
                sys.stdout.write(
                    "\r%s  %s / %s  (%.2f%%)" % (
                        self._filename, self._seen_so_far, self._size,
                        percentage))
                sys.stdout.flush()

In [9]:
# Defined above:
# s3_bucket
# s3_key_orig
# s3_key_edit
# staging_key_orig
# staging_key_edit
for i in range(0,6):
    print(i)
    s3.upload_file(local_orig_keys[i], s3_bucket, s3_key_origs[i],
                   Callback=ProgressPercentage(local_orig_keys[i]))

0
/Users/nathansuberi/Desktop/WRI_Programming/RW_Data/Cities/GHS_SMOD_POP1975_GLOBE_R2016A_54009_1k_v1_0/GHS_SMOD_POP1975_GLOBE_R2016A_54009_1k_v1_0.tif  6849612 / 6849612.0  (100.00%)1
/Users/nathansuberi/Desktop/WRI_Programming/RW_Data/Cities/GHS_SMOD_POP1990_GLOBE_R2016A_54009_1k_v1_0/GHS_SMOD_POP1990_GLOBE_R2016A_54009_1k_v1_0.tif  7551423 / 7551423.0  (100.00%)2
/Users/nathansuberi/Desktop/WRI_Programming/RW_Data/Cities/GHS_SMOD_POP2000_GLOBE_R2016A_54009_1k_v1_0/GHS_SMOD_POP2000_GLOBE_R2016A_54009_1k_v1_0.tif  7986928 / 7986928.0  (100.00%)3
/Users/nathansuberi/Desktop/WRI_Programming/RW_Data/Cities/GHS_SMOD_POP2015_GLOBE_R2016A_54009_1k_v1_0/GHS_SMOD_POP2015_GLOBE_R2016A_54009_1k_v1_0.tif  7980523 / 7980523.0  (100.00%)4
/Users/nathansuberi/Desktop/WRI_Programming/RW_Data/Cities/GHS_SMOD_POP2015HDC_GLOBE_R2016A_54009_1k_v1_0/GHS_SMOD_POP2015HDC_GLOBE_R2016A_54009_1k_v1_0.tif  7666110 / 7666110.0  (100.00%)5
/Users/nathansuberi/Desktop/WRI_Programming/RW_Data/Cities/GHS_SMOD_POP2

Check for compression, projection

Create edit file if necessary

In [15]:
# Check Compression, Projection
with rasterio.open(local_orig_keys[0]) as src:
    pro0 = src.profile
    data0 = src.read(1)
with rasterio.open(local_orig_keys[1]) as src:
    pro1 = src.profile
    data1 = src.read(1)
with rasterio.open(local_orig_keys[2]) as src:
    pro2 = src.profile
    data2 = src.read(1)
with rasterio.open(local_orig_keys[3]) as src:
    pro3 = src.profile
    data3 = src.read(1)
with rasterio.open(local_orig_keys[4]) as src:
    pro4 = src.profile
    data4 = src.read(1)
with rasterio.open(local_orig_keys[5]) as src:
    pro5 = src.profile
    data5 = src.read(1)

# uniq0 = np.unique(data0, return_counts=True)
# uniq1 = np.unique(data1, return_counts=True)
# uniq2 = np.unique(data2, return_counts=True)
# uniq3 = np.unique(data3, return_counts=True)
# uniq4 = np.unique(data4, return_counts=True)
# uniq5 = np.unique(data5, return_counts=True)

In [20]:
uniq4

(array([  0.00000000e+00,   1.00000000e+00,   2.00000000e+00, ...,
          1.38420000e+04,   1.38430000e+04,   1.38440000e+04]),
 array([540112205,        19,        47, ...,        19,        31,
               60]))

In [16]:
# Examine each of the profiles - are they all the same data type?
print(pro0)
print(pro1)
print(pro2)
print(pro3)
print(pro4)
print(pro5)
profiles = [pro0, pro1, pro2, pro3, pro4, pro5]

{'driver': 'GTiff', 'dtype': 'uint8', 'nodata': None, 'width': 35497, 'height': 15236, 'count': 1, 'crs': CRS({'proj': 'moll', 'lon_0': 0, 'x_0': 0, 'y_0': 0, 'ellps': 'WGS84', 'units': 'm', 'no_defs': True}), 'transform': (-17619594.54744353, 1000.0, 0.0, 8751029.46186849, 0.0, -1000.0), 'affine': Affine(1000.0, 0.0, -17619594.54744353,
       0.0, -1000.0, 8751029.46186849), 'blockxsize': 512, 'blockysize': 512, 'tiled': True, 'compress': 'lzw', 'interleave': 'band'}
{'driver': 'GTiff', 'dtype': 'uint8', 'nodata': None, 'width': 35497, 'height': 15236, 'count': 1, 'crs': CRS({'proj': 'moll', 'lon_0': 0, 'x_0': 0, 'y_0': 0, 'ellps': 'WGS84', 'units': 'm', 'no_defs': True}), 'transform': (-17619594.54744353, 1000.0, 0.0, 8751029.46186849, 0.0, -1000.0), 'affine': Affine(1000.0, 0.0, -17619594.54744353,
       0.0, -1000.0, 8751029.46186849), 'blockxsize': 512, 'blockysize': 512, 'tiled': True, 'compress': 'lzw', 'interleave': 'band'}
{'driver': 'GTiff', 'dtype': 'uint8', 'nodata': None

Upload edited files to S3

In [17]:
# Defined above:
# s3_bucket
# s3_key_orig
# s3_key_edit
# staging_key_orig
# staging_key_edit

for i in range(0,6):
    orig_key = local_orig_keys[i]
    edit_key = local_edit_keys[i]
    # Use rasterio to reproject and store locally, then upload
    with rasterio.open(orig_key) as src:
        kwargs = profiles[i]
        print(kwargs)
        kwargs.update(
            driver='GTiff',
            dtype=rasterio.int32,  #rasterio.int16, rasterio.int32, rasterio.uint8,rasterio.uint16, rasterio.uint32, rasterio.float32, rasterio.float64
            count=1,
            compress='lzw',
            nodata=0,
            bigtiff='NO',
            crs = 'EPSG:4326', 
        )
        
        windows = src.block_windows()
        
        with rasterio.open(edit_key, 'w', **kwargs) as dst:
            for idx, window in windows:
                src_data = src.read(1, window=window)
                formatted_data = src_data.astype("int32")
                dst.write_band(1, formatted_data, window=window)
    
    s3.upload_file(edit_key, s3_bucket, s3_key_edits[i],
               Callback=ProgressPercentage(edit_key))

{'driver': 'GTiff', 'dtype': 'uint8', 'nodata': None, 'width': 35497, 'height': 15236, 'count': 1, 'crs': CRS({'proj': 'moll', 'lon_0': 0, 'x_0': 0, 'y_0': 0, 'ellps': 'WGS84', 'units': 'm', 'no_defs': True}), 'transform': (-17619594.54744353, 1000.0, 0.0, 8751029.46186849, 0.0, -1000.0), 'affine': Affine(1000.0, 0.0, -17619594.54744353,
       0.0, -1000.0, 8751029.46186849), 'blockxsize': 512, 'blockysize': 512, 'tiled': True, 'compress': 'lzw', 'interleave': 'band'}


  transform = guard_transform(transform)


/Users/nathansuberi/Desktop/WRI_Programming/RW_Data/Cities/GHS_SMOD_POP1975_GLOBE_R2016A_54009_1k_v1_0/GHS_SMOD_POP1975_GLOBE_R2016A_54009_1k_v1_0_edit.tif  11308739 / 11308739.0  (100.00%){'driver': 'GTiff', 'dtype': 'uint8', 'nodata': None, 'width': 35497, 'height': 15236, 'count': 1, 'crs': CRS({'proj': 'moll', 'lon_0': 0, 'x_0': 0, 'y_0': 0, 'ellps': 'WGS84', 'units': 'm', 'no_defs': True}), 'transform': (-17619594.54744353, 1000.0, 0.0, 8751029.46186849, 0.0, -1000.0), 'affine': Affine(1000.0, 0.0, -17619594.54744353,
       0.0, -1000.0, 8751029.46186849), 'blockxsize': 512, 'blockysize': 512, 'tiled': True, 'compress': 'lzw', 'interleave': 'band'}
/Users/nathansuberi/Desktop/WRI_Programming/RW_Data/Cities/GHS_SMOD_POP1990_GLOBE_R2016A_54009_1k_v1_0/GHS_SMOD_POP1990_GLOBE_R2016A_54009_1k_v1_0_edit.tif  12215596 / 12215596.0  (100.00%){'driver': 'GTiff', 'dtype': 'uint8', 'nodata': None, 'width': 35497, 'height': 15236, 'count': 1, 'crs': CRS({'proj': 'moll', 'lon_0': 0, 'x_0': 0,

In [18]:
s3_file_merge

'cit_014_areas_of_urban_development_merge.tif'

Merge files and upload to s3

In [19]:
merge_key = './'+s3_file_merge

kwargs = profiles[i]
print(kwargs)
kwargs.update(
    driver='GTiff',
    dtype=rasterio.int32,  #rasterio.int16, rasterio.int32, rasterio.uint8,rasterio.uint16, rasterio.uint32, rasterio.float32, rasterio.float64
    count=len(profiles),
    compress='lzw',
    nodata=0,
    bigtiff='NO',
    crs = 'EPSG:4326', 
)

with rasterio.open(merge_key, 'w', **kwargs) as dst:
    for idx, file in enumerate(local_edit_keys):
        print(idx)
        with rasterio.open(file) as src:

            band = idx+1
            windows = src.block_windows()

            for win_id, window in windows:
                src_data = src.read(1, window=window)
                dst.write_band(band, src_data, window=window)

s3.upload_file(merge_key, s3_bucket, s3_key_merge,
           Callback=ProgressPercentage(merge_key))

{'driver': 'GTiff', 'dtype': 'int32', 'nodata': 0, 'width': 35497, 'height': 15236, 'count': 1, 'crs': 'EPSG:4326', 'transform': (-17619594.54744353, 1000.0, 0.0, 8751029.46186849, 0.0, -1000.0), 'affine': Affine(1000.0, 0.0, -17619594.54744353,
       0.0, -1000.0, 8751029.46186849), 'blockxsize': 512, 'blockysize': 512, 'tiled': True, 'compress': 'lzw', 'interleave': 'band', 'bigtiff': 'NO'}


  transform = guard_transform(transform)


./cit_014_areas_of_urban_development_merge.tif  62815620 / 62815620.0  (100.00%)

Inspect the final product

In [20]:
tmp = "./temp"
s3 = boto3.resource("s3")
s3.meta.client.download_file(s3_bucket, s3_key_merge, tmp)

In [21]:
with rasterio.open(tmp) as src:
    print(src.profile)
    data = src.read(4)

{'driver': 'GTiff', 'dtype': 'int32', 'nodata': 0.0, 'width': 35497, 'height': 15236, 'count': 6, 'crs': CRS({'init': 'epsg:4326'}), 'transform': (-17619594.54744353, 1000.0, 0.0, 8751029.46186849, 0.0, -1000.0), 'affine': Affine(1000.0, 0.0, -17619594.54744353,
       0.0, -1000.0, 8751029.46186849), 'bigtiff': 'no', 'blockxsize': 512, 'blockysize': 512, 'compress': 'lzw', 'interleave': 'band', 'tiled': True}


In [38]:
os.getcwd()

'/Users/nathansuberi/Desktop/WRI_Programming/Py_Scripts/Metadata_Management/Raster_Dataset_Getters'

In [22]:
np.unique(data, return_counts=True)

(array([0, 1, 2, 3], dtype=int32),
 array([513007235,  25533521,   1571449,    720087]))

In [None]:
plt.imshow(data)

<matplotlib.image.AxesImage at 0x11e9ef9e8>