In [None]:
# downloading utilities
import requests
import json
from requests.auth import HTTPBasicAuth
import urllib.request
# progress and file 
from tqdm import tqdm
# file processing utils
import os
from glob import glob
from multiprocessing import Pool
# geotiff util
import rasterio

In [None]:
os.getcwd()

In [None]:
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient, __version__ 

In [None]:
connect_str = "DefaultEndpointsProtocol=https;AccountName=yihongponding;AccountKey=hnvkEWfrIeYEyLUFcYYKyjujkSWiWcPdV/mS8O5GJ51iWPAkni7opzaA7klbwIGpGY0JANb5pRjGBe1ekHbX9Q==;EndpointSuffix=core.windows.net"
blob_service_client = BlobServiceClient.from_connection_string(connect_str)
container_client = blob_service_client.get_container_client("ponding-il")

In [None]:
blobs = container_client.list_blobs()
for b in blobs:
    print(b.name)

In [None]:
blob_client = blob_service_client.get_blob_client(container="ponding-il", blob="503-1291.tif")
download_file_path = "503-1291.tif"
with open(download_file_path, "wb") as download_file:
    download_file.write(blob_client.download_blob().readall())

# Download file from Planet

This downloads the images from PlanetLab API in quads. Each quad is approximately 4000 pixel * 4000 pixels and sizes around 100MB. 

For each year, Champaign has 20 quads, Illinois has 978 quads, and the entire US midwest has about 10,000 quads.

We want to do this for every year from 2017 to 2021.

In [None]:
BASE_URL = 'https://api.planet.com/basemaps/v1/mosaics/'
API_KEY = '8fb5d85cdcfc40f6b4b9d3f44227142b' # obmitted
auth = HTTPBasicAuth(API_KEY, '')

mosaic_id = '56f00cc2-6be4-4315-9603-c75d6afab225'
url = f'{BASE_URL}{mosaic_id}/quads'
bbox = '-91.51307900019566, 36.970297999852846, -87.49519900023363, 42.50848099959849' #update bbox and page_size
res = requests.get(url=url, auth=auth, params={'bbox':bbox, '_page_size':99999}) 
out = json.loads(res.text)

for i in tqdm(out['items']):
    if i['id'] in os.listdir('./tmp_img/'):
        continue
    urllib.request.urlretrieve(i['_links']['download'], f"./tmp_img/{i['id']}.tif")
    break

In [None]:
arr = rasterio.open("503-1291.tif").read()

In [None]:
help("modules")

# Preprocess

This reprojects quad into a different coordinates and upscales the quad. The upscaled size is around 250MB.

Note:
 - Memory is fine since each quad will take 350MB at max (I think). I have 16Gb on local machine and 10 processes are OK.
 - It will probably be faster if we do upscale while downloading.
 - Also we probably need to replace the system call to python gdal package (according to the meeting).


In [None]:
'''
def upscale(fname):
    print("processing ", fname)
    out_fname = fname.replace('.tif', '-warp.tif')
    if os.path.isfile(out_fname):
        print("already exists")
        return
    os.system(f'gdalwarp -t_srs EPSG:32616 -tr 3 3 -r bilinear {fname} {out_fname}')
    
    # pool multiple thread to preprocess image
files = glob('./*tif')
print(files)
p = Pool(10)
p.map(upscale, files)
'''


In [12]:
import pyproj

ModuleNotFoundError: No module named 'pyproj'

In [1]:
import gdal
gdal.VersionInfo()

'3000200'

In [2]:
import os
os.environ['PROJ_LIB'] = os.getenv('HOME') + '/.conda/envs/py-image_preprocess/share/proj'
os.environ['GDAL_DATA'] = os.getenv('HOME') + '/.conda/envs/py-image_preprocess/share'

In [11]:
print(os.getenv('HOME'))
print(os.environ['PROJ_LIB'])

/home/c3
/home/c3/.conda/envs/py-image_preprocess/share/proj


In [1]:
import osgeo.gdal
osgeo.gdal.__version__

'3.0.2'

In [2]:
from osgeo import osr
osr.GetPROJVersionMajor(), osr.GetPROJVersionMinor()

(6, 2)

In [3]:
options = gdal.WarpOptions(srcSRS='EPSG:3857', dstSRS = 'EPSG:32616', yRes=3, xRes=3)

In [None]:
# New heading

In [5]:
gdal.Warp(destNameOrDestDS='./503-1291-warp.tif', srcDSOrSrcDSTab='./503-1291.tif', options=options)

<osgeo.gdal.Dataset; proxy of <Swig Object of type 'GDALDatasetShadow *' at 0x7f711e699570> >

# Prediction

In [None]:
# TF stuffs
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import (
    Conv2D, 
    concatenate, 
    Dropout, 
    Input, 
    Reshape,
    BatchNormalization, 
    MaxPooling2D, 
    UpSampling2D, 
    ReLU, 
    Conv2DTranspose
)
# data processing pacakges
import numpy as np
import matplotlib.pyplot as plt

In [None]:
tf.__version__

In [None]:
tf.config.list_physical_devices()

In [None]:
print(tf.test.is_gpu_available())
print(tf.test.is_built_with_cuda())

## Define model

In [None]:
# unet model
def conv2d_block(input_tensor, n_filters, kernel_size = 3, batchnorm = True):
    """Function to add 2 convolutional layers with the parameters passed to it"""
    # first layer
    x = Conv2D(filters = n_filters, kernel_size = (kernel_size, kernel_size),\
              kernel_initializer = 'he_normal', padding = 'same')(input_tensor)
    if batchnorm:
        x = BatchNormalization()(x)
    x = ReLU()(x)
    
    # second layer
    x = Conv2D(filters = n_filters, kernel_size = (kernel_size, kernel_size),\
              kernel_initializer = 'he_normal', padding = 'same')(x)
    if batchnorm:
        x = BatchNormalization()(x)
    x = ReLU()(x)
    
    return x

def get_unet(input_img, n_filters = 16, dropout = 0.1, batchnorm = True):
    """Function to define the UNET Model"""
    # Contracting Path
    c1 = conv2d_block(input_img, n_filters * 1, kernel_size = 3, batchnorm = batchnorm)
    p1 = MaxPooling2D((2, 2))(c1)
    p1 = Dropout(dropout)(p1)
    
    c2 = conv2d_block(p1, n_filters * 2, kernel_size = 3, batchnorm = batchnorm)
    p2 = MaxPooling2D((2, 2))(c2)
    p2 = Dropout(dropout)(p2)
    
    c3 = conv2d_block(p2, n_filters * 4, kernel_size = 3, batchnorm = batchnorm)
    p3 = MaxPooling2D((2, 2))(c3)
    p3 = Dropout(dropout)(p3)
    
    c4 = conv2d_block(p3, n_filters * 8, kernel_size = 3, batchnorm = batchnorm)
    p4 = MaxPooling2D((2, 2))(c4)
    p4 = Dropout(dropout)(p4)
    
    c5 = conv2d_block(p4, n_filters = n_filters * 16, kernel_size = 3, batchnorm = batchnorm)
    
    # Expansive Path
    u6 = Conv2DTranspose(n_filters * 8, (3, 3), strides = (2, 2), padding = 'same')(c5)
    u6 = concatenate([u6, c4])
    u6 = Dropout(dropout)(u6)
    c6 = conv2d_block(u6, n_filters * 8, kernel_size = 3, batchnorm = batchnorm)
    
    u7 = Conv2DTranspose(n_filters * 4, (3, 3), strides = (2, 2), padding = 'same')(c6)
    u7 = concatenate([u7, c3])
    u7 = Dropout(dropout)(u7)
    c7 = conv2d_block(u7, n_filters * 4, kernel_size = 3, batchnorm = batchnorm)
    
    u8 = Conv2DTranspose(n_filters * 2, (3, 3), strides = (2, 2), padding = 'same')(c7)
    u8 = concatenate([u8, c2])
    u8 = Dropout(dropout)(u8)
    c8 = conv2d_block(u8, n_filters * 2, kernel_size = 3, batchnorm = batchnorm)
    
    u9 = Conv2DTranspose(n_filters * 1, (3, 3), strides = (2, 2), padding = 'same')(c8)
    u9 = concatenate([u9, c1])
    u9 = Dropout(dropout)(u9)
    c9 = conv2d_block(u9, n_filters * 1, kernel_size = 3, batchnorm = batchnorm)
    
    outputs = Conv2D(1, (1, 1), activation='sigmoid')(c9)
    model = Model(input_img, outputs)
    model.summary()
    return model

In [None]:
# some constant and define tf memory behavior
CKPT_DIR = './model_ckpt/unet_train_100.tf'
FILE_DIR = './tmp_img/*tif'
OUT_DIR = './tmp_pred/'
THRESHOLD = 0.5
img_size = (224, 224, 5)
physical_devices = tf.config.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True) # this needs to be changed if multiple GPU

## Get model and load trained model

In [None]:
# load trained model
input_img = Input(img_size, name='img')
model = get_unet(input_img, n_filters=32, dropout=0.15, batchnorm=True)
model.load_weights(CKPT_DIR)

## Prediction function that handles a file

In [None]:
def predict_tiff(net, tiff_path, add_gcvi=False):
    base_img = rasterio.open(tiff_path).read()
    base_img = np.transpose(base_img, [1, 2, 0])
    base_img = base_img[:, :, :4]
    if add_gcvi:
        gcvi = base_img[:, :, 3] / base_img[:, :, 1]
        gcvi = gcvi - 1
        gcvi[gcvi > 20] = 20
        gcvi[gcvi < 0] = 0
        gcvi = np.nan_to_num(gcvi, False, 0, 0, 0)
        gcvi = np.expand_dims(gcvi, -1)
        base_img = np.concatenate([base_img, gcvi], 2) 
    
    # track original size and calc how many cuts
    h, w, c = base_img.shape
    h_count = int(h/img_size) + 1
    w_count = int(w/img_size) + 1

    # calculate padded height and width
    h_padded = h_count * img_size
    w_padded = w_count * img_size

    # Pad image and cut into img_size * img_size
    base_img = np.pad(base_img, ((0, h_padded - h), (0, w_padded-w), (0,0)), 'constant')
    base_img = np.reshape(base_img, (h_count, img_size, w_count, img_size, c))
    base_img = np.transpose(base_img, axes=(0, 2, 1, 3, 4))
    base_img = np.reshape(base_img, (-1, img_size, img_size, c)) # nhwc

    # Predict
    out = net.predict(base_img, batch_size = 8)

    # Convert back
    combined = np.zeros((h_padded, w_padded))
    x = 0
    idx = 0
    while x < combined.shape[0]:
        y = 0
        while y < combined.shape[1]:
            combined[x:x+img_size, y:y+img_size] = np.squeeze(out[idx], 2)
            y += img_size
            idx += 1
        x += img_size
    combined_cut = np.array(combined)
    # Trim
    return combined_cut[:h, :w]


In [None]:
def export_tif(fn, file_name, mask):
    meta = rasterio.open(fn).meta.copy()
    meta.update({
        'count':1,
        'dtype':np.uint8
    })
    mask[mask > 0.5] = 1
    mask[mask < 1] = 0
    mask = mask.astype(np.uint8)
    with rasterio.open(os.path.join(OUT_DIR,file_name), 'w', **meta) as dest:
        dest.write(np.expand_dims(mask, 0))


## Prediction driver

Essential loops through every file, pipe them through the model and save the output.

We could streamline is process, i.e., schedule load - predict - save for better performance.

In [None]:
for fn in tqdm(glob(FILE_DIR)):
    if 'udm' in fn:
        continue

    if 'tif' not in fn:
        continue

    file_name = fn.split('\\')[-1]    
    if file_name in os.listdir(OUT_DIR):
        print(f'Already Found: {file_name}...skipping')
        continue

    out_fn = os.path.join(OUT_DIR, file_name)
    mask = predict_tiff(net = model,
                tiff_path = fn,
                add_gcvi = True)
    # we can probably throw this to a different thread
    export_tif(fn, file_name, mask)
    # it's optional to save these two formats
    np.save(os.path.join(OUT_DIR, file_name.split('.')[0] + '.npy'), mask)
    plt.imsave(os.path.join(OUT_DIR, file_name.split('.')[0] + '.png'), mask)
