In [None]:
import random
import os
import shutil
import copy
from skimage import measure
from skimage import morphology as skim
import skimage.io as skio
import warnings
import pandas as pd
import numpy as np
import pathlib
import geopandas as gpd
from rasterio import features, coords
from rasterio.plot import reshape_as_raster
import rasterio
from shapely.geometry import shape
from osgeo import gdal

from cropmask.misc import parse_yaml, make_dirs

from cropmask.preprocess import PreprocessWorkflow
# Build the preprocessing workflow from three paths: a YAML config, a Landsat
# scene directory and a GeoJSON file (roles inferred from the path names --
# confirm against PreprocessWorkflow's signature).
# NOTE(review): these are absolute, machine-specific paths; the notebook will not
# run on another machine without editing them.
wflow = PreprocessWorkflow("/home/ryan/work/CropMask_RCNN/cropmask/preprocess_config.yaml", 
                                 "/mnt/point/western_nebraska_landsat_scenes/LT050320312005082801T1-SC20190418222350/",
                                 "/mnt/point/external/2005_NE_Center_Pivots/nbextent-clipped-to-western.geojson")
# presumably creates the working directories and returns them -- verify
directory_list = wflow.setup_dirs()

# The next three calls are chained: each result is the input of the following call.
band_list = wflow.yaml_to_band_index()

product_list = wflow.get_product_paths(band_list)

# `stacked_arr` is not used again in the cells visible here.
stacked_arr = wflow.load_and_stack_bands(product_list)

# Called for its side effect; later cells read `wflow.stacked_path`, which
# presumably points at the file saved here -- TODO confirm.
wflow.stack_and_save_bands()

In [None]:
# NOTE(review): -31 and 100 are unexplained magic numbers. Going by the method
# name they look like a negative buffer distance and a small-object size
# threshold -- confirm their meaning and units against PreprocessWorkflow.
wflow.negative_buffer_and_small_filter(-31, 100)

In [None]:
# Drop a dataset handle left over from an earlier run of this notebook.
# Guarded because `src` is only created in the following cell: on a fresh kernel
# (Restart & Run All) an unconditional `del src` raises NameError.
if 'src' in globals():
    del src

In [None]:
# Open the stacked raster for reading. The handle is kept open at notebook scope
# for later cells (the block-window loop reads from `src`); none of the cells
# visible here closes it explicitly.
src = rasterio.open(wflow.stacked_path)

In [None]:
# Integer form of every entry in the workflow's band list.
band_indices = list(map(int, wflow.band_list))

In [None]:
# Bare expression: evaluates to the Window class so the cell displays it.
# Exploratory lookup only; nothing is assigned.
rasterio.windows.Window

In [None]:
import os
from itertools import product
import rasterio as rio
from rasterio import windows

# Raster that will be split into chips (read by the timed tile-listing cell).
in_path = wflow.stacked_path

# Destination for the chips and the file-name template; the two placeholders are
# filled with the chip's column and row offsets by write_by_window.
# NOTE(review): `out_path` and `output_filename` are not referenced by the other
# cells visible here, which pass wflow.GRIDDED_IMGS and the default template instead.
out_path = wflow.GRIDDED_IMGS
output_filename = 'tile_{}-{}.tif'

In [117]:
%%time
# Time how long it takes to compute the (window, transform) pairs for every chip.
# NOTE(review): get_tiles_for_threaded_map is defined in a later cell, so this
# cell fails with NameError when the notebook is run top to bottom on a fresh kernel.
with rasterio.open(in_path) as inds:
    chip_list = get_tiles_for_threaded_map(inds)

CPU times: user 101 ms, sys: 5.07 ms, total: 106 ms
Wall time: 101 ms


In [113]:
import threading

def concurrent_map(func, data):
    """
    Similar to the builtin function map(), but spawns one thread per item and
    applies `func` to all items concurrently.

    Parameters
    ----------
    func : callable
        Called once per item with that item as its only argument.
    data : sequence or iterable
        Items to process. Objects that support both len() and indexing are used
        as they are; any other iterable (generator, set, ...) is first
        materialised into a list.

    Returns
    -------
    list
        The results of `func`, in the same order as the input items.

    Raises
    ------
    Exception
        If `func` raised for one or more items, the exception belonging to the
        earliest such item is re-raised once every thread has finished.

    Note: the number of threads equals the number of items; nothing bounds it.
    """
    # Generators and other non-indexable iterables cannot be addressed by
    # position, so take a snapshot of them first.
    if not (hasattr(data, "__len__") and hasattr(data, "__getitem__")):
        data = list(data)

    n_items = len(data)
    result = [None] * n_items
    errors = [None] * n_items

    # Wrapper that stores each outcome in the slot matching its input position.
    def task_wrapper(i):
        try:
            result[i] = func(data[i])
        except Exception as exc:
            # Keep the failure instead of letting the thread die quietly, which
            # would leave a None in the results with no error for the caller.
            errors[i] = exc

    threads = [threading.Thread(target=task_wrapper, args=(i,)) for i in range(n_items)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    for exc in errors:
        if exc is not None:
            raise exc

    return result

In [None]:
def concurrent_map(func, data):
    """
    Similar to the builtin function map(), but spawns one thread per item and
    applies `func` to all items concurrently. The number of threads created is
    printed before they are started.

    Parameters
    ----------
    func : callable
        Called once per item with that item as its only argument.
    data : sequence or iterable
        Items to process. Objects that support both len() and indexing are used
        as they are; any other iterable (generator, set, ...) is first
        materialised into a list.

    Returns
    -------
    list
        The results of `func`, in the same order as the input items.

    Raises
    ------
    Exception
        If `func` raised for one or more items, the exception belonging to the
        earliest such item is re-raised once every thread has finished.

    Note: the number of threads equals the number of items; nothing bounds it.
    """
    # Generators and other non-indexable iterables cannot be addressed by
    # position, so take a snapshot of them first.
    if not (hasattr(data, "__len__") and hasattr(data, "__getitem__")):
        data = list(data)

    n_items = len(data)
    result = [None] * n_items
    errors = [None] * n_items

    # Wrapper that stores each outcome in the slot matching its input position.
    def task_wrapper(i):
        try:
            result[i] = func(data[i])
        except Exception as exc:
            # Keep the failure instead of letting the thread die quietly, which
            # would leave a None in the results with no error for the caller.
            errors[i] = exc

    threads = [threading.Thread(target=task_wrapper, args=(i,)) for i in range(n_items)]
    # Diagnostic output: how many threads are about to be launched.
    print(len(threads))
    for t in threads:
        t.start()
    for t in threads:
        t.join()

    for exc in errors:
        if exc is not None:
            raise exc

    return result

from multiprocessing.dummy import Pool as ThreadPool

def map_threads(func, sequence, threads=8):
    """
    Apply `func` to every item of `sequence` using a pool of worker threads.

    Parameters
    ----------
    func : callable
        Called once per item with that item as its only argument.
    sequence : iterable
        Items to process.
    threads : int, optional
        Number of worker threads in the pool (default 8).

    Returns
    -------
    list
        The results of `func`, in the same order as the input items.

    Raises
    ------
    Exception
        An exception raised by `func` in a worker is re-raised here.
    """
    pool = ThreadPool(threads)
    try:
        # pool.map blocks until all items are done and returns a plain list.
        # The previous map_async call returned an AsyncResult handle, which is
        # not iterable, so callers doing list(map_threads(...)) failed.
        results = pool.map(func, sequence)
    finally:
        # Always shut the workers down, including when func raised.
        pool.close()
        pool.join()
    return results

def get_tiles_for_threaded_map(ds, width=256, height=256):
    """
    Build the list of chips covering an open raster dataset.

    The dataset is walked in steps of `width` columns and `height` rows. Every
    chip window is intersected with the full dataset extent, so chips on the
    right and bottom edges are clipped to the dataset bounds.

    Parameters
    ----------
    ds : open dataset exposing `meta['width']`, `meta['height']` and `transform`
    width, height : int
        Nominal chip size in pixels.

    Returns
    -------
    list of (rasterio.windows.Window, transform) tuples, one per chip, ordered
    by column offset and then by row offset.
    """
    n_cols = ds.meta['width']
    n_rows = ds.meta['height']
    full_extent = windows.Window(col_off=0, row_off=0, width=n_cols, height=n_rows)

    tiles = []
    for col_off, row_off in product(range(0, n_cols, width), range(0, n_rows, height)):
        # Clip the nominal chip to the dataset extent, then derive its transform.
        chip = windows.Window(
            col_off=col_off, row_off=row_off, width=width, height=height
        ).intersection(full_extent)
        tiles.append((chip, windows.transform(chip, ds.transform)))
    return tiles

def write_by_window(ds, out_dir, output_name_template, meta, window, transform):
    """
    Write one image chip of `ds` to its own file and return the file's path.

    Parameters
    ----------
    ds : open raster dataset to read the chip's pixels from.
    out_dir : str
        Directory the chip file is written into.
    output_name_template : str
        Format string with two placeholders, filled with the chip's integer
        column offset and row offset.
    meta : dict
        Metadata of the source dataset. It is used as the template for the
        chip's metadata and is left unmodified.
    window : rasterio.windows.Window
        Pixel window of the chip within `ds`.
    transform : affine transform to store in the chip file.

    Returns
    -------
    str
        Path of the file that was written.
    """
    # Work on a private copy. Callers reuse a single `meta` dict for every chip,
    # including across worker threads, so updating it in place lets one chip's
    # size or transform end up in another chip's file.
    chip_meta = dict(meta)
    chip_meta['transform'] = transform
    chip_meta['width'], chip_meta['height'] = window.width, window.height
    outpath = os.path.join(out_dir, output_name_template.format(int(window.col_off), int(window.row_off)))
    with rasterio.open(outpath, 'w', **chip_meta) as outds:
        outds.write(ds.read(window=window))
    return outpath

def grid_images_rasterio_controlled_threads(in_path, out_dir, output_name_template='tile_{}-{}.tif'):
    """
    Split the raster at `in_path` into chips and write every chip into `out_dir`,
    distributing the writes over the thread pool provided by `map_threads`.

    Chips are laid out by `get_tiles_for_threaded_map` with its default tile
    size. No chip is filtered out; file names come from `output_name_template`,
    formatted with each chip's column and row offset.
    Inspired by https://gis.stackexchange.com/questions/285499/how-to-split-multiband-image-into-image-tiles-using-rasterio

    Parameters
    ----------
    in_path : path of the raster to split.
    out_dir : directory that receives the chip files.
    output_name_template : str
        Format string with two placeholders (column offset, row offset).

    Returns
    -------
    list of str
        Paths of the chip files, in chip order.
    """
    with rasterio.open(in_path) as src:
        meta = src.meta.copy()
        chip_list = get_tiles_for_threaded_map(src)
        # Hand every chip its own copy of the metadata so worker threads never
        # share one mutable dict.
        # NOTE(review): all workers still read through the same `src` handle;
        # rasterio's concurrency guidance protects shared datasets with a lock --
        # confirm whether that is needed here.
        written = map_threads(
            lambda chip: write_by_window(src, out_dir, output_name_template, meta.copy(), chip[0], chip[1]),
            chip_list,
        )
        # map_threads may hand back a multiprocessing AsyncResult, which is not
        # iterable; resolve it to the underlying list before building the result.
        if hasattr(written, 'get'):
            written = written.get()
        out_paths = list(written)
    return out_paths


def grid_images_rasterio_concurrent_map(in_path, out_dir, output_name_template='tile_{}-{}.tif'):
    """
    Split the raster at `in_path` into chips and write every chip into `out_dir`,
    using `concurrent_map`, which starts one thread per chip.

    Chips are laid out by `get_tiles_for_threaded_map` with its default tile
    size. No chip is filtered out; file names come from `output_name_template`,
    formatted with each chip's column and row offset.
    Inspired by https://gis.stackexchange.com/questions/285499/how-to-split-multiband-image-into-image-tiles-using-rasterio

    Parameters
    ----------
    in_path : path of the raster to split.
    out_dir : directory that receives the chip files.
    output_name_template : str
        Format string with two placeholders (column offset, row offset).

    Returns
    -------
    list
        One entry per chip, in chip order: the path of the written file.
    """
    with rasterio.open(in_path) as src:
        meta = src.meta.copy()
        chip_list = get_tiles_for_threaded_map(src)
        # Hand every chip its own copy of the metadata so worker threads never
        # share one mutable dict.
        # NOTE(review): all workers still read through the same `src` handle;
        # rasterio's concurrency guidance protects shared datasets with a lock --
        # confirm whether that is needed here.
        out_paths = list(concurrent_map(
            lambda chip: write_by_window(src, out_dir, output_name_template, meta.copy(), chip[0], chip[1]),
            chip_list,
        ))
    return out_paths

def grid_images_rasterio_regular_map(in_path, out_dir, output_name_template='tile_{}-{}.tif'):
    """
    Split the raster at `in_path` into chips and write them into `out_dir` one
    after another (no threads); the sequential counterpart of the threaded variants.

    Chips are laid out by `get_tiles_for_threaded_map` with its default tile
    size, and file names come from `output_name_template`, formatted with each
    chip's column and row offset.
    Inspired by https://gis.stackexchange.com/questions/285499/how-to-split-multiband-image-into-image-tiles-using-rasterio

    Returns the list of written chip paths, in chip order.
    """
    with rasterio.open(in_path) as src:
        meta = src.meta.copy()
        tiles = get_tiles_for_threaded_map(src)
        out_paths = [
            write_by_window(src, out_dir, output_name_template, meta, chip_window, chip_transform)
            for chip_window, chip_transform in tiles
        ]
    return out_paths

In [None]:
%%time
# Time the thread-pool variant of the chip writer on the stacked raster.
out_paths = grid_images_rasterio_controlled_threads(wflow.stacked_path, wflow.GRIDDED_IMGS)

In [None]:
%%time
# NOTE(review): `grid_images_rasterio` is not defined in the cells visible here
# (only the *_controlled_threads, *_concurrent_map and *_regular_map variants
# are). Confirm it is defined elsewhere; otherwise this cell raises NameError on
# a fresh kernel.
out_paths = grid_images_rasterio(wflow.stacked_path, wflow.GRIDDED_IMGS)

868


In [79]:
# Display the result of the last gridding call.
# NOTE(review): the recorded output below is a lazy `map` object, not a list of
# paths, so it was produced by a different version of the gridding function than
# the ones defined above.
out_paths

<map at 0x7fd38d7aa2b0>

In [None]:
# Write one 4-band GeoTiff chip per internal block of `src`.
# Relies on `src`, `image_name` and `prep_directory` existing at notebook scope.
for ji, window in src.block_windows(1):

    # Read bands 1-4 for this block (B, G, R, NIR according to the original
    # note; the band order cannot be verified from this cell).
    r = src.read((1,2,3,4), window=window)

    # Skip the chip if any pixel value is 0, which is treated as missing data.
    if 0 in r:
        continue

    # Create chip id from the block's row/column index
    chip_name = image_name + '_' + str(ji[0]) + '_' + str(ji[1])

    # Create directory for image chip and subdirectories for image and labels
    chip_dir = prep_directory + '/' + chip_name + '/'
    img_dir = chip_dir + '/image/'
    mask_dir = chip_dir + '/class_masks/'

    # list of directories to map over
    dirs = [chip_dir, img_dir, mask_dir]

    # Make chip directory and subdirectories
    for d in dirs:
        pathlib.Path(d).mkdir(parents=True, exist_ok=True)

    # Open a new GeoTiff data file in which to save the image chip.
    # The transform must be the one of this window: using src.transform here
    # would georeference every chip at the origin of the full image.
    with rasterio.open((img_dir + chip_name + '.tif'), 'w', driver='GTiff',
               height=r.shape[1], width=r.shape[2], count=4,
               dtype=rasterio.uint16, crs=src.crs,
               transform=src.window_transform(window)) as new_img:

        # Write the chip's pixel values unchanged (no rescaling is applied here)
        new_img.write(r)