In [1]:
import logging
import math
import multiprocessing
import os
from itertools import product, repeat

import pandas as pd

import rasterio
from rasterio import windows
from rasterio.enums import Resampling

import geopandas as gpd
import fiona
from shapely import geometry

In [2]:
def generate_polygon(bbox):
    """
    Build a rectangular shapely Polygon from a 4-element bounding box.

    bbox is read by index as (x0, y0, x1, y1). The ring is traced in the
    order (x0, y0) -> (x1, y0) -> (x1, y1) -> (x0, y1).

    NOTE(review): chip() passes the output of rasterio.windows.bounds here,
    which rasterio documents as (left, bottom, right, top) -- confirm if other
    callers use a different ordering.

    Returns the shapely.geometry.Polygon (not a list of coordinates).
    """
    x0, y0 = bbox[0], bbox[1]
    x1, y1 = bbox[2], bbox[3]

    # Four corners of the rectangle; the Polygon constructor closes the ring.
    ring = [(x0, y0), (x1, y0), (x1, y1), (x0, y1)]

    return geometry.Polygon(ring)

In [3]:
def make_gdf(polygons, attr_dict):
    """
    Combine chip footprints and their attributes into one GeoDataFrame.

    Args:
        polygons: sequence of shapely geometries, one per row.
        attr_dict: dict mapping column name -> list of values, one value per
            geometry (chip() supplies {'filename': [...]}).

    Returns:
        geopandas.GeoDataFrame with the columns of attr_dict plus a
        'geometry' column. No CRS is set here.
    """
    # Build the frame directly from the dict and the geometry list. The
    # previous version went through pd.DataFrame, but `pd` was never imported
    # in this notebook, so every call raised NameError.
    gdf = gpd.GeoDataFrame(attr_dict, geometry=list(polygons))

    return gdf

In [4]:
def chip(args, in_tif):
    """Cut a raster into square, overlapping JPEG chips and collect their footprints.

    The raster is covered by a grid of chip_pix x chip_pix cells anchored at
    pixel (0, 0). Each cell is grown by overlap_pix on all four sides before
    it is read, so every chip written is (chip_pix + 2 * overlap_pix) pixels
    square -- e.g. chip_pix=256 with overlap_pix=128 gives 512x512 chips.
    Reads are boundless, so a chip that extends past the raster edge keeps its
    full size and is padded instead of being clipped.

    Chips are written to out_dir as "<raster basename>_<col>_<row>.jpg", where
    col/row are the grid cell indices.

    Args:
        args: sequence (chip_pix, overlap_pix, out_dir).
        in_tif: path of the raster to chip.

    Returns:
        tuple (polygons, attr_dict): polygons is a list with one footprint
        polygon per chip (from generate_polygon); attr_dict is
        {'filename': [...]} holding the file name of each chip, in the same
        order as polygons.

    Raises:
        ValueError: if chip_pix is not a positive number.
    """
    chip_pix = args[0]
    overlap_pix = args[1]
    out_dir = args[2]

    if chip_pix <= 0:
        raise ValueError(f"chip_pix must be positive, got {chip_pix}")

    # Values that are the same for every chip of this raster.
    out_dim = chip_pix + (overlap_pix * 2)
    basename = os.path.splitext(os.path.basename(in_tif))[0]

    # Safe to call from several worker processes at once.
    os.makedirs(out_dir, exist_ok=True)

    polygons = []
    chip_names = []

    with rasterio.open(in_tif, 'r') as src:
        src_crs = src.crs

        # Upper-left pixel of every non-overlapping grid cell.
        offsets = product(range(0, src.width, chip_pix), range(0, src.height, chip_pix))

        for col_off, row_off in offsets:
            # Grow the grid cell by the overlap on every side.
            window = windows.Window(
                col_off=col_off - overlap_pix,
                row_off=row_off - overlap_pix,
                width=out_dim,
                height=out_dim)

            # Affine transform that georeferences this chip.
            win_transform = src.window_transform(window)

            # boundless=True lets the window extend outside the raster; the
            # out-of-bounds area is padded rather than raising or clipping.
            data = src.read(window=window,
                            boundless=True)

            # Chip file name carries the grid column and row indices.
            col_count = col_off // chip_pix
            row_count = row_off // chip_pix
            chip_name = f"{basename}_{col_count}_{row_count}.jpg"
            out_path = os.path.join(out_dir, chip_name)

            # Hand-built JPEG profile (the original author notes gdal/rasterio
            # problems when reusing the GeoTIFF profile for JPEG output).
            # NOTE(review): dtype is declared as uint8 regardless of the
            # source dtype -- confirm the inputs are 8-bit.
            profile = {'driver': 'JPEG',
                       'count': src.count,
                       'dtype': rasterio.ubyte,
                       'height': out_dim,
                       'width': out_dim,
                       'transform': win_transform,
                       'crs': src_crs}

            with rasterio.open(out_path, 'w', **profile) as dst:
                dst.write(data)
            # Report only after the dataset has been closed.
            print(f"{out_path}, WROTE")

            # Footprint of the chip in the raster's coordinate system.
            bounds = windows.bounds(window, src.transform)
            polygons.append(generate_polygon(bounds))

            # Record the chip's own file name so each footprint can be matched
            # to its image (previously the source raster's basename was stored
            # for every chip, making all rows identical).
            chip_names.append(chip_name)

    attr_dict = {'filename': chip_names}

    return (polygons, attr_dict)

In [8]:
#### EDIT THESE #######################
# Text file read line by line further down; each line is passed to chip() as an input raster path.
file_list = r"/media/ross/hdd/01_projects/03_marine_debris/md_preprocessing_archive_oct2019/kahoolawe/03_gdalwarp_blocksize/tif_list_12.tif"
# Single input raster path. NOTE(review): not referenced by any cell visible here -- confirm it is still needed.
in_file = r"/media/ross/hdd/01_projects/03_marine_debris/md_preprocessing_archive_oct2019/kahoolawe/03_gdalwarp_blocksize/kahoolawe_233.tif"
# Directory that receives the chips, the log file and the output GeoPackage.
out_dir = r"/media/ross/hdd/01_projects/03_marine_debris/md_preprocessing_archive_oct2019/kahoolawe/testing"
# File name of the output GeoPackage (joined onto out_dir before writing).
out_gpkg = r"kaho_233.gpkg"
# File name of the log file (joined onto out_dir at the end of this cell).
log_name = r'log.txt'
# CRS passed to the final GeoDataFrame.
out_crs = {'init':'epsg:26904'}
#### STOP EDITING ####################

# Passed to chip() as chip_pix: side length, in pixels, of the non-overlapping grid cell.
size=256
# Passed to chip() as overlap_pix: pixels added on every side of each grid cell.
overlap=128

# Turn the bare log file name into a path inside out_dir.
log_name = os.path.join(out_dir, log_name)

In [6]:
# Root logger: INFO and above, timestamped, sent both to the log file and to
# a stream handler.
log_handlers = [
    logging.FileHandler(log_name),
    logging.StreamHandler(),
]
logging.basicConfig(
    format="%(asctime)s %(message)s",
    level=logging.INFO,
    handlers=log_handlers,
)

In [None]:
# Read the raster paths to process, one per line. Blank lines (for example a
# trailing newline at the end of the list) are skipped so that an empty path
# is never handed to chip().
with open(file_list, 'r') as f:
    in_paths = [line.strip() for line in f if line.strip()]

# Fail here with a clear message instead of later when there is nothing to
# concatenate.
if not in_paths:
    raise ValueError(f"no input rasters listed in {file_list}")

# Every raster gets the same (size, overlap, out_dir) settings.
args = [[size, overlap, out_dir]] * len(in_paths)
zipped = zip(args, in_paths)

# The context manager guarantees the worker processes are cleaned up even if
# one of the tasks raises.
with multiprocessing.Pool(processes=4) as pool:
    # One task per raster: write the chips, get back (polygons, attr_dict).
    results = pool.starmap(chip, zipped)

    # One task per raster: turn (polygons, attr_dict) into a GeoDataFrame.
    dfs = pool.starmap(make_gdf, results)

    pool.close()
    pool.join()

/media/ross/hdd/01_projects/03_marine_debris/md_preprocessing_archive_oct2019/kahoolawe/testing/kahoolawe_1516_0_0.jpg, WROTE
/media/ross/hdd/01_projects/03_marine_debris/md_preprocessing_archive_oct2019/kahoolawe/testing/kahoolawe_1582_0_0.jpg, WROTE
/media/ross/hdd/01_projects/03_marine_debris/md_preprocessing_archive_oct2019/kahoolawe/testing/kahoolawe_1580_0_0.jpg, WROTE
/media/ross/hdd/01_projects/03_marine_debris/md_preprocessing_archive_oct2019/kahoolawe/testing/kahoolawe_1517_0_0.jpg, WROTE
/media/ross/hdd/01_projects/03_marine_debris/md_preprocessing_archive_oct2019/kahoolawe/testing/kahoolawe_1581_0_0.jpg, WROTE
/media/ross/hdd/01_projects/03_marine_debris/md_preprocessing_archive_oct2019/kahoolawe/testing/kahoolawe_1530_0_0.jpg, WROTE
/media/ross/hdd/01_projects/03_marine_debris/md_preprocessing_archive_oct2019/kahoolawe/testing/kahoolawe_1529_0_0.jpg, WROTE
/media/ross/hdd/01_projects/03_marine_debris/md_preprocessing_archive_oct2019/kahoolawe/testing/kahoolawe_1589_0_0.jpg

In [None]:
# Stack the per-raster frames into a single table with a fresh 0..n-1 index.
results_df = pd.concat(dfs, ignore_index=True)

# NOTE(review): crs= assigns out_crs to the geometries; it does not reproject
# them -- confirm the source rasters are already in out_crs.
results_gdf = gpd.GeoDataFrame(
    results_df,
    geometry='geometry',
    crs=out_crs,
)

# Write the chip index as a GeoPackage inside out_dir.
out_path = os.path.join(out_dir, out_gpkg)
results_gdf.to_file(out_path, driver='GPKG')