## Xview Dataset clipping
This component clips the images of the xView dataset into fixed-size tiles using a sliding window.

In [None]:
#!pip install Pillow claimed aiobotocore botocore s3fs

In [2]:
import os
from PIL import Image
from c3 import operator_utils
import s3fs
import pathlib

In [3]:
# source is the path (connection string) to the folder with the unzipped .tif images from the xView dataset
source = os.environ.get("directory_path")

# destination is the path (connection string) to the folder which receives all the extracted tiles
destination = os.environ.get("destination")

# use SSL on the COS endpoints? Environment values are strings, so "false", "0", "no", "off" and "" disable SSL; anything else enables it
use_ssl = str(os.environ.get("use_ssl", True)).strip().lower() not in ("", "0", "false", "no", "off")

# Each image is cropped with a rectangular window of tile_size_x by tile_size_y pixels
tile_size_x = int(os.environ.get("tile_size_x", 64))
tile_size_y = int(os.environ.get("tile_size_y", 64))

# stride_x is the number of pixels the sliding window moves to the right after each step
# For a tumbling window stride_x must equal tile_size_x and stride_y must equal tile_size_y
stride_x = int(os.environ.get("stride_x", 32))
# stride_y is the number of pixels the sliding window moves down after completing a row
stride_y = int(os.environ.get("stride_y", 32))

In [None]:
# Split each connection string into credentials, endpoint and remaining path.
# `source` and `destination` are rebound to the path component returned by
# operator_utils.explode_connection_string.
# NOTE(review): the exact connection-string format is defined by c3.operator_utils — confirm there.
(
    access_key_id_source,
    secret_access_key_source,
    endpoint_source,
    source,
) = operator_utils.explode_connection_string(source)

(
    access_key_id_destination,
    secret_access_key_destination,
    endpoint_destination,
    destination,
) = operator_utils.explode_connection_string(destination)

In [None]:
# Filesystem handle used to list and download the source images.
# Credentials and endpoint come from the exploded source connection string.
s3source = s3fs.S3FileSystem(
    key=access_key_id_source,
    secret=secret_access_key_source,
    client_kwargs=dict(endpoint_url=endpoint_source),
    use_ssl=use_ssl,
    anon=False,
)

In [None]:
# Filesystem handle used to upload the extracted tiles.
# Credentials and endpoint come from the exploded destination connection string.
s3destination = s3fs.S3FileSystem(
    key=access_key_id_destination,
    secret=secret_access_key_destination,
    client_kwargs=dict(endpoint_url=endpoint_destination),
    use_ssl=use_ssl,
    anon=False,
)

In [None]:
def _tile_offsets(length, tile_size, stride):
    """Return the start offsets of every sliding-window position along one axis.

    Offsets are 0, stride, 2 * stride, ... for as long as the window
    [offset, offset + tile_size) still lies completely inside [0, length).
    Offset 0 is always returned, even when the axis is shorter than the
    window; the crop then extends past the image border.

    Raises:
        ValueError: if stride is not positive (the window would never advance).
    """
    if stride <= 0:
        raise ValueError(f'stride must be a positive number of pixels, got {stride}')
    # "+ 1" makes the bound inclusive so a window that ends exactly on the
    # image border is kept.
    return list(range(0, max(length - tile_size, 0) + 1, stride))


for item in s3source.ls(source):
    # Download the image to a local path that mirrors its remote key.
    s3source.get(item, item)
    _, file_extension = os.path.splitext(item)

    # Local prefix for all tiles of this image; the tile coordinates and the
    # original extension are appended below.
    clipped_item = os.path.join(destination, item)
    pathlib.Path(os.path.dirname(clipped_item)).mkdir(parents=True, exist_ok=True)

    # The context manager closes the underlying file once all tiles are written.
    with Image.open(item) as image:
        width, height = image.size
        x_range = _tile_offsets(width, tile_size_x, stride_x)
        y_range = _tile_offsets(height, tile_size_y, stride_y)

        for x in x_range:
            for y in y_range:
                cropped = image.crop((x, y, x + tile_size_x, y + tile_size_y))
                dest_path = f'{clipped_item}.{x}.{y}{file_extension}'
                cropped.save(dest_path)

                # Tiles are uploaded flat into the destination folder, named
                # <image name>.<x>.<y><extension>.
                clipped_item_upload = os.path.join(destination, os.path.basename(dest_path))
                s3destination.put(dest_path, clipped_item_upload)