In [1]:
import numpy as np
import os
from osgeo import gdal
import matplotlib.pyplot as plt
import rasterio
import zipfile
import shutil
import time
import contextlib

## Crop images

In [2]:
def _remove_quietly(path):
    # Best-effort delete: a file that cannot be removed because of a
    # PermissionError is left in place instead of aborting the whole crop run.
    with contextlib.suppress(PermissionError):
        os.remove(path)


def crop_images(in_filenames, out_filenames, patch_size = 512, m = 0, overlap = 0.5, perc_cashew = 0.1):
    """
        Function to crop images for semantic segmentation model. Considers patch size and overlap percentage.

        The image raster and the ground-truth (GT) raster are cut into square
        tiles. A tile pair is kept only when
          * the sum of all values in the GT tile is greater than
            patch_size**2 * perc_cashew, and
          * every pixel of the first band of the image tile is >= 0.
        Files written for a rejected tile are deleted again, so the output
        prefixes only ever contain complete image/GT pairs.

        Inputs:
            in_filenames: [image_path, ground_truth_path], both opened with gdal.Open.
            out_filenames: [image_prefix, ground_truth_prefix]. Each tile is written to
                prefix + zero-padded 5-digit tile index + '.tif'.
            patch_size: width and height of a tile in pixels.
            m: index given to the first kept tile, so that several rasters can
                share one continuous numbering.
            overlap: fraction of a tile shared with the next one; the stride is
                int(patch_size * (1 - overlap)).
            perc_cashew: fraction of patch_size**2 that the summed GT tile must exceed.

        Outputs:
            The next free tile index (m plus the number of tile pairs kept).
            If the ground-truth raster could not be opened (gdal.Open gave None),
            nothing is written and m is returned unchanged.

        Raises:
            ValueError: if the stride derived from patch_size and overlap is not positive.
    """
    im = gdal.Open(in_filenames[0])
    gt = gdal.Open(in_filenames[1])

    width, height = im.RasterXSize, im.RasterYSize

    stride = int(patch_size*(1-overlap))
    if stride <= 0:
        raise ValueError('patch_size*(1-overlap) must be at least 1 pixel; got overlap = ' + str(overlap))

    k = m

    # Without a ground-truth raster no tile can be checked, so none is written.
    if gt is None:
        im = None
        return k

    magnitude = 5  # number of digits in the zero-padded tile index
    tile_name = '{:0' + str(magnitude) + 'd}.tif'
    tile_area = patch_size**2

    for i in range(0, width, stride):
        for j in range(0, height, stride):

            # Hardcode so the images will fit together: the GT window is measured
            # from the bottom edge of the raster instead of the top.
            # NOTE(review): presumably the rasterized GT is stored flipped along Y
            # relative to the image - confirm. j_gt is negative whenever
            # j + patch_size exceeds the raster height.
            j_gt = height - (j + patch_size)

            gt_tile = out_filenames[1] + tile_name.format(k)
            im_tile = out_filenames[0] + tile_name.format(k)

            gdal.Translate(gt_tile, gt, srcWin = [i, j_gt, patch_size, patch_size])

            with rasterio.open(gt_tile) as src:
                enough_cashew = src.read().sum() > tile_area*perc_cashew

            if not enough_cashew:
                # Do not leave a GT tile behind that has no image partner.
                _remove_quietly(gt_tile)
                continue

            gdal.Translate(im_tile, im, srcWin = [i, j, patch_size, patch_size])

            with rasterio.open(im_tile) as src:
                # Valid only if all patch_size**2 pixels of band 1 are non-negative.
                all_valid = (src.read()[0] >= 0).sum() == tile_area

            if all_valid:
                k += 1
            else:
                # Reject the pair as a whole; index k is reused by the next tile.
                _remove_quietly(gt_tile)
                _remove_quietly(im_tile)

    # Drop the references to the GDAL datasets.
    im = None
    gt = None

    return k
                

def zipdir(path, ziph):
    """Add every file found under ``path`` to the open zip handle ``ziph``.

    Archive names are taken relative to the parent of ``path``, so the
    archive keeps ``path``'s own folder name as its top-level directory.
    Only files are written; directories get no entry of their own.
    """
    archive_base = os.path.join(path, '..')
    for current_dir, _subdirs, filenames in os.walk(path):
        for filename in filenames:
            full_path = os.path.join(current_dir, filename)
            ziph.write(full_path, os.path.relpath(full_path, archive_base))

## For Tanzania

In [3]:
# Make sure the output folder for the cropped Tanzania tiles exists.
# exist_ok=True keeps the cell safe to re-run, and missing parent folders
# are created instead of failing on the directory listing.
os.makedirs('../Data/Planet/Tanzania', exist_ok=True)

In [4]:
# Crop and save planet and GT images for Tanzania
planet_dir = '../Data/Planet/'
sets = ['Train', 'Validation', 'Test']

for s in sets:

    # Running tile index, shared by every raster that belongs to this split
    m = 0

    # Training tiles overlap by 80 %; validation and test tiles do not overlap
    o = 0 if s in ('Test', 'Validation') else 0.8

    # os.listdir returns entries in arbitrary order; sorting keeps the tile
    # numbering identical from one run to the next
    for i in sorted(os.listdir(planet_dir)):

        if 'Tanzania' + s not in i:
            continue

        planet_path = planet_dir + i
        # The matching GT raster carries the last character of the image file's stem
        vect_path = '../Data/Vector/Tanzania'+s+'Rasterized'+i.split('.')[0][-1]+'.tif'

        # crop_images returns the next free index, which seeds the following raster
        m = crop_images([planet_path, vect_path],
                        ['../Data/Planet/Tanzania/CroppedTanzania'+s+'StudyArea_',
                         '../Data/Planet/Tanzania/CroppedTanzania'+s+'GT_'],
                        patch_size=256,
                        m = m,
                        overlap = o)

    print(s, m)

Train 1574
Validation 29
Test 18


In [5]:
# Pack the folder of cropped Tanzania tiles (planet + GT) into one deflated archive
tanzania_zip = '../Data/PlanetTanzania_16_17.zip'
with zipfile.ZipFile(tanzania_zip, mode='w', compression=zipfile.ZIP_DEFLATED) as archive:
    zipdir('../Data/Planet/Tanzania', archive)

In [None]:
# NOTE(review): this cell appears to be a leftover; it has no execution count (In [None]).
# It deletes Ivory Coast source rasters although it sits in the Tanzania section.
# `i` is not assigned in this cell. After the cropping loop above, `i` holds a file
# name string from os.listdir, so `i+1` raises TypeError, which the PermissionError
# suppression below does not catch. Presumably `i` was meant to be an integer tile
# index from a loop that is no longer in the notebook - TODO confirm or delete the cell.
with contextlib.suppress(PermissionError):
        # If the first removal raises PermissionError, the second one is skipped as well.
        os.remove('../Data/Planet/PlanetTilesIvory_' + str(i+1) + '.tif')
        os.remove('../Data/Vector/IvoryCoastRasterized_' + str(i+1) + '.tif')

## For Ivory Coast

In [3]:
# Make sure the output folder for the cropped Ivory Coast tiles exists.
# exist_ok=True keeps the cell safe to re-run, and missing parent folders
# are created instead of failing on the directory listing.
os.makedirs('../Data/Planet/IvoryCoast', exist_ok=True)

In [None]:
# Crop and save planet and GT images for Ivory Coast
planet_dir = '../Data/Planet/'
sets = ['Train', 'Validation', 'Test']

for s in sets:

    # Running tile index, shared by every raster that belongs to this split
    m = 0

    # Training tiles overlap by 60 %; validation and test tiles do not overlap
    o = 0 if s in ('Test', 'Validation') else 0.6

    # os.listdir returns entries in arbitrary order; sorting keeps the tile
    # numbering identical from one run to the next
    for i in sorted(os.listdir(planet_dir)):

        if 'IvoryCoast' + s not in i:
            continue

        planet_path = planet_dir + i
        # The matching GT raster carries whatever follows the last 'a' in the image file's stem
        vect_path = '../Data/Vector/IvoryCoast'+s+'Rasterized'+i.split('.')[0].split('a')[-1]+'.tif'

        # crop_images returns the next free index, which seeds the following raster
        m = crop_images([planet_path, vect_path],
                        ['../Data/Planet/IvoryCoast/CroppedIvoryCoast'+s+'StudyArea_',
                         '../Data/Planet/IvoryCoast/CroppedIvoryCoast'+s+'GT_'],
                        patch_size=256,
                        m = m,
                        overlap = o,
                        perc_cashew=0.1)

    print(s, m)

In [None]:
# Pack the folder of cropped Ivory Coast tiles (planet + GT) into one deflated archive
ivory_coast_zip = '../Data/PlanetIvoryCoast_16_17.zip'
with zipfile.ZipFile(ivory_coast_zip, mode='w', compression=zipfile.ZIP_DEFLATED) as archive:
    zipdir('../Data/Planet/IvoryCoast', archive)

In [5]:
# Delete the folder of cropped Tanzania tiles together with everything inside it
tanzania_tiles_dir = '../Data/Planet/Tanzania'
shutil.rmtree(tanzania_tiles_dir)