Create image chips for satellite imagery & labels

### create image chips

In [24]:
import numpy as np
import os 
import rasterio as rs 
from pathlib import Path

In [25]:
#@title Parameters
# Working directory. It is also made the process-wide current directory, so
# the relative raster names below are resolved against it.
# NOTE(review): this is an absolute, machine-specific path - adjust it before
# running the notebook on another machine.
work_dir = Path('/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/AI_extensions/data')
os.chdir(work_dir)

# The imagery and labels rasters should be located inside work_dir.
imagery = "imagery.tif"
label = "labels.tif"

# Number of digits in the zero-padded chip index (e.g. 4 -> 0001).
zfill_value = 4

In [28]:
def trainchips_single(img, zfill_value, stride=224, winsize=224, variant='img'):
    '''
    Cut a raster into square, georeferenced GeoTIFF chips.

    The raster is read fully into memory and scanned with a
    winsize x winsize window that advances by `stride` pixels along rows and
    columns. Each window is written to the current working directory as
    "<index>_<variant>.tif", where <index> is a 1-based counter left-padded
    with zeros to `zfill_value` digits. Windows that overhang the bottom or
    right edge of the raster are padded with zeros, so every chip holds
    exactly winsize x winsize pixels.

    Parameters
    ----------
    img : str or path-like
        Raster to split (anything rasterio.open accepts).
    zfill_value : int
        Number of digits in the chip index (e.g. 4 -> 0001).
    stride : int
        Step between consecutive windows in pixels
        (stride = winsize // 2 gives 50% overlap).
    winsize : int
        Edge length of a chip in pixels; depends on the model input size
        (e.g. ImageNet models use 224 or 256 pixels).
    variant : {'img', 'lbl'}
        Filename suffix: 'img' for imagery chips, 'lbl' for label chips.

    Raises
    ------
    ValueError
        If `variant` is neither 'img' nor 'lbl'.
    '''
    # Reject unknown variants up front instead of looping over the whole
    # raster without writing a single file.
    if variant not in ('img', 'lbl'):
        raise ValueError("variant must be 'img' or 'lbl', got %r" % (variant,))

    # The context manager closes the source dataset even if a write fails.
    with rs.open(img) as image:
        imgarr = image.read()
        print("Shape of training data  is: ", imgarr.shape)
        bands, height, width = imgarr.shape

        index = 0
        for i in range(0, height, stride):
            for j in range(0, width, stride):
                chip = imgarr[:, i:i + winsize, j:j + winsize]

                # Edge windows come out shorter than winsize; pad them so the
                # array matches the winsize x winsize size declared for the
                # output dataset.
                if chip.shape[1:] != (winsize, winsize):
                    padded = np.zeros((bands, winsize, winsize), dtype=imgarr.dtype)
                    padded[:, :chip.shape[1], :chip.shape[2]] = chip
                    chip = padded

                # Move the raster origin by j columns and i rows. Composing
                # affines keeps every term of the source transform.
                transform = image.transform * rs.Affine.translation(j, i)
                index += 1

                chip_name = str(index).zfill(zfill_value) + "_" + variant + ".tif"
                with rs.open(chip_name, "w", driver='GTiff', count=bands,
                             dtype=imgarr.dtype, width=winsize, height=winsize,
                             transform=transform, crs=image.crs) as raschip:
                    raschip.write(chip)

    print("The number of image chips is: ", index)

In [29]:
# Chip the imagery first, then the labels; both runs share the same numbering
# width so that chip N of the imagery pairs with chip N of the labels.
for raster_name, chip_kind in ((imagery, 'img'), (label, 'lbl')):
    trainchips_single(raster_name, zfill_value=zfill_value, variant=chip_kind)

Shape of training data  is:  (3, 7999, 7999)
The number of image chips is:  1296
Shape of training data  is:  (1, 7999, 7999)
The number of image chips is:  1296


### organize files

distribute the created image chips into img and lbl folders

In [30]:
# Create the img & lbl output directories inside work_dir.
# exist_ok=True keeps this cell re-runnable: a directory left over from a
# previous run is reused instead of raising FileExistsError.

dst_img = work_dir.joinpath('img')
dst_lbl = work_dir.joinpath('lbl')

dst_img.mkdir(exist_ok=True)
dst_lbl.mkdir(exist_ok=True)

In [31]:
# Sort the chips found in work_dir into the img/ and lbl/ folders, based on
# the "_img" / "_lbl" marker in each file name.
import shutil

for file_item in os.listdir(work_dir):
    src_path = work_dir.joinpath(file_item)

    # Only regular files are chips; never relocate directories.
    if not src_path.is_file():
        continue

    if "_img" in file_item:
        target_dir = dst_img
    elif "_lbl" in file_item:
        target_dir = dst_lbl
    else:
        continue

    # Move to an explicit destination file rather than into the directory:
    # a chip left over from an earlier run is then replaced instead of making
    # shutil.move raise "Destination path ... already exists". Plain strings
    # also keep the call working on Python versions older than 3.9.
    shutil.move(str(src_path), str(target_dir.joinpath(file_item)))