clip input data by given grid (for faster processing)

workflow logic:
input data - locations
clip input data (imagery, labels) by the defined grid (e.g. 112 m cell = image chip size)
and save it into the defined folder

In [1]:
!pip install tondortools

import tondortools.geo #if you get error, check the kernel you are running this in



In [1]:
import os
import tondortools

from osgeo import ogr, gdal
from tondortools.geo import BoundingBox
from pathlib import Path

import subprocess

import shutil

In [8]:
# Target CRS as an EPSG code (string); handed to BoundingBox.from_geom and to gdalwarp (-t_srs).
EPSG = '32636' #Amman
#EPSG = '32733' #Kinshasa

# NOTE(review): the paths below are absolute and user-specific; adapt them before running elsewhere.
# Vector grid whose features define the extent of each chip.
grid_file = Path('/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/grid_224m_select100.shp')
#grid_file = Path('/home/lucie/GDA_UGS/ESA_GDA_UGS/Kinshasa/01_processing_testing/predict_outsideAOI_test/grid_112m_test.shp')
#work_folder = Path('/home/lucie/GDA_UGS/ESA_GDA_UGS/Kinshasa/02_model_input')
# Folder the chips are written to and later sorted into sub-folders.
work_folder = Path('/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input')

# Source rasters to be clipped; when clipping the label raster, call
# clip_by_grid_to_imgchips with variant='lbl'.
img_file = Path('/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif')
lbl_file = Path('/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif')

#class distribution
#tile = '22' #2-digit format (e.g. 01)
zfill_value = 4 # number of digits the chip id is zero-padded to in the file name (e.g. 0001)

# Names of the sub-folders (inside work_folder) that organize_files sorts the chips into.
dst_folder_img = 'img_100' #OR: imagery
dst_folder_lbl = 'lbl_100'

In [9]:
def clip_by_grid_to_imgchips(grid_file,img_file,dst_parent_folder,EPSG,zfill_value,variant='img'):

    """
    Clip a raster into one GeoTIFF chip per feature of a vector grid.

    For every feature of ``grid_file`` a bounding box is derived from the
    feature geometry and ``gdalwarp`` is run on ``img_file`` with that box as
    target extent (``-te``), ``EPSG`` as target CRS (``-t_srs``), nearest
    neighbour resampling and DEFLATE compression.

    Parameters
    ----------
    grid_file : path of the vector grid; every feature needs an ``id`` field.
    img_file : path of the raster to clip (imagery or labels).
    dst_parent_folder : folder the chips are written to (created if missing).
    EPSG : EPSG code of the target CRS, e.g. ``'32636'``.
    zfill_value : number of digits the feature id is zero-padded to.
    variant : 'img' >>> chips are named ``<id>_img.tif`` (satellite imagery);
              anything else >>> ``<id>_lbl.tif`` (labels / reference data).

    Returns
    -------
    Path of the chip written for the last grid feature, or ``None`` when the
    grid contains no features.

    Raises
    ------
    RuntimeError : the grid file could not be opened by OGR.
    subprocess.CalledProcessError : gdalwarp exited with a non-zero status.
    """

    grid_ds = ogr.Open(str(grid_file))
    if grid_ds is None:
        # ogr.Open returns None (instead of raising) when GDAL exceptions are disabled.
        raise RuntimeError(f'Cannot open grid file: {grid_file}')
    grid_lyr = grid_ds.GetLayer()

    # The clip extents come from the grid coordinates, so point out a grid CRS
    # that does not match the requested target CRS.
    grid_srs = grid_lyr.GetSpatialRef()
    grid_epsg = grid_srs.GetAttrValue('AUTHORITY', 1) if grid_srs is not None else None
    if grid_epsg is not None and str(grid_epsg) != str(EPSG):
        print(f'WARNING: grid CRS is EPSG:{grid_epsg} but target EPSG is {EPSG}; '
              'check that the clip extents are correct')

    suffix = 'img' if variant == 'img' else 'lbl'

    # Create the output folder once instead of testing for it on every feature.
    dst_folder = Path(dst_parent_folder)
    dst_folder.mkdir(parents=True, exist_ok=True)
    print(dst_folder)

    dst_file = None  # stays None when the grid has no features
    for feature in grid_lyr:
        # Round-trip through WKT to obtain a standalone copy of the feature geometry.
        geom = ogr.CreateGeometryFromWkt(feature.geometry().ExportToWkt())
        bbox = BoundingBox.from_geom(geom, EPSG)

        chip_id = str(int(feature["id"])).zfill(zfill_value)
        dst_file = dst_folder.joinpath(f'{chip_id}_{suffix}.tif')
        print(dst_file)

        cmd = ["gdalwarp",
               "-t_srs", "EPSG:{}".format(EPSG),
               "-te", str(bbox.xmin), str(bbox.ymin), str(bbox.xmax), str(bbox.ymax),
               "-r", 'near',
               "-co", "COMPRESS=DEFLATE",
               str(img_file),
               str(dst_file)]

        # check=True: stop on a failed gdalwarp call instead of silently skipping the chip.
        subprocess.run(cmd, check=True)

    return dst_file



In [10]:
# Cut the imagery and the label raster into chips; each call returns the path of the last chip written.
img_dist_file = clip_by_grid_to_imgchips(
    grid_file=grid_file,
    img_file=img_file,
    dst_parent_folder=work_folder,
    EPSG=EPSG,
    zfill_value=zfill_value,
    variant='img',
)
lbl_dist_file = clip_by_grid_to_imgchips(
    grid_file=grid_file,
    img_file=lbl_file,
    dst_parent_folder=work_folder,
    EPSG=EPSG,
    zfill_value=zfill_value,
    variant='lbl',
)

/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0120_img.tif
Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif [1/1] : 0Using internal nodata values (e.g. 0) for image /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif.
Copying nodata values from source /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif to destination /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0120_img.tif.
...10...20...30...40...50...60...70...80...90...100 - done.
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0121_img.tif
Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif [1/1] : 0Using interna

...10...20...30...40...50...60...70...80...90...100 - done.
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0139_img.tif
Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif [1/1] : 0Using internal nodata values (e.g. 0) for image /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif.
Copying nodata values from source /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif to destination /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0139_img.tif.
...10...20...30...40...50...60...70...80...90...100 - done.
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0140_img.tif
Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/mod

...10...20...30...40...50...60...70...80...90...100 - done.
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0159_img.tif
Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif [1/1] : 0Using internal nodata values (e.g. 0) for image /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif.
Copying nodata values from source /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif to destination /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0159_img.tif.
...10...20...30...40...50...60...70...80...90...100 - done.
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0160_img.tif
Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/mod

Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif [1/1] : 0Using internal nodata values (e.g. 0) for image /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif.
Copying nodata values from source /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif to destination /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0178_img.tif.
...10...20...30...40...50...60...70...80...90...100 - done.
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0179_img.tif
Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif [1/1] : 0Using internal nodata values (e.g. 0) for image /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif.
Copying n

...10...20...30...40...50...60...70...80...90...100 - done.
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0197_img.tif
Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif [1/1] : 0Using internal nodata values (e.g. 0) for image /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif.
Copying nodata values from source /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif to destination /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0197_img.tif.
...10...20...30...40...50...60...70...80...90...100 - done.
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0205_img.tif
Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/mod

...10...20...30...40...50...60...70...80...90...100 - done.
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0223_img.tif
Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif [1/1] : 0Using internal nodata values (e.g. 0) for image /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif.
Copying nodata values from source /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif to destination /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0223_img.tif.
...10...20...30...40...50...60...70...80...90...100 - done.
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0224_img.tif
Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/mod

Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif [1/1] : 0Using internal nodata values (e.g. 0) for image /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif.
Copying nodata values from source /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif to destination /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0242_img.tif.
...10...20...30...40...50...60...70...80...90...100 - done.
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0243_img.tif
Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif [1/1] : 0Using internal nodata values (e.g. 0) for image /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif.
Copying n

...10...20...30...40...50...60...70...80...90...100 - done.
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0261_img.tif
Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif [1/1] : 0Using internal nodata values (e.g. 0) for image /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif.
Copying nodata values from source /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif to destination /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0261_img.tif.
...10...20...30...40...50...60...70...80...90...100 - done.
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0262_img.tif
Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/mod

Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif [1/1] : 0Using internal nodata values (e.g. 0) for image /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif.
Copying nodata values from source /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif to destination /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0280_img.tif.
...10...20...30...40...50...60...70...80...90...100 - done.
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0281_img.tif
Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif [1/1] : 0Using internal nodata values (e.g. 0) for image /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/imagery_clip_3808m_GRNIR.tif.
Copying n

Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif [1/1] : 0Using internal nodata values (e.g. 255) for image /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif.
Copying nodata values from source /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif to destination /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0138_lbl.tif.
...10...20...30...40...50...60...70...80...90...100 - done.
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input
Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif [1/1] : 0Using internal nodata values (e.g. 255) for image /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif.
Copying nodata values from source /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_

Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif [1/1] : 0Using internal nodata values (e.g. 255) for image /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif.
Copying nodata values from source /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif to destination /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0159_lbl.tif.
...10...20...30...40...50...60...70...80...90...100 - done.
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input
Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif [1/1] : 0Using internal nodata values (e.g. 255) for image /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif.
Copying nodata values from source /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_

Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif [1/1] : 0Using internal nodata values (e.g. 255) for image /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif.
Copying nodata values from source /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif to destination /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0188_lbl.tif.
...10...20...30...40...50...60...70...80...90...100 - done.
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input
Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif [1/1] : 0Using internal nodata values (e.g. 255) for image /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif.
Copying nodata values from source /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_

Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif [1/1] : 0Using internal nodata values (e.g. 255) for image /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif.
Copying nodata values from source /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif to destination /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0209_lbl.tif.
...10...20...30...40...50...60...70...80...90...100 - done.
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input
Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif [1/1] : 0Using internal nodata values (e.g. 255) for image /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif.
Copying nodata values from source /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_

Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif [1/1] : 0Using internal nodata values (e.g. 255) for image /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif.
Copying nodata values from source /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif to destination /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0230_lbl.tif.
...10...20...30...40...50...60...70...80...90...100 - done.
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input
Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif [1/1] : 0Using internal nodata values (e.g. 255) for image /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif.
Copying nodata values from source /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_

Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif [1/1] : 0Using internal nodata values (e.g. 255) for image /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif.
Copying nodata values from source /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif to destination /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0259_lbl.tif.
...10...20...30...40...50...60...70...80...90...100 - done.
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input
Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif [1/1] : 0Using internal nodata values (e.g. 255) for image /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif.
Copying nodata values from source /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_

Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif [1/1] : 0Using internal nodata values (e.g. 255) for image /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif.
Copying nodata values from source /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif to destination /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/0280_lbl.tif.
...10...20...30...40...50...60...70...80...90...100 - done.
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input
Creating output file that is 448P x 448L.
Processing /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif [1/1] : 0Using internal nodata values (e.g. 255) for image /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/labels_clip_3808m_8bit.tif.
Copying nodata values from source /home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_

In [7]:
# Path returned by clip_by_grid_to_imgchips for the imagery, i.e. the chip of the last grid feature.
print(img_dist_file)

/home/lucie/GDA_UGS/ESA_GDA_UGS/Kinshasa/02_model_input/0155_img.tif


In [12]:
# create img & lbl directories

def organize_files(work_dir,img_dir,variant='img'):

    """
    Move chips of one kind from ``work_dir`` into the sub-folder ``img_dir``.

    Parameters
    ----------
    work_dir : folder that currently holds the chips.
    img_dir : name of the destination folder, created inside ``work_dir``.
    variant : 'img' >>> move entries whose name contains "_img" (satellite imagery);
              anything else >>> move entries whose name contains "_lbl" (labels).

    The destination folder may already exist, so the call can be repeated.
    ``shutil.move`` still raises ``shutil.Error`` when an entry of the same
    name is already present in the destination.
    """

    marker = "_img" if variant=='img' else "_lbl"

    work_dir = Path(work_dir)
    dst_folder = work_dir.joinpath(img_dir)
    print(dst_folder)

    # exist_ok keeps a re-run from failing on the folder created by an earlier run.
    dst_folder.mkdir(parents=True, exist_ok=True)

    # os.listdir returns a snapshot, so moving entries while iterating is safe.
    for file_item in os.listdir(work_dir):
        if marker not in file_item:
            continue
        # str(): shutil.move accepts path-like arguments only from Python 3.9 on.
        shutil.move(str(work_dir.joinpath(file_item)), str(dst_folder))

In [13]:
# Sort the freshly written chips into their image / label sub-folders.
organize_files(work_dir=work_folder, img_dir=dst_folder_img, variant='img')
organize_files(work_dir=work_folder, img_dir=dst_folder_lbl, variant='lbl')

/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/img_100
/home/lucie/GDA_UGS/ESA_GDA_UGS/GDA_urban/Phase2/model_input/lbl_100


In [5]:
def organize_files_v2(dst_folder,work_dir,code_word):

    """
    Move every entry of ``work_dir`` whose name contains ``code_word`` into
    the sub-folder ``dst_folder``.

    usage: for specific folder structure
    * workflow: cut raster into tiles, which will be further split into image chips *

    Parameters
    ----------
    dst_folder : name of the destination folder, created inside ``work_dir`` if missing.
    work_dir : folder that currently holds the entries to sort.
    code_word : substring selecting the entries to move, e.g.
                '_imagery' >>> organizing images into imagery folder
                '_labels' >>> organizing images into labels folder
    """
    work_dir = Path(work_dir)
    dst_path = work_dir.joinpath(dst_folder)
    dst_path.mkdir(parents=True, exist_ok=True)

    # os.listdir returns a snapshot, so moving entries while iterating is safe.
    for file_item in os.listdir(work_dir):
        if code_word in file_item:
            # str(): shutil.move accepts path-like arguments only from Python 3.9 on.
            shutil.move(str(work_dir.joinpath(file_item)), str(dst_path))

In [6]:
# Alternative folder layout: gather the "_labels" entries into the label folder.
# Image counterpart (disabled): organize_files_v2(dst_folder_img, work_folder, code_word='_img')
organize_files_v2(dst_folder=dst_folder_lbl, work_dir=work_folder, code_word='_labels')

#### class distribution (lbl related)

In [None]:
import rasterio as rs 
import csv
import numpy as np

# Raster whose per-class pixel counts are computed (the label raster from the configuration).
#img = r"/home/lucie/GDA_UGS/ESA_GDA_UGS/Freetown/input_data_Phase3/8/lbl_tile8.tif"
img = lbl_file
base_dir = Path('/home/lucie/GDA_UGS/ESA_GDA_UGS/Kinshasa')

# Read the raster a single time and close the dataset afterwards; the original
# read it twice (all bands, then band 1 again) and left the file handle open.
with rs.open(img) as image:
    imgarr = image.read()
print("Shape of training data  is: ",imgarr.shape) 
# Band 1 as a view of the array read above (same values as image.read(1)).
invarr = imgarr[0]

# i: distinct pixel values found in band 1, j: number of pixels per value
i,j = np.unique(invarr, return_counts= True)
#print("**** number of sample pixels for each class ****")
#plt.bar(i, j,alpha=0.7, tick_label=i)

# NOTE(review): the CSV export below is disabled by being wrapped in a string literal;
# it relies on `tile`, which is commented out in the configuration cell.
"""
#src: https://www.pythontutorial.net/python-basics/python-write-csv-file/
csv_path = base_dir.joinpath('class_distribution.csv')

header=['tile','other','low_veg_active','low_veg_inactive','high_veg_shrub','bare_soil','high_veg_trees','water','buildings','roads']
  
#data = [version,kernel_size,n_epochs_real,batch_size,weights,num_classes,early_stopping.monitor,early_stopping.patience,shape,current_date,train_input_size_orig,num_training_samples,str(config.__dict__),note]
data = [tile,*j] #asterisk >> array without brackets (src: https://blog.finxter.com/how-to-print-a-numpy-array-without-brackets-in-python/)
print(data)

if os.path.exists(csv_path):
    with open(csv_path, 'a') as f:
        writer = csv.writer(f)
        writer.writerow(data)
else:
    with open(csv_path, 'w', encoding='UTF8') as f:
        writer = csv.writer(f)

        # write the header
        writer.writerow(header)

        # write the data
        writer.writerow(data)
"""

Shape of training data  is:  (1, 51077, 94160)
