# Final Project: Segmentation of satellite images

## Data

In [1]:
from osgeo import gdal
import numpy as np
import random as rd
import os

class DataSetCreator:
    """Cut georeferenced image/annotation raster pairs into square tiles.

    Every tile is written as a ``.tif`` file into one of several output
    folders (e.g. training / validation / test). The folder is drawn at
    random per tile using the supplied weights; the image tile and its
    annotation tile always share folder and file name.
    """

    def __init__(self, img_paths, ann_paths, margin):
        """Store the input locations and the border margin.

        Args:
            img_paths: Sequence of image raster paths.
            ann_paths: Sequence of annotation raster paths; entry ``i`` is
                opened together with ``img_paths[i]``.
            margin: Pixel offset from the left/top raster edge at which the
                splitting grid starts, and from the right/bottom edge before
                which it stops.
        """
        self.img_paths = img_paths
        self.ann_paths = ann_paths
        self.margin = margin

    def get_gdal_data(self, index):
        """Open the image/annotation pair stored at ``index``.

        Returns:
            Tuple ``(img, ann)`` of opened GDAL datasets.

        Raises:
            RuntimeError: If GDAL cannot open one of the two files.
        """
        img_path = self.img_paths[index]
        ann_path = self.ann_paths[index]
        img = gdal.Open(img_path)
        ann = gdal.Open(ann_path)
        # Without gdal.UseExceptions(), gdal.Open signals failure by returning
        # None; fail here with the offending path instead of crashing later
        # with an AttributeError on None.
        for path, dataset in ((img_path, img), (ann_path, ann)):
            if dataset is None:
                raise RuntimeError(f"GDAL could not open raster: {path!r}")
        return img, ann

    def get_splitting_grid(self, img, split_size):
        """Compute the georeferenced coordinates of the tile grid lines.

        Pixel offsets ``margin, margin + split_size, ...`` (strictly below
        ``size - margin``) are converted to map coordinates with the origin
        and pixel size of ``img``'s geotransform.

        Args:
            img: Opened GDAL dataset.
            split_size: Grid spacing in pixels.

        Returns:
            Tuple ``(grid_x_co, grid_y_co)`` of 1-D NumPy arrays.
        """
        # The skew terms are ignored.
        # NOTE(review): this assumes a north-up raster (zero skew) -- confirm.
        ulx, xres, _xskew, uly, _yskew, yres = img.GetGeoTransform()
        grid_x_co = ulx + np.arange(self.margin, img.RasterXSize - self.margin, step=split_size) * xres
        grid_y_co = uly + np.arange(self.margin, img.RasterYSize - self.margin, step=split_size) * yres
        return grid_x_co, grid_y_co

    def create_folders(self, folders):
        """Ensure every folder has an ``images`` and an ``annotations`` subfolder.

        Missing parent directories are created as well, and folders that
        already exist (fully or partially) are completed instead of skipped,
        so the call is safe to repeat.
        """
        for folder in folders:
            for sub_folder in ("images", "annotations"):
                os.makedirs(os.path.join(folder, sub_folder), exist_ok=True)

    def split_img(self, img, ann, grid_x_co, grid_y_co, split_size, folders, ratios, prefix="img"):
        """Write one image tile and one annotation tile per grid cell.

        Args:
            img: Opened GDAL image dataset.
            ann: Opened GDAL annotation dataset.
            grid_x_co: Grid line x coordinates (see ``get_splitting_grid``).
            grid_y_co: Grid line y coordinates (see ``get_splitting_grid``).
            split_size: Width and height of each output tile in pixels.
            folders: Candidate output folders; each must already contain
                ``images`` and ``annotations`` subfolders.
            ratios: Sampling weights, one per entry of ``folders``.
            prefix: File name prefix of the tiles. Use a distinct prefix per
                source image so tiles of different sources do not collide.
        """
        # NOTE(review): both loops start at index 1, so the first grid row and
        # column are never exported. Kept as in the original; confirm whether
        # this is intended as an additional margin.
        for ix in range(1, len(grid_x_co) - 1):
            x_left, x_right = grid_x_co[ix], grid_x_co[ix + 1]
            for iy in range(1, len(grid_y_co) - 1):
                folder = rd.choices(folders, weights=ratios)[0]
                translate_options = gdal.TranslateOptions(gdal.ParseCommandLine(
                    f"-outsize {split_size} {split_size} "
                    f"-projwin {x_left} {grid_y_co[iy]} {x_right} {grid_y_co[iy + 1]} "
                    "-ot Byte"
                ))
                tile_name = f"{prefix}_{ix}_{iy}.tif"
                gdal.Translate(os.path.join(folder, "images", tile_name), img, options=translate_options)
                gdal.Translate(os.path.join(folder, "annotations", tile_name), ann, options=translate_options)

    def split_data(self, split_size, folders, ratios):
        """Split every configured image/annotation pair into tiles.

        Args:
            split_size: Tile edge length in pixels.
            folders: Output folders (created if necessary).
            ratios: Sampling weights, one per entry of ``folders``.
        """
        # The folder layout does not depend on the image, so build it once.
        self.create_folders(folders)
        for p in range(len(self.img_paths)):
            img, ann = self.get_gdal_data(p)
            grid_x_co, grid_y_co = self.get_splitting_grid(img, split_size)
            # Tile names only encode the grid position. Embed the source index
            # for every image after the first so tiles of different sources no
            # longer overwrite each other, while single-image runs keep the
            # original "img_<ix>_<iy>.tif" names.
            prefix = "img" if p == 0 else f"img{p}"
            self.split_img(img, ann, grid_x_co, grid_y_co, split_size, folders, ratios, prefix=prefix)

In [None]:
# --- Configuration: fill in the placeholders before running this cell ---
source_path = ""  # directory holding the source rasters
target_path = ""  # directory that receives the split data set
img_paths = [os.path.join(source_path, "")]  # satellite image file name(s)
ann_paths = [os.path.join(source_path, "")]  # annotation file name(s), same order as img_paths
split_names = ("training", "validation", "test")
folders = tuple(os.path.join(target_path, name) for name in split_names)
ratios = (0.7, 0.15, 0.15)  # sampling weights, aligned with `folders`
split_size = 128  # tile edge length in pixels
margin = 1500  # pixels skipped along every raster border

# --- Run the split ---
dsc = DataSetCreator(img_paths, ann_paths, margin)
dsc.split_data(split_size, folders, ratios)