# Final Project: Segmentation of satellite images

## 1 Data

In [3]:
from osgeo import gdal
import numpy as np
import random as rd
import os

# get images:
image_path = os.path.join("data", "satellite_image.tif")
segmentation_path = os.path.join("data", "final.tif")
data = gdal.Open(image_path)
segmentation = gdal.Open(segmentation_path)

# gdal.Open returns None instead of raising when a file cannot be opened
# (unless gdal.UseExceptions() is active), so stop here with a clear message
# rather than with an AttributeError on the first method call below:
if data is None:
    raise FileNotFoundError(f"GDAL could not open raster: {image_path}")
if segmentation is None:
    raise FileNotFoundError(f"GDAL could not open raster: {segmentation_path}")

# get coordinate info (origin of the upper left corner, pixel size and skew):
# NOTE(review): xskew and yskew are not used in this section, so the
# coordinate grids below presumably assume a north-up raster - confirm.
ulx, xres, xskew, uly, yskew, yres = data.GetGeoTransform()

# define grid for splits in pixels (origin of every 1000 px tile):
grid_x_px = np.arange(0, data.RasterXSize, step=1000)
grid_y_px = np.arange(0, data.RasterYSize, step=1000)

# calculate grid for splits in coordinates (same tile origins, georeferenced):
grid_x_co = ulx + grid_x_px * xres
grid_y_co = uly + grid_y_px * yres

# set training set, validation set and test ratio
# (the ratios are matched to the folders by position):
data_set_folders = ["training", "validation", "test"]
data_set_ratios = [0.7, 0.15, 0.15]

# create directories if needed:
# makedirs with exist_ok=True creates every missing level of the path and is a
# no-op for levels that already exist, so a data set folder that exists but
# lacks "images" or "annotations" gets completed instead of being skipped.
for folder in data_set_folders:
    for sub_folder in ("images", "annotations"):
        os.makedirs(os.path.join("data", folder, sub_folder), exist_ok=True)

# split images:
# Every pair of neighbouring grid coordinates spans one tile; the tile is
# written twice under the same file name, once cut from the satellite image
# and once cut from the segmentation raster.
# NOTE(review): the grids only hold tile origins, so the tile that starts at
# the last grid entry (the right-most column / bottom row) is never written,
# even when the raster size is an exact multiple of 1000 - confirm intended.
for ix, (x_ul, x_lr) in enumerate(zip(grid_x_co[:-1], grid_x_co[1:])):
    for iy, (y_ul, y_lr) in enumerate(zip(grid_y_co[:-1], grid_y_co[1:])):
        # choose data set randomly (one weighted draw per tile):
        data_set_folder = rd.choices(data_set_folders, weights=data_set_ratios)[0]
        # define setting for translation: cut the window given as
        # upper left x/y and lower right x/y, output forced to 1000 x 1000 px:
        translate_options = gdal.TranslateOptions(
            width=1000,
            height=1000,
            projWin=[float(x_ul), float(y_ul), float(x_lr), float(y_lr)],
        )
        tile_name = f"img_{ix}_{iy}.tif"
        # split satellite image:
        gdal.Translate(
            os.path.join("data", data_set_folder, "images", tile_name),
            data,
            options=translate_options,
        )
        # split annotations:
        gdal.Translate(
            os.path.join("data", data_set_folder, "annotations", tile_name),
            segmentation,
            options=translate_options,
        )