In [None]:
import os
import glob
import utils

In [None]:
import shutil

# Root folders: source imagery and geodata are read from `filserver`,
# generated tiles are written below `dataserver`.
filserver = "/filserver/user/"
dataserver = "/data/user/"
# Sub-folder of each sensor's imagery directory that is scanned for scenes.
year = '2020'
# Edge length of one square tile, in pixels.
tile_size = 512

# Root of the tile data set. Derived from `tile_size` so that it always agrees
# with the "<tile_size>_rnd" folder that tile_scene() writes to.
img_dest_folder = os.path.join(dataserver, "imagery", f"{tile_size}_rnd")

# Folder levels below img_dest_folder: <type>/<sensor>/<function>.
types = ["test", "train"]
sensors = ["s1", "s2", "s1s2"]
functions = ["mask", "img"]

In [None]:
import geojson
# Read the tile layout. Every GeoJSON feature carries the tile `name` and the
# split (`type`: 'train' or 'test') it belongs to in its properties.
with open('/filserver/user/geodata/train_test_tiles_rndplc.geojson') as f:
    gj = geojson.load(f)
features = gj['features']


def _tile_names(tile_type=None):
    """Return the names of all features, or only of those with the given `type`."""
    return [
        feature['properties']['name']
        for feature in features
        if tile_type is None or feature['properties']['type'] == tile_type
    ]


tile_list = _tile_names()
print(len(tile_list))
train_tiles = _tile_names('train')
print(len(train_tiles))
test_tiles = _tile_names('test')
print(len(test_tiles))

# Share of all tiles that ended up in the training split, in percent.
train_share = len(train_tiles) / len(tile_list)
percent_train = train_share * 100
print(percent_train)

display(train_tiles[:10])

In [None]:
def make_clean_folder(dir, name):
    """Create an empty folder `name` inside `dir` and return its path.

    If the folder already exists it is removed together with everything in it
    before being re-created, so previous content is lost. Missing parent
    directories are created as well.

    Parameters
    ----------
    dir : str
        Directory that will contain the folder.
    name : str
        Name of the folder to (re)create.

    Returns
    -------
    str
        Path of the freshly created, empty folder.
    """
    folder = os.path.join(dir, name)
    if os.path.exists(folder):
        shutil.rmtree(folder)
    # makedirs (rather than mkdir) so a missing parent does not raise.
    os.makedirs(folder)
    return folder

In [None]:
# (Re)create the empty folder tree <img_dest_folder>/<type>/<sensor>/<function>.
# WARNING: make_clean_folder deletes existing folders, so running this cell
# wipes every tile written so far.
os.makedirs(img_dest_folder, exist_ok=True)
for tile_type in types:  # not named `type`, to keep the builtin usable
    type_folder = make_clean_folder(img_dest_folder, tile_type)
    for sensor in sensors:
        sensor_folder = make_clean_folder(type_folder, sensor)
        for func in functions:
            make_clean_folder(sensor_folder, func)

In [None]:
import numpy as np
def is_cloudy(array, value=15, threshold=0.05):
    """Tell whether too large a share of `array` lies above `value`.

    Parameters
    ----------
    array : numpy.ndarray
        Values to test (tile_scene passes the band it reads as band 13).
    value : number
        Elements strictly greater than this are counted.
    threshold : float
        Maximum tolerated fraction of counted elements.

    Returns
    -------
    bool
        True when the counted fraction is strictly greater than `threshold`.
    """
    above = array > value
    fraction_above = above.sum() / array.size
    return bool(fraction_above > threshold)
    
def has_nodata(array, threshold=0.05, test_band=0):
    """Tell whether one band of `array` contains too many zero values.

    Parameters
    ----------
    array : numpy.ndarray
        Array whose first axis is indexed with `test_band`.
    threshold : float
        Maximum tolerated fraction of zero elements in the tested band.
    test_band : int
        Index along the first axis of the band to inspect.

    Returns
    -------
    bool
        True when the fraction of elements equal to 0 in the tested band is
        strictly greater than `threshold`.
    """
    band = array[test_band]
    zero_share = np.count_nonzero(band == 0) / band.size
    return bool(zero_share > threshold)

In [None]:
from itertools import product
import rasterio as rio
from rasterio import windows

def get_windows(ds, tiles, tile_size=512):
    """Yield a (window, transform) pair for every tile name in `tiles`.

    Each tile name is split on '_': the first field is used as column offset
    and the last field as row offset of a `tile_size` x `tile_size` window.
    Every window is intersected with the part of the raster that starts at the
    origin and spans a whole number of tiles in each direction.

    Parameters
    ----------
    ds : opened rasterio dataset
        Provides `meta['width']`, `meta['height']` and `transform`.
    tiles : list of str
        Tile names of the form "<col_off>_..._<row_off>".
    tile_size : int
        Edge length of a tile in pixels.

    Yields
    ------
    (rasterio.windows.Window, affine transform)
        The clipped window and the transform of that window within `ds`.
    """
    meta = ds.meta
    usable_width = meta['width'] - (meta['width'] % tile_size)
    usable_height = meta['height'] - (meta['height'] % tile_size)

    col_offsets = [int(name.split('_')[0]) for name in tiles]
    row_offsets = [int(name.split('_')[-1]) for name in tiles]

    raster_extent = windows.Window(col_off=0, row_off=0, width=usable_width, height=usable_height)
    for col_off, row_off in zip(col_offsets, row_offsets):
        tile_window = windows.Window(col_off=col_off, row_off=row_off, width=tile_size, height=tile_size)
        # NOTE(review): a tile lying completely outside `raster_extent`
        # presumably makes `intersection` raise - confirm.
        clipped = tile_window.intersection(raster_extent)
        yield clipped, windows.transform(clipped, ds.transform)

def _write_tile_pair(img_tile, mask_ds, window, meta, msk_meta, outpath_img, outpath_mask):
    """Write one image tile and the mask tile covering the same window."""
    with rio.open(outpath_img, 'w', **meta) as out_img_ds:
        out_img_ds.write(img_tile)
    with rio.open(outpath_mask, 'w', **msk_meta) as out_mask_ds:
        out_mask_ds.write(mask_ds.read(window=window))


def tile_scene(img, date, sensor, tile_set, type, tile_size, mask_ds, clouds=False):
    """Cut one scene into tiles and write image/mask pairs for the accepted ones.

    Tiles are written to
    ``<dataserver>/imagery/<tile_size>_rnd/<type>/<sensor>/img`` and ``.../mask``
    as ``<date>_<col_off>_<row_off>.png``. The folders are created if missing.

    A tile is skipped (nothing is written for it) when

    * ``clouds`` is true and band 13 of the scene is flagged by ``is_cloudy``, or
    * ``clouds`` is false and ``has_nodata`` flags the tile, testing array
      index 13 for sensor ``'s1s2'`` and index 0 for every other sensor.

    Parameters
    ----------
    img : str
        Path of the scene raster to tile.
    date : str
        Label used as prefix of the tile file names.
    sensor : str
        Sensor folder name ('s1', 's2' or 's1s2'); also selects the no-data test.
    tile_set : list of str
        Tile names understood by ``get_windows``.
    type : str
        Split folder name ('train' or 'test').
    tile_size : int
        Edge length of a tile in pixels.
    mask_ds : opened rasterio dataset
        Mask raster; read with the same windows as the scene.
    clouds : bool
        When true, only bands 1-12 are written and band 13 is used for the
        cloud test.
    """
    tile_root = os.path.join(dataserver, f"imagery/{tile_size}_rnd", type, sensor)
    output_folder_img = os.path.join(tile_root, 'img')
    output_folder_mask = os.path.join(tile_root, 'mask')
    # makedirs also creates missing parents, which os.mkdir would fail on.
    os.makedirs(output_folder_img, exist_ok=True)
    os.makedirs(output_folder_mask, exist_ok=True)

    with rio.open(img) as in_ds:
        # NOTE(review): the output profiles are copies of the source profiles,
        # so the '.png' files are presumably written with the source driver -
        # confirm this is intended.
        meta = in_ds.meta.copy()
        msk_meta = mask_ds.meta.copy()
        if clouds:
            # Band 13 is only used for the cloud test and is not written.
            meta['count'] = 12

        for window, transform in get_windows(in_ds, tile_set, tile_size=tile_size):
            meta['transform'] = transform
            meta['width'], meta['height'] = window.width, window.height
            msk_meta['transform'] = transform
            msk_meta['width'], msk_meta['height'] = window.width, window.height

            tile_name = f'{date}_{int(window.col_off)}_{int(window.row_off)}.png'
            outpath_img = os.path.join(output_folder_img, tile_name)
            outpath_mask = os.path.join(output_folder_mask, tile_name)

            if clouds:
                if is_cloudy(in_ds.read(13, window=window)):
                    continue
                tile = in_ds.read(tuple(range(1, 13)), window=window)
            else:
                tile = in_ds.read(window=window)
                test_band = 13 if sensor == 's1s2' else 0
                if has_nodata(tile, test_band=test_band):
                    continue

            _write_tile_pair(tile, mask_ds, window, meta, msk_meta, outpath_img, outpath_mask)

In [None]:
# The mask raster is opened once here and handed to every tile_scene() call.
mask_path = '/filserver/user/geodata/mask_2020.tif'
mask_ds = rio.open(mask_path)


def _scene_paths(sensor_dir, suffix):
    """List the scenes of `year` in one sensor folder whose name ends with `suffix`."""
    scene_dir = os.path.join(filserver, "imagery", sensor_dir, year)
    return glob.glob(f'{scene_dir}/*{suffix}', recursive=False)


s1_images = _scene_paths("Sentinel-1", 'aligned.tiff')
s2_images = _scene_paths("Sentinel-2", 'aligned.tiff')
s1s2_images = _scene_paths("Sentinel-1_Sentinel-2", 'aligned_sar.tiff')

In [None]:
sensor = "s1"

# Empty the img/mask folders of this sensor so tiles from an earlier run do not
# mix with the new ones. The loop variable is not called `type`, so the builtin
# stays usable in later cells.
for tile_type in types:
    for func in functions:
        folder = os.path.join(img_dest_folder, tile_type, sensor, func)
        os.makedirs(folder, exist_ok=True)
        for fn in os.listdir(folder):
            os.unlink(os.path.join(folder, fn))

for img in s1_images:
    print(img)
    # The tile name prefix is the 5th '_'-separated field of the file name
    # (presumably the acquisition date - confirm against the naming scheme).
    date = os.path.splitext(os.path.basename(img))[0].split("_")[4]
    tile_scene(img, date, sensor, train_tiles, 'train', tile_size, mask_ds)
    tile_scene(img, date, sensor, test_tiles, 'test', tile_size, mask_ds)

In [None]:
sensor = "s2"

# Empty the img/mask folders of this sensor so tiles from an earlier run do not
# mix with the new ones. The loop variable is not called `type`, so the builtin
# stays usable in later cells.
for tile_type in types:
    for func in functions:
        folder = os.path.join(img_dest_folder, tile_type, sensor, func)
        os.makedirs(folder, exist_ok=True)
        for fn in os.listdir(folder):
            os.unlink(os.path.join(folder, fn))

for img in s2_images:
    print(img)
    # The tile name prefix is the 3rd '_'-separated field of the file name
    # (presumably the acquisition date - confirm against the naming scheme).
    date = os.path.splitext(os.path.basename(img))[0].split("_")[2]
    # clouds=True: tiles flagged by the cloud test are skipped.
    tile_scene(img, date, sensor, train_tiles, 'train', tile_size, mask_ds, clouds=True)
    tile_scene(img, date, sensor, test_tiles, 'test', tile_size, mask_ds, clouds=True)

In [None]:
sensor = "s1s2"

# Empty the img/mask folders of this sensor so tiles from an earlier run do not
# mix with the new ones. The loop variable is not called `type`, so the builtin
# stays usable in later cells.
for tile_type in types:
    for func in functions:
        folder = os.path.join(img_dest_folder, tile_type, sensor, func)
        os.makedirs(folder, exist_ok=True)
        for fn in os.listdir(folder):
            os.unlink(os.path.join(folder, fn))

for img in s1s2_images:
    print(img)
    # The tile name prefix is the 3rd '_'-separated field of the file name
    # (presumably the acquisition date - confirm against the naming scheme).
    date = os.path.splitext(os.path.basename(img))[0].split("_")[2]
    tile_scene(img, date, sensor, train_tiles, 'train', tile_size, mask_ds)
    tile_scene(img, date, sensor, test_tiles, 'test', tile_size, mask_ds)

In [None]:
# NOTE(review): scratch arithmetic. 400 matches the per-image tile count used
# in the utilisation cell; what the factor 12 stands for is not evident from
# the notebook - confirm, or delete this cell.
400*12

In [None]:
# Tile bookkeeping: how many of the tiles that could have been cut from the
# source scenes were actually written (tiles are skipped by the cloud and
# no-data tests in tile_scene).

# Tiles per source image.
# NOTE(review): presumably len(tile_list), len(train_tiles) and len(test_tiles)
# - confirm, then derive them instead of hard-coding.
TILES_PER_IMAGE = 400
TRAIN_TILES_PER_IMAGE = 300
TEST_TILES_PER_IMAGE = 100


def _count_tiles(split, sensor):
    """Count the image tiles on disk for one split ('train', 'test' or '*') and sensor."""
    return len(glob.glob(os.path.join(img_dest_folder, split, sensor, 'img', '*.png')))


def _utilization(n_written, n_potential):
    """Fraction of potential tiles that were written; NaN when there is no potential tile."""
    return n_written / n_potential if n_potential else float('nan')


n_s1_images = len(s1_images)
n_s2_images = len(s2_images)
n_s1s2_images = len(s1s2_images)

pot_n_s1_tiles = n_s1_images * TILES_PER_IMAGE
pot_n_s2_tiles = n_s2_images * TILES_PER_IMAGE
pot_n_s1s2_tiles = n_s1s2_images * TILES_PER_IMAGE

n_s1_train_tiles = _count_tiles('train', 's1')
n_s2_train_tiles = _count_tiles('train', 's2')
n_s1s2_train_tiles = _count_tiles('train', 's1s2')

n_s1_test_tiles = _count_tiles('test', 's1')
n_s2_test_tiles = _count_tiles('test', 's2')
n_s1s2_test_tiles = _count_tiles('test', 's1s2')

# '*' counts the tiles of all splits. All three sensors are counted below the
# same root (the s2 count previously used a different directory).
n_s1_tiles = _count_tiles('*', 's1')
n_s2_tiles = _count_tiles('*', 's2')
n_s1s2_tiles = _count_tiles('*', 's1s2')

utilization_s1_train = _utilization(n_s1_train_tiles, n_s1_images * TRAIN_TILES_PER_IMAGE)
utilization_s2_train = _utilization(n_s2_train_tiles, n_s2_images * TRAIN_TILES_PER_IMAGE)
utilization_s1s2_train = _utilization(n_s1s2_train_tiles, n_s1s2_images * TRAIN_TILES_PER_IMAGE)

utilization_s1_test = _utilization(n_s1_test_tiles, n_s1_images * TEST_TILES_PER_IMAGE)
utilization_s2_test = _utilization(n_s2_test_tiles, n_s2_images * TEST_TILES_PER_IMAGE)
utilization_s1s2_test = _utilization(n_s1s2_test_tiles, n_s1s2_images * TEST_TILES_PER_IMAGE)

utilization_s1 = _utilization(n_s1_tiles, pot_n_s1_tiles)
utilization_s2 = _utilization(n_s2_tiles, pot_n_s2_tiles)
utilization_s1s2 = _utilization(n_s1s2_tiles, pot_n_s1s2_tiles)

# Columns: potential tiles, train tiles, train utilisation, test tiles,
# test utilisation, overall utilisation.
print(pot_n_s1_tiles, n_s1_train_tiles, utilization_s1_train, n_s1_test_tiles, utilization_s1_test, utilization_s1)
print(pot_n_s2_tiles, n_s2_train_tiles, utilization_s2_train, n_s2_test_tiles, utilization_s2_test, utilization_s2)
print(pot_n_s1s2_tiles, n_s1s2_train_tiles, utilization_s1s2_train, n_s1s2_test_tiles, utilization_s1s2_test, utilization_s1s2)

In [None]:
# Potential number of s1s2 training tiles (300 per scene), followed by the
# number of s1s2 training tiles actually on disk as the cell output.
print(300 * len(s1s2_images))
len(glob.glob('/data/user/imagery/512_rnd/train/s1s2/img/*.png'))

In [None]:
import rasterio as rio
import numpy as np

# Show every s1s2 image that was written for one example tile.
# Tile files are named "<date>_<col_off>_<row_off>.png", so the pattern is
# anchored with "_" to avoid matching other tiles whose column offset merely
# ends in the same digits.
ex_tile = '1407_77'

# Sorted so that the i-th image and the i-th mask belong to the same file
# name; glob itself returns paths in arbitrary order.
img_tiles = sorted(glob.glob(f'/data/user/imagery/512_rnd/*/s1s2/img/*_{ex_tile}.png'))
mask_tiles = sorted(glob.glob(f'/data/user/imagery/512_rnd/*/s1s2/mask/*_{ex_tile}.png'))
print(len(img_tiles), len(mask_tiles))

for img, msk in zip(img_tiles, mask_tiles):
    utils.show_msi(img, bands=[4,13,13])
    # utils.show_single_band_img(msk)
    