# Extraction of cutouts from satellite images

**Author:** Lennart Seeger<br>
**Date created:** 2021/04/24<br>
**Last modified:** 2023/03/24<br>

In [None]:
import sys
import rasterio as rio
from pyproj import Transformer
from PIL import Image
import random
import numpy as np
import os

# Notebook environment setup.
# Put '../src' on the module search path (position 1, i.e. right after the
# first entry) so that project modules located there can be imported.
sys.path.insert(1, '../src')
# IPython magics: load the autoreload extension and set it to mode 2, which
# re-imports changed modules before each cell execution.
%load_ext autoreload
%autoreload 2
# '-1' hides all CUDA devices from libraries that honour this variable.
# NOTE(review): nothing visible in this notebook uses a GPU framework;
# presumably this is meant for helpers imported from '../src' - confirm.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'                

In [None]:
# Run configuration.
# Upper bound on the number of cutouts written by the extraction loop.
num_images = 100_000
# Tag of the position set; it is part of the positions file name and of the
# output folder name.
data_std = "avg_std30"
# File (relative to ../data/) holding the candidate cutout positions.
source_data = f"valid_positions_{data_std}.npy"
# NOTE: the extraction cell further down assigns image_size again.
image_size = 256

In [None]:
# Load the candidate cutout positions and bring them into random order.
valid_positions = np.load(f"../data/{source_data}")
print(valid_positions.shape)

# Drawing as many indices as there are positions, without replacement,
# yields a random permutation of all indices.
position_count = len(valid_positions)
random_indices = random.sample(range(position_count), position_count)
valid_positions_extraction = valid_positions[random_indices]

In [None]:
# Cut square windows out of the GeoTIFF at the shuffled positions and store
# every accepted window as "<element_counter>.jpeg" in ../data/<data_std>/.
# Windows that are mostly pure white or pure black are skipped. The loop
# stops once num_images files were written or the positions are exhausted.
cog = "/home/jovyan/work/satellite_data/all_cog.tif"
size = 256          # edge length of one cutout window, in pixels
image_size = 64     # target edge length for the optional down-sampling step
batch_size = 5000   # not used in this cell
start_row = 0       # not used in this cell
start_col = 0       # not used in this cell
out_crs = 3857      # EPSG code handed to the transformer below
element_counter = 0  # number of images written so far; also the file name
threshold = 1000    # max. tolerated count of all-white / all-black pixels

# Image.save does not create missing folders, so make sure the target
# directory exists before the first cutout is written.
output_dir = "../data/" + data_std
os.makedirs(output_dir, exist_ok=True)

with rio.open(cog) as dataset:
    # NOTE(review): the transformer is not used anywhere in this cell; it is
    # kept because later cells may rely on it - confirm and drop if unused.
    transformer = Transformer.from_crs(
        dataset.crs.to_epsg(), out_crs, always_xy=True)
    for element in valid_positions_extraction:
        row = element[0]
        col = element[1]
        # Read all bands of the size x size window whose upper-left corner is
        # (row, col); rasterio returns the data as (bands, rows, cols).
        window = dataset.read(window=((row, row + size), (col, col + size)))
        # Move the band axis to the end -> (rows, cols, bands), the layout
        # expected by PIL.
        window = np.moveaxis(window, 0, 2)

        # Quality filter: count pixels whose band sum is 255*3 or 0 and skip
        # the window when either count exceeds the threshold.
        # NOTE(review): 255*3 assumes three 8-bit bands (RGB) - confirm.
        pixel_sums = window.sum(axis=2)
        too_white = (pixel_sums == 255 * 3).sum() > threshold
        too_black = (pixel_sums == 0).sum() > threshold
        if too_white or too_black:
            continue

        # Optional: the window could be sampled down to image_size here
        # (e.g. with a reshape_images helper) before it is saved.

        im = Image.fromarray(window)
        im.save(output_dir + "/" + str(element_counter) + ".jpeg")
        element_counter += 1
        if element_counter % 1000 == 0:
            print(f"progress: {(element_counter/num_images*100):.2f}%")
        if element_counter > num_images - 1:
            break