In [1]:
import os
from datetime import datetime
import shutil
from glob import glob
import rioxarray as rxr
from rioxarray.exceptions import NoDataInBounds
import rasterio.features
import numpy as np
import geopandas as gpd
from shapely.geometry import Polygon
from tqdm import tqdm
import matplotlib.pyplot as plt
from IPython import display
from rasterio.errors import NotGeoreferencedWarning
from scipy.ndimage import gaussian_filter
import importlib.util
from rioxarray.merge import merge_arrays
from scipy.optimize import minimize
from scipy.signal import find_peaks
import warnings
import torch
import pandas as pd
import pickle
import cv2
warnings.filterwarnings("ignore", category=NotGeoreferencedWarning)
np.seterr(divide='ignore', invalid='ignore')

# from IPython.core.interactiveshell import InteractiveShell
# InteractiveShell.ast_node_interactivity = "all"

def recreate_dir(path):
    if os.path.exists(path):
        shutil.rmtree(path)
    os.makedirs(path)
    return path

def load_config(path):
    spec = importlib.util.spec_from_file_location("CFG", path)
    CFG = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(CFG)
    return CFG

In [2]:
DATA_DIR = "data/rybna_202203240654"
CFG = load_config(f"{DATA_DIR}/config.py").CALIB

In [3]:
#configure logging to file
import logging
log_path = f"{DATA_DIR}/logs/calibration_{datetime.now().strftime('%d%m%Y%H%M%S')}.log"
os.makedirs(os.path.dirname(log_path), exist_ok=True)
logging.basicConfig(filename=log_path,level=logging.INFO, format='%(asctime)s %(levelname)-8s %(message)s', datefmt='%Y-%m-%d %H:%M:%S')
logger = logging.getLogger(__name__)
logger.handlers.clear()
#logger.addHandler(logging.StreamHandler())
logger.info("Starting procedure")

In [4]:
TMP_DIR = f"{DATA_DIR}/tmp"
recreate_dir(TMP_DIR)
TIFF_DIR = f"{DATA_DIR}/tiff"
assert os.path.exists(TIFF_DIR), "tiff_dir does not exist. Please run 1_conversion.ipynb first."
GEOTIFF_OPTIM_DIR = f"{DATA_DIR}/geotiff_optim"
assert os.path.exists(GEOTIFF_OPTIM_DIR), "geotiff_dir does not exist. Please run 1_conversion.ipynb first."
TIFF_CAL_DIR = f"{DATA_DIR}/tiff_cal"
GEOTIFF_CAL_DIR = f"{DATA_DIR}/geotiff_cal"
PLOT_CLIP_DIR = f"{DATA_DIR}/plot_clip"
PLOT_CAL_DIR = f"{DATA_DIR}/plot_cal"
TEMP_OPTIM_DATASET_DIR = f"{DATA_DIR}/temp_optim_dataset"
I_CLIP_DIR = f"{TEMP_OPTIM_DATASET_DIR}/i_clip"
J_CLIP_DIR = f"{TEMP_OPTIM_DATASET_DIR}/j_clip"
CLIP_MASK_DIR = f"{TEMP_OPTIM_DATASET_DIR}/clip_mask"

'data/rybna_202203240654/tmp'

In [5]:
if CFG.CACHE and os.path.exists(f"{DATA_DIR}/footprints.pkl"):
    logger.info("Loading footprints from cache")
    with open(f"{DATA_DIR}/footprints.pkl", "rb") as f:
        footprints = pickle.load(f)
else:
    logger.info("Reading footprints from geotiffs")
    geometries = []
    names = []
    for path in tqdm(glob(f"{GEOTIFF_OPTIM_DIR}/*.tiff")):
        raster = rxr.open_rasterio(path)
        footprints = rasterio.features.shapes((raster != raster.rio.nodata).values.astype(np.int16), transform=raster.rio.transform())
        footprints = [Polygon(geom["coordinates"][0]).simplify(10) for geom, colval in footprints if colval == 1]
        assert len(footprints) == 1, "More than one footprint found"
        names.append(os.path.basename(path))
        geometries.append(footprints[0])
    footprints = gpd.GeoDataFrame({"name": names, "geometry": geometries})
    #write CRS
    footprints.crs = CFG.CRS
    with open(f"{DATA_DIR}/footprints.pkl", "wb") as f:
        pickle.dump(footprints, f)

100%|██████████| 890/890 [02:16<00:00,  6.50it/s]


In [6]:
#erode footprints
footprints["geometry"] = footprints["geometry"].buffer(-CFG.EROSION)

In [7]:
def nan_gaussian_filter(arr, sigma):
    """Apply gaussian filter to array while ignoring nans"""
    V=arr.copy()
    V[np.isnan(arr)]=0
    VV=gaussian_filter(V,sigma=sigma)
    W=0*arr.copy()+1
    W[np.isnan(arr)]=0
    WW=gaussian_filter(W,sigma=sigma)
    Z=VV/WW
    Z[np.isnan(arr)]=np.nan
    return Z


In [8]:
if CFG.CACHE and os.path.exists(f"{TEMP_OPTIM_DATASET_DIR}/pairs.pkl"):
    logger.info("Loading cached pairs")
    with open(f"{TEMP_OPTIM_DATASET_DIR}/pairs.pkl", "rb") as f:
        pairs_i, pairs_j, pairs_area = pickle.load(f)
else:
    logger.info("Generating temprature global optimization dataset")
    recreate_dir(TEMP_OPTIM_DATASET_DIR)
    recreate_dir(I_CLIP_DIR)
    recreate_dir(J_CLIP_DIR)
    recreate_dir(CLIP_MASK_DIR)
    pairs_i = []
    pairs_j = []
    pairs_area = []
    idx = 0
    for i in tqdm(range(len(footprints))):
        i_raster = rxr.open_rasterio(f"{GEOTIFF_OPTIM_DIR}/{footprints.iloc[i]['name']}", masked=True)
        for j in range(i+1, len(footprints)):
            if footprints.iloc[i].geometry.intersects(footprints.iloc[j].geometry):
                intersection = footprints.iloc[i].geometry.intersection(footprints.iloc[j].geometry)
                if intersection.area < CFG.MIN_INTERSECTION_AREA:
                    logger.info(f"i ({i}), j ({j}): intersection area too small")
                    continue
                j_raster = rxr.open_rasterio(f"{GEOTIFF_OPTIM_DIR}/{footprints.iloc[j]['name']}", masked=True)
                try:
                    i_clip = i_raster.rio.clip([intersection])
                    j_clip = j_raster.rio.clip([intersection])
                except NoDataInBounds:
                    logger.info(f"i ({i}), j ({j}): NoDataInBounds")
                    continue
                j_clip = j_clip.rio.reproject_match(i_clip)
                i_clip = i_clip.values[0]
                j_clip = j_clip.values[0]
                i_clip = nan_gaussian_filter(i_clip, sigma=CFG.GAUSS_SIGMA)
                j_clip = nan_gaussian_filter(j_clip, sigma=CFG.GAUSS_SIGMA)
                mask = (~np.isnan(i_clip) & ~np.isnan(j_clip)).astype(np.int16)
                i_clip[np.isnan(i_clip)] = np.nanmean(i_clip)
                j_clip[np.isnan(j_clip)] = np.nanmean(j_clip)
                i_clip = cv2.resize(i_clip, (256, 256), interpolation=cv2.INTER_LINEAR)
                j_clip = cv2.resize(j_clip, (256, 256), interpolation=cv2.INTER_LINEAR)
                mask = cv2.resize(mask, (256, 256), interpolation=cv2.INTER_NEAREST)
                #assert i_clip, j_clip, mask dont have nans
                if np.isnan(i_clip).any():
                    logger.info(f"i ({i}), j ({j}): i_clip has nan")
                    continue
                if np.isnan(j_clip).any():
                    logger.info(f"i ({i}), j ({j}): j_clip has nan")
                    continue
                if np.isnan(mask).any():
                    logger.info(f"i ({i}), j ({j}): mask has nan")
                    continue
                np.save(f"{I_CLIP_DIR}/{idx}.npy", i_clip)
                np.save(f"{J_CLIP_DIR}/{idx}.npy", j_clip)
                np.save(f"{CLIP_MASK_DIR}/{idx}.npy", mask)
                pairs_i.append(i)
                pairs_j.append(j)
                pairs_area.append(intersection.area)
                idx += 1
    pairs_i = np.array(pairs_i)
    pairs_j = np.array(pairs_j)
    pairs_area = np.array(pairs_area)
    #pickle dump
    with open(f"{TEMP_OPTIM_DATASET_DIR}/pairs.pkl", "wb") as f:
        pickle.dump((pairs_i, pairs_j, pairs_area), f)

'data/rybna_202203240654/temp_optim_dataset'

'data/rybna_202203240654/temp_optim_dataset/i_clip'

'data/rybna_202203240654/temp_optim_dataset/j_clip'

'data/rybna_202203240654/temp_optim_dataset/clip_mask'

  9%|▊         | 76/890 [15:00<2:40:50, 11.86s/it]


KeyboardInterrupt: 

In [None]:
#pytorch dataset
class TempOptimDataset(torch.utils.data.Dataset):
    def __init__(self, i_clip_dir, j_clip_dir, pairs_i, pairs_j, pairs_area):
        self.i_clip_dir = i_clip_dir
        self.j_clip_dir = j_clip_dir
        self.pairs_i = pairs_i
        self.pairs_j = pairs_j
        self.pairs_area = pairs_area
    def __len__(self):
        return len(self.pairs_i)
    def __getitem__(self, idx):
        i_clip = np.load(f"{self.i_clip_dir}/{idx}.npy")
        j_clip = np.load(f"{self.j_clip_dir}/{idx}.npy")
        mask = np.load(f"{CLIP_MASK_DIR}/{idx}.npy")
        i_clip = torch.tensor(i_clip, dtype=torch.float32)
        j_clip = torch.tensor(j_clip, dtype=torch.float32)
        mask = torch.tensor(mask, dtype=torch.float32)
        i_idx = torch.tensor(self.pairs_i[idx])
        j_idx = torch.tensor(self.pairs_j[idx])
        area = torch.tensor(self.pairs_area[idx])
        #resize to 256x256
        return i_idx, j_idx, i_clip, j_clip, mask, area

In [None]:
dataset = TempOptimDataset(I_CLIP_DIR, J_CLIP_DIR, pairs_i, pairs_j, pairs_area)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=CFG.BATCH_SIZE, shuffle=True, num_workers=CFG.NUM_WORKERS)

In [None]:
CFG = load_config(f"{DATA_DIR}/config.py").CALIB
n_images = len(footprints)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
a_coefs = torch.ones(n_images, dtype=torch.float32, device=device, requires_grad=True)
b_coefs = torch.zeros(n_images, dtype=torch.float32, device=device, requires_grad=True)

optimizer = torch.optim.Adam([a_coefs, b_coefs], lr=CFG.LEARNING_RATE)
for epoch in range(CFG.EPOCHS):
    print(f"Epoch {epoch}/{CFG.EPOCHS}")
    epoch_losses = []
    for i, (i_idx, j_idx, i_clip, j_clip, mask, area) in enumerate(pbar := tqdm(dataloader)):
        optimizer.zero_grad()
        #compute loss
        i_idx, j_idx, i_clip, j_clip, area = i_idx.to(device), j_idx.to(device), i_clip.to(device), j_clip.to(device), area.to(device)
        #add singleton dimension t
        i_clip_cal = a_coefs[i_idx][:, None, None] * i_clip + b_coefs[i_idx][:, None, None]
        j_clip_cal = a_coefs[j_idx][:, None, None] * j_clip + b_coefs[j_idx][:, None, None]
        i_clip_cal_masked = i_clip_cal * mask
        j_clip_cal_masked = j_clip_cal * mask
        rel_loss = torch.mean(torch.abs(i_clip_cal_masked - j_clip_cal_masked))
        abs_loss = 0.0000001*(torch.mean(torch.abs(i_clip_cal_masked - j_clip_cal_masked))+torch.mean(torch.abs(j_clip_cal_masked - j_clip_cal_masked)))
        loss = rel_loss + abs_loss
        
        #print(f"{loss.item()} = {rel_loss.item()} + {abs_loss.item()}")
        #set pbar description
        pbar.set_description(f"loss: {loss.item()}")
        epoch_losses.append(loss.item())
        if torch.isnan(loss):
            break
        loss.backward()
        optimizer.step()
    print(f"Epoch loss: {np.mean(epoch_losses)}")
    break

Epoch 0/100000


loss: nan:  17%|█▋        | 17/101 [03:55<19:24, 13.86s/it]               

Epoch loss: nan





# TODO

Merge rasters

In [None]:
def average_merge(merged_data, new_data, merged_mask, new_mask, index=None, roff=None, coff=None):
    merged_data_masked = np.ma.array(merged_data, mask=merged_mask)
    merged_data[:] = np.ma.masked_array((merged_data_masked,new_data)).mean(axis=0)
rasters = []
for path in tqdm(glob(f"{GEOTIFF_CAL_DIR}/*.tiff"), desc="Loading rasters"):
    rasters.append(rxr.open_rasterio(path, masked=True).copy())
print("Merging...")
mosaic = merge_arrays(rasters, method=average_merge)
mosaic.rio.to_raster(f"{DATA_DIR}/mosaic_cal.tiff")
print("Merging done")