In [1]:
import os

import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

# NOTE(review): kept ahead of the `spatiotemporal` import on purpose — presumably it
# prepares the import path for the project packages; confirm before reordering.
import relative_imports
from spatiotemporal.SEN12MSCRTS import DatasetManager

In [2]:
# Location of the dataset on disk. Set the SEN12MSCRTS_ROOT environment variable to
# point the notebook at another copy; when it is unset, the original machine-specific
# path is used, so existing setups keep working unchanged.
root_dir = os.environ.get(
    'SEN12MSCRTS_ROOT',
    '/LOCAL2/shvl/datasets/cloud_removal/SEN12MSCRTS',
)

In [3]:
# Create the manager for the dataset under `root_dir`, then populate it via
# `load_from_file()`.
# Alternative population path, left disabled here (presumably a fresh scan of the
# dataset followed by writing the file that `load_from_file()` reads — confirm):
#     dataset_manager.load_dataset()
#     dataset_manager.save_to_file()
dataset_manager = DatasetManager(root_dir=root_dir)
dataset_manager.load_from_file()

# Check resampling across timesteps

All Sentinel-1 and Sentinel-2 images that belong to the same patch must be resampled to the same grid. We will check whether this holds for all patches in the dataset.

Note that if a single patch is affected across all timesteps, the whole tile is almost certainly affected as well.

Thus, we check individual patches but extend the results to the whole tile that the patches come from.

### Read geotransform and CRS

In [4]:
# Read the CRS / transform table exposed by `dataset_manager.stats`, with the
# notebook tqdm bar passed in for progress reporting.
# The commented-out variant below additionally enables `force_overwrite()` and
# `force_compute()` (presumably to recompute the table instead of reusing a stored
# result — confirm against the stats implementation).
# with dataset_manager.stats.progressbar(tqdm), dataset_manager.stats.force_overwrite(), dataset_manager.stats.force_compute():
with dataset_manager.stats.progressbar(tqdm):
    crs_transform_df = dataset_manager.stats.crs_transform

# Show the table as the cell output: one row per (ROI, tile, patch, timestep), with the
# CRS and six transform values for each of S1 and S2.
crs_transform_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,modality,S1,S1,S1,S1,S1,S1,S1,S2,S2,S2,S2,S2,S2,S2
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,info,crs,0,1,2,3,4,5,crs,0,1,2,3,4,5
ROI,tile,patch,timestep,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
ROIs1158,106,0,0,EPSG:32611,10.00000,0.0,517364.935995,0.0,-10.00000,3.837929e+06,EPSG:32611,10.00000,0.0,517360.000000,0.0,-10.00000,3.837930e+06
ROIs1158,106,0,1,EPSG:32611,10.00000,0.0,517359.256942,0.0,-10.00000,3.837931e+06,EPSG:32611,10.00000,0.0,517360.000000,0.0,-10.00000,3.837930e+06
ROIs1158,106,0,2,EPSG:32611,10.00000,0.0,517365.936254,0.0,-10.00000,3.837935e+06,EPSG:32611,10.00000,0.0,517360.000000,0.0,-10.00000,3.837930e+06
ROIs1158,106,0,3,EPSG:32611,10.00000,0.0,517363.546942,0.0,-10.00000,3.837931e+06,EPSG:32611,10.00000,0.0,517360.000000,0.0,-10.00000,3.837930e+06
ROIs1158,106,0,4,EPSG:32611,10.00000,0.0,517365.074063,0.0,-10.00000,3.837935e+06,EPSG:32611,10.00000,0.0,517360.000000,0.0,-10.00000,3.837930e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ROIs2017,146,299,25,EPSG:4326,0.00009,0.0,13.149449,0.0,-0.00009,4.175351e+01,EPSG:4326,0.00009,0.0,13.149449,0.0,-0.00009,4.175351e+01
ROIs2017,146,299,26,EPSG:4326,0.00009,0.0,13.149449,0.0,-0.00009,4.175351e+01,EPSG:4326,0.00009,0.0,13.149449,0.0,-0.00009,4.175351e+01
ROIs2017,146,299,27,EPSG:4326,0.00009,0.0,13.149449,0.0,-0.00009,4.175351e+01,EPSG:4326,0.00009,0.0,13.149449,0.0,-0.00009,4.175351e+01
ROIs2017,146,299,28,EPSG:4326,0.00009,0.0,13.149449,0.0,-0.00009,4.175351e+01,EPSG:4326,0.00009,0.0,13.149449,0.0,-0.00009,4.175351e+01


In [16]:
# Count the dataset index entries that have no row in the CRS/transform table and print
# the count in bold. The trailing \033[0m resets the ANSI bold attribute so the styling
# ends with the number instead of being left open at the end of the line.
print(f"CRS and Transforms missing: \033[1m{dataset_manager.data.index.difference(crs_transform_df.index).size}\033[0m")

CRS and Transforms missing: [1m0


In [17]:
# Distinct CRS identifiers found in the Sentinel-2 column of the table.
s2_crs_values = crs_transform_df[("S2", "crs")].unique()
print(f"Unique S2 CRS: {s2_crs_values}")

Unique S2 CRS: ['EPSG:32611' 'EPSG:4326' 'EPSG:32755' 'EPSG:32735']


In [18]:
# Distinct CRS identifiers found in the Sentinel-1 column of the table
# (missing entries show up as nan in the printed array).
s1_crs_values = crs_transform_df[("S1", "crs")].unique()
print(f"Unique S1 CRS: {s1_crs_values}")

Unique S1 CRS: ['EPSG:32611' 'EPSG:4326' 'EPSG:32755' nan 'EPSG:32735']


### Check Sentinel-2 timesteps

In [19]:
# Evaluate the `S2_resampled` statistic with progress reporting. Both `force_overwrite()`
# and `force_compute()` are active, and the cell output confirms the statistic is computed
# for the whole dataset on this run (presumably replacing any stored result — confirm).
with dataset_manager.stats.progressbar(tqdm), dataset_manager.stats.force_overwrite(), dataset_manager.stats.force_compute():
    s2_tiles_resampled = dataset_manager.stats.S2_resampled
    
# Show the result: one boolean `S2_resampled` flag per (ROI, tile).
s2_tiles_resampled

Compute S2_resampled for the whole dataset...


Unnamed: 0_level_0,Unnamed: 1_level_0,S2_resampled
ROI,tile,Unnamed: 2_level_1
ROIs1158,106,True
ROIs1868,17,True
ROIs1868,36,True
ROIs1868,56,True
ROIs1868,73,True
ROIs1868,85,True
ROIs1868,100,True
ROIs1868,114,True
ROIs1868,119,True
ROIs1868,121,True


### Check Sentinel-1 timesteps

In [41]:
# Evaluate the `S1_resampled` statistic with progress reporting. Both `force_overwrite()`
# and `force_compute()` are active, and the cell output confirms the statistic is computed
# for the whole dataset on this run (presumably replacing any stored result — confirm).
with dataset_manager.stats.progressbar(tqdm), dataset_manager.stats.force_overwrite(), dataset_manager.stats.force_compute():
    s1_tiles_resampling = dataset_manager.stats.S1_resampled
    
# Show the result: one boolean `S1_resampled` flag per (ROI, tile).
s1_tiles_resampling

Compute S1_resampled for the whole dataset...


Unnamed: 0_level_0,Unnamed: 1_level_0,S1_resampled
ROI,tile,Unnamed: 2_level_1
ROIs1158,106,False
ROIs1868,17,True
ROIs1868,36,True
ROIs1868,56,True
ROIs1868,73,False
ROIs1868,85,True
ROIs1868,100,True
ROIs1868,114,False
ROIs1868,119,True
ROIs1868,121,True


In [42]:
# Split the tiles by their S1 flag. The mask is computed once and reused negated for
# the complementary group; rows that are masked out or carry a missing flag are
# removed by `dropna()`, so they end up in neither group.
is_resampled = s1_tiles_resampling == True
s1_tiles_resampled = s1_tiles_resampling[is_resampled].dropna()
s1_tiles_not_resampled = s1_tiles_resampling[~is_resampled].dropna()

# Display the tiles whose S1 flag is not True.
s1_tiles_not_resampled

Unnamed: 0_level_0,Unnamed: 1_level_0,S1_resampled
ROI,tile,Unnamed: 2_level_1
ROIs1158,106,False
ROIs1868,73,False
ROIs1868,114,False
ROIs1868,143,False
ROIs2017,25,False
ROIs2017,32,False


In [43]:
# Select the CRS/transform rows that belong to tiles flagged as not resampled:
# reduce each row's index to its (ROI, tile) part and test membership in the flagged set.
not_resampled_tile_keys = crs_transform_df.index.droplevel(["patch", "timestep"])
s1_not_resampled = crs_transform_df.loc[
    not_resampled_tile_keys.isin(s1_tiles_not_resampled.index)
]
s1_not_resampled

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,modality,S1,S1,S1,S1,S1,S1,S1,S2,S2,S2,S2,S2,S2,S2
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,info,crs,0,1,2,3,4,5,crs,0,1,2,3,4,5
ROI,tile,patch,timestep,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
ROIs1158,106,0,0,EPSG:32611,10.0,0.0,517364.935995,0.0,-10.0,3.837929e+06,EPSG:32611,10.0,0.0,517360.0,0.0,-10.0,3837930.0
ROIs1158,106,0,1,EPSG:32611,10.0,0.0,517359.256942,0.0,-10.0,3.837931e+06,EPSG:32611,10.0,0.0,517360.0,0.0,-10.0,3837930.0
ROIs1158,106,0,2,EPSG:32611,10.0,0.0,517365.936254,0.0,-10.0,3.837935e+06,EPSG:32611,10.0,0.0,517360.0,0.0,-10.0,3837930.0
ROIs1158,106,0,3,EPSG:32611,10.0,0.0,517363.546942,0.0,-10.0,3.837931e+06,EPSG:32611,10.0,0.0,517360.0,0.0,-10.0,3837930.0
ROIs1158,106,0,4,EPSG:32611,10.0,0.0,517365.074063,0.0,-10.0,3.837935e+06,EPSG:32611,10.0,0.0,517360.0,0.0,-10.0,3837930.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ROIs2017,32,237,25,EPSG:32735,10.0,0.0,343508.870199,0.0,-10.0,6.236584e+06,EPSG:32735,10.0,0.0,343510.0,0.0,-10.0,6236590.0
ROIs2017,32,237,26,EPSG:32735,10.0,0.0,343508.345315,0.0,-10.0,6.236584e+06,EPSG:32735,10.0,0.0,343510.0,0.0,-10.0,6236590.0
ROIs2017,32,237,27,EPSG:32735,10.0,0.0,343511.565564,0.0,-10.0,6.236587e+06,EPSG:32735,10.0,0.0,343510.0,0.0,-10.0,6236590.0
ROIs2017,32,237,28,EPSG:32735,10.0,0.0,343509.634779,0.0,-10.0,6.236586e+06,EPSG:32735,10.0,0.0,343510.0,0.0,-10.0,6236590.0


In [44]:
# Select the CRS/transform rows that belong to tiles flagged as resampled:
# reduce each row's index to its (ROI, tile) part and test membership in the flagged set.
resampled_tile_keys = crs_transform_df.index.droplevel(["patch", "timestep"])
s1_resampled = crs_transform_df.loc[
    resampled_tile_keys.isin(s1_tiles_resampled.index)
]
s1_resampled

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,modality,S1,S1,S1,S1,S1,S1,S1,S2,S2,S2,S2,S2,S2,S2
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,info,crs,0,1,2,3,4,5,crs,0,1,2,3,4,5
ROI,tile,patch,timestep,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2
ROIs1868,17,0,0,EPSG:4326,0.00009,0.0,3.734207,0.0,-0.00009,49.961421,EPSG:4326,0.00009,0.0,3.734207,0.0,-0.00009,49.961421
ROIs1868,17,0,1,EPSG:4326,0.00009,0.0,3.734207,0.0,-0.00009,49.961421,EPSG:4326,0.00009,0.0,3.734207,0.0,-0.00009,49.961421
ROIs1868,17,0,2,EPSG:4326,0.00009,0.0,3.734207,0.0,-0.00009,49.961421,EPSG:4326,0.00009,0.0,3.734207,0.0,-0.00009,49.961421
ROIs1868,17,0,3,EPSG:4326,0.00009,0.0,3.734207,0.0,-0.00009,49.961421,EPSG:4326,0.00009,0.0,3.734207,0.0,-0.00009,49.961421
ROIs1868,17,0,4,EPSG:4326,0.00009,0.0,3.734207,0.0,-0.00009,49.961421,EPSG:4326,0.00009,0.0,3.734207,0.0,-0.00009,49.961421
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ROIs2017,146,299,25,EPSG:4326,0.00009,0.0,13.149449,0.0,-0.00009,41.753515,EPSG:4326,0.00009,0.0,13.149449,0.0,-0.00009,41.753515
ROIs2017,146,299,26,EPSG:4326,0.00009,0.0,13.149449,0.0,-0.00009,41.753515,EPSG:4326,0.00009,0.0,13.149449,0.0,-0.00009,41.753515
ROIs2017,146,299,27,EPSG:4326,0.00009,0.0,13.149449,0.0,-0.00009,41.753515,EPSG:4326,0.00009,0.0,13.149449,0.0,-0.00009,41.753515
ROIs2017,146,299,28,EPSG:4326,0.00009,0.0,13.149449,0.0,-0.00009,41.753515,EPSG:4326,0.00009,0.0,13.149449,0.0,-0.00009,41.753515


In [50]:
# Distinct CRS values per modality within the resampled tiles, displayed as an (S1, S2) pair.
tuple(s1_resampled[(modality, "crs")].unique() for modality in ("S1", "S2"))

(array(['EPSG:4326'], dtype=object), array(['EPSG:4326'], dtype=object))

# Check if S1 and S2 resampling is the same

In [53]:
# Compare the S1 and S2 halves of the table (CRS plus the six transform values)
# element-wise, then reduce to one boolean per column: True means every row matches.
s1_grid_info = s1_resampled["S1"]
s2_grid_info = s1_resampled["S2"]
(s1_grid_info == s2_grid_info).all()

info
crs    True
0      True
1      True
2      True
3      True
4      True
5      True
dtype: bool