In [None]:
import os
import shutil
import pandas as pd
import geopandas as gpd
import rasterio
import s2cloudless
import numpy as np
import rasterio.warp

In [None]:
import sys
sys.path.append('..')

In [None]:
import mysecrets
import fetch_from_cluster
import modify_images
import rsutils.utils
import create_stack

In [None]:
# Load the Sentinel-2 scene catalog (GeoJSON) into a GeoDataFrame.
# Later cells read its 'id' and 'cloud_cover' columns.
catalog_gdf = gpd.read_file('../data/cluster_files/satellite/Sentinel-2/catalog.geojson')

In [None]:
# Collect the ids of scenes whose 'cloud_cover' lies strictly between 60 and 90
# (both bounds excluded).
# The (misspelt) name `suffiently_cloudy_ids` is kept because later cells use it.
cloud_cover = catalog_gdf['cloud_cover']
is_sufficiently_cloudy = (cloud_cover > 60) & (cloud_cover < 90)
suffiently_cloudy_ids = catalog_gdf.loc[is_sufficiently_cloudy, 'id'].to_list()

In [None]:
# Number of scene ids that passed the cloud-cover filter.
len(suffiently_cloudy_ids)

In [None]:
# Download the zipped archive from the cluster into ../data/cluster_files.
# The returned value is used further down as the archive path to unpack.
download_kwargs = dict(
    sshcreds=mysecrets.SSH_UMD_SASIRAJANN,
    remotepath='/gpfs/data1/cmongp2/sasirajann/data/misc/165bca4_s2l1c.zip',
    download_folderpath='../data/cluster_files',
    # overwrite = True,
)
zip_filepath = fetch_from_cluster.download_file_from_cluster(**download_kwargs)

In [None]:
# Folder the downloaded archive is extracted into; also the root that the
# catalog's file paths are joined onto below.
cropped_folderpath = '../data/165bca4_s2l1c_2'

In [None]:
# Extract the downloaded archive into the working folder.
shutil.unpack_archive(
    filename=zip_filepath,
    extract_dir=cropped_folderpath,
)

In [None]:
# Path of the catalog.csv expected inside the extracted archive.
catalog_filepath = os.path.join(cropped_folderpath, 'catalog.csv')

In [None]:
# Read the per-file catalog; later cells use its 'id', 'band', 'filepath'
# and 'area_contribution' columns.
catalog_df = pd.read_csv(catalog_filepath)

In [None]:
# Root the catalog's file paths (presumably relative to the archive root) at the
# extracted folder.
# The column is overwritten in place, so the prefix check keeps this cell
# idempotent: re-running it must not prepend the folder a second time.
_cropped_prefix = os.path.join(cropped_folderpath, '')  # folder path with trailing separator
catalog_df['filepath'] = catalog_df['filepath'].apply(
    lambda x: x if str(x).startswith(_cropped_prefix) else os.path.join(cropped_folderpath, x)
)

In [None]:
# Keep only catalog rows with an 'area_contribution' of exactly 100 that belong
# to one of the sufficiently cloudy scenes selected earlier.
has_full_area_contribution = catalog_df['area_contribution'] == 100
is_selected_scene = catalog_df['id'].isin(suffiently_cloudy_ids)
selected_catalog_df = catalog_df[has_full_area_contribution & is_selected_scene]
selected_catalog_df

In [None]:
# Build a nested mapping: scene id -> {band name -> file path}.
band_filepath_by_id = selected_catalog_df.groupby('id')[['band', 'filepath']].apply(
    lambda group: dict(zip(group['band'], group['filepath']))
)
id_band_filepath_dict = band_filepath_by_id.to_dict()

In [None]:
# Same per-scene {band -> file path} mappings as above, shown as a plain list
# (scene ids dropped) for inspection.
selected_catalog_df.groupby('id')[['band', 'filepath']].apply(
    lambda group: dict(zip(group['band'], group['filepath']))
).to_list()

In [None]:
# Use the first scene id in the mapping as the working example.
key_0 = list(id_band_filepath_dict)[0]
key_0

In [None]:
# {band name -> file path} mapping of the example scene; used by every
# resampling cell below.
band_filepath_dict = id_band_filepath_dict[key_0]
band_filepath_dict

In [None]:
# Grid polygons (judging by the file name: Sentinel-2 grids over Ethiopia).
eth_s2grids_gdf = gpd.read_file('../../ethiopia/data/ethiopia_s2_grids_esa_stats.geojson')

# Candidate grid ids; the trailing comments record why each one is interesting.
S2GRIDS_OF_INTEREST = [
    '17b4c3c', # div 4
    '17b37fc', # div 2
    '164c59c', # corner
    '165bca4', # intersection of tiles with different crs
]

# Index 3 picks '165bca4'. Only the 'id' and 'geometry' columns are kept for cropping.
grid_of_interest = S2GRIDS_OF_INTEREST[3]
is_grid_of_interest = eth_s2grids_gdf['id'] == grid_of_interest
shapes_gdf = eth_s2grids_gdf.loc[is_grid_of_interest, ['id', 'geometry']]

In [None]:
# Trial run on one band: B01 goes through a resample step that uses B08 as the
# reference (nearest-neighbour), followed by a crop step using `shapes_gdf`.
# The result is written to a scratch JP2.
b01_resample_step = (
    modify_images.resample_by_ref,
    {
        'ref_filepath': band_filepath_dict['B08'],
        'resampling': rasterio.warp.Resampling.nearest,
    },
)
b01_crop_step = (
    modify_images.crop,
    {'shapes_gdf': shapes_gdf, 'nodata': 0, 'all_touched': True},
)
modify_images.modify_image(
    src_filepath=band_filepath_dict['B01'],
    dst_filepath='../data/testing_resample_crop_B01.jp2',
    sequence=[b01_resample_step, b01_crop_step],
)

In [None]:
# Output folder for the resampled/cropped bands (and, at the end, the cloud mask).
# Created up front; an already existing folder is not an error.
resampled_folderpath = '../data/testing_resampling/'
os.makedirs(name=resampled_folderpath, exist_ok=True)

In [None]:
# Names of the 13 Sentinel-2 bands processed in this notebook, in the order
# they are resampled.
sentinel2_bands = [
    'B01', 'B02', 'B03',
    'B04', 'B05', 'B06',
    'B07', 'B08', 'B8A',
    'B09', 'B10', 'B11',
    'B12',
]
sentinel2_bands

In [None]:
# Run every band through the same two steps as the B01 trial: resample with
# B08 as the reference (nearest-neighbour), then crop with `shapes_gdf`.
# Outputs go to `resampled_folderpath`; the output path of each band is
# recorded in `resampled_band_filepath_dict`.
resampled_band_filepath_dict = {}
for band in sentinel2_bands:
    band_filepath = band_filepath_dict[band]
    resampled_filepath = rsutils.utils.modify_filepath(
        filepath=band_filepath,
        new_folderpath=resampled_folderpath,
    )
    resampled_band_filepath_dict[band] = resampled_filepath

    # The steps are rebuilt on every iteration, exactly as the original did.
    resample_step = (
        modify_images.resample_by_ref,
        {
            'ref_filepath': band_filepath_dict['B08'],
            'resampling': rasterio.warp.Resampling.nearest,
        },
    )
    crop_step = (
        modify_images.crop,
        {'shapes_gdf': shapes_gdf, 'nodata': 0, 'all_touched': True},
    )
    modify_images.modify_image(
        src_filepath=band_filepath,
        dst_filepath=resampled_filepath,
        sequence=[resample_step, crop_step],
    )

In [None]:
# Inspect the {band name -> resampled file path} mapping built above.
resampled_band_filepath_dict

In [None]:
# Sanity check: print height and width of every resampled band so that any
# size mismatch between bands is visible at a glance.
for band in sentinel2_bands:
    resampled_path = resampled_band_filepath_dict[band]
    with rasterio.open(resampled_path) as src:
        band_meta = src.meta
        print(band_meta['height'], band_meta['width'])

In [None]:
# Reminder of the full band list before picking the s2cloudless subset.
sentinel2_bands

In [None]:
# The 10-band subset fed to s2cloudless, in stacking order:
# 1, 2, 4, 5, 8, 8A, 9, 10, 11, 12
s2cloudless_bands = [
    'B01', 'B02',
    'B04', 'B05',
    'B08', 'B8A',
    'B09', 'B10',
    'B11', 'B12',
]

In [None]:
# Read the resampled rasters of the s2cloudless bands, in `s2cloudless_bands`
# order. `out_meta` is overwritten on each pass, so after the loop it holds a
# copy of the metadata of the last band read; it is reused when writing the mask.
band_stack = []

for band in s2cloudless_bands:
    band_filepath = resampled_band_filepath_dict[band]
    with rasterio.open(band_filepath) as src:
        out_meta = src.meta.copy()
        band_array = src.read()
    band_stack.append(band_array)

In [None]:
# Inspect the raster metadata captured from the last band read.
out_meta

In [None]:
# Stack the per-band arrays along a new trailing axis, which becomes the band axis.
# NOTE(review): presumably each array is (1, height, width), giving
# (1, height, width, n_bands) — confirm against the shape shown in the next cell.
band_stack_ndarray = np.stack(band_stack, axis=-1)

In [None]:
# Check the shape of the stacked array before it is handed to the cloud detector.
band_stack_ndarray.shape

In [None]:
# Configure the s2cloudless pixel cloud detector.
# NOTE(review): all_bands=False presumably tells the detector that the input
# holds only the 10-band subset listed in `s2cloudless_bands` rather than all
# 13 bands — confirm against the s2cloudless documentation.
cloud_detector = s2cloudless.S2PixelCloudDetector(
    threshold=0.4,
    average_over=4,
    dilation_size=2,
    all_bands=False,
)

In [None]:
# Scaling constants applied to the raw band values before cloud detection:
#     scaled = (raw + RADIO_ADD_OFFSET) / QUANTIFICATION_VALUE
# NOTE(review): per the ESA forum link below, the radiometric offset was
# introduced with processing baseline 04.00 — confirm it applies to the scenes used here.
# References:
# https://stackoverflow.com/questions/77783414/using-s2cloudless-to-generate-cloud-mask-using-sentinel-2-l1c-raw-data
# https://forum.step.esa.int/t/info-introduction-of-additional-radiometric-offset-in-pb04-00-products/35431
# https://sentiwiki.copernicus.eu/web/s2-processing
RADIO_ADD_OFFSET = -1000
QUANTIFICATION_VALUE = 10000

In [None]:
# Scale the raw band values and run the cloud detector on them:
#     scaled = (raw + RADIO_ADD_OFFSET) / QUANTIFICATION_VALUE
# The stack is cast to float64 before the arithmetic. If the rasters are stored
# as unsigned integers (presumably uint16 — TODO confirm), adding the negative
# Python int offset directly raises OverflowError under NumPy 2 promotion rules.
# For the integer pixel values involved, the float64 result equals what the
# integer-then-divide path produced on older NumPy.
scaled_band_stack = (
    band_stack_ndarray.astype(np.float64) + RADIO_ADD_OFFSET
) / QUANTIFICATION_VALUE
cmk = cloud_detector.get_cloud_masks(data=scaled_band_stack)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Histogram of the strictly positive mask values only (zeros are left out).
positive_mask_values = cmk[cmk > 0]
plt.hist(positive_mask_values.ravel())

In [None]:
# Inspect the raw cloud-mask array returned by the detector.
cmk

In [None]:
# Count the mask elements equal to 1 (presumably the pixels flagged as cloud).
(cmk == 1).sum()

In [None]:
# The mask is written with the metadata captured from the band rasters, so the
# 'dtype' entry has to be switched to the mask's own dtype first.
out_meta.update(dtype=cmk.dtype)

In [None]:
# Let the project helper adjust the metadata for the output driver, then show it.
# NOTE(review): `out_meta` is reassigned from itself, so re-running this cell
# is only safe if the helper is idempotent — confirm.
out_meta = rsutils.utils.driver_specific_meta_updates(meta=out_meta)
out_meta

In [None]:
# Write the cloud mask next to the resampled bands as CMK.jp2, using the
# adjusted band metadata.
# NOTE(review): confirm the metadata's nodata value and the JP2 driver options
# suit a mask (e.g. lossless encoding, 0 not treated as nodata).
cmk_filepath = os.path.join(resampled_folderpath, 'CMK.jp2')
with rasterio.open(cmk_filepath, 'w', **out_meta) as dst:
    dst.write(cmk)