In [1]:
import geopandas as gpd
import datetime
import rasterio
import rasterio.warp
import os

In [2]:
import sys
# Add the parent directory to the module search path.
# NOTE(review): the project modules imported in the next cell are presumably
# resolved through this entry — confirm.
sys.path.append('..')

In [4]:
import mysecrets
import fetch_from_cluster
import create_datacube
import rsutils.modify_images as modify_images
import rsutils.utils
import crop_and_zip

In [5]:
eth_s2grids_gdf = gpd.read_file('../../ethiopia/data/ethiopia_s2_grids_esa_stats.geojson')

In [6]:
# `id` values that the cells below look up in `eth_s2grids_gdf`.
# The trailing comment on each entry is the author's label for that grid.
S2GRIDS_OF_INTEREST = [
    '17b4c3c', # div 4
    '17b37fc', # div 2
    '164c59c', # corner
    '165bca4', # intersection of tiles with different crs
]

In [7]:
# Write the geometries of the grids of interest (with their ids restored as a
# column) to a GeoJSON file.
(
    eth_s2grids_gdf
    .set_index('id')
    .loc[S2GRIDS_OF_INTEREST, 'geometry']
    .reset_index()
    .to_file('../data/selected_eth_grids.geojson')
)

In [8]:
# Keep only the `id` and `geometry` columns of the grid whose id is the fourth
# entry of S2GRIDS_OF_INTEREST.
selected_s2grid_gdf = eth_s2grids_gdf.loc[
    eth_s2grids_gdf['id'] == S2GRIDS_OF_INTEREST[3],
    ['id', 'geometry'],
]

In [10]:
# Inputs shared by the catalog-filtering, download and datacube cells below.
shapes_gdf = selected_s2grid_gdf
startdate = datetime.datetime(2021, 3, 1)
enddate = datetime.datetime(2021, 4, 1)
# Credentials come from the local `mysecrets` module rather than being written inline.
sshcreds = mysecrets.SSH_UMD_SASIRAJANN
# Local folder passed below as `local_root_path` / `satellite_folderpath`.
satellite_folderpath = '../data/cluster_files/satellite'
bands = ['B01', 'B02', 'B03', 'B04', 'B08']
# Passed as `overwrite` when the catalog file is downloaded.
overwrite_catalog = True
njobs = 4

In [11]:
# Translate the remote catalog path into a path rooted at `satellite_folderpath`.
catalog_filepath = fetch_from_cluster.remotepath_to_localpath(
    remotepath = fetch_from_cluster.FILEPATH_SENTINEL2_CATALOG,
    remote_root_path = fetch_from_cluster.FOLDERPATH_SATELLITE,
    local_root_path = satellite_folderpath,
)

# Download the catalog from the cluster to that local path;
# `overwrite_catalog` is forwarded as `overwrite`.
fetch_from_cluster.download_file_from_cluster(
    sshcreds = sshcreds,
    remotepath = fetch_from_cluster.FILEPATH_SENTINEL2_CATALOG,
    download_filepath = catalog_filepath,
    overwrite = overwrite_catalog,
)

# Filter the downloaded catalog using `shapes_gdf`, `startdate` and `enddate`.
# NOTE(review): how the date bounds are treated (inclusive/exclusive) is
# decided inside `filter_catalog` — confirm there.
catalog_gdf = create_datacube.filter_catalog(
    catalog_filepath = catalog_filepath,
    shapes_gdf = shapes_gdf,
    startdate = startdate,
    enddate = enddate,
)

In [12]:
catalog_filepath

'../data/cluster_files/satellite/Sentinel-2/catalog.geojson'

In [14]:
full_catalog_gdf = gpd.read_file(catalog_filepath)

In [15]:
full_catalog_gdf

Unnamed: 0,id,timestamp,s3url,local_folderpath,files,last_update,cloud_cover,geometry
0,S2A_MSIL1C_20211224T074331_N0500_R092_T37PDK_2...,2021-12-24 07:57:40.190000+00:00,s3://EODATA/Sentinel-2/MSI/L1C_N0500/2021/12/2...,/gpfs/data1/cmongp2/sasirajann/fetch_satdata/d...,"B10.jp2,B09.jp2,B04.jp2,B8A.jp2,B05.jp2,B01.jp...",2024-08-20 13:54:40.022000+00:00,6.12,"MULTIPOLYGON (((38.16166 9.04578, 39.0888 9.04..."
1,S2B_MSIL1C_20210413T073609_N0500_R092_T37PEK_2...,2021-04-13 07:57:32.025000+00:00,s3://EODATA/Sentinel-2/MSI/L1C_N0500/2021/04/1...,/gpfs/data1/cmongp2/sasirajann/fetch_satdata/d...,"B10.jp2,B09.jp2,B04.jp2,B8A.jp2,B05.jp2,B01.jp...",2024-08-20 13:54:40.023000+00:00,18.33,"MULTIPOLYGON (((38.99983 9.04673, 39.99885 9.0..."
2,S2A_MSIL1C_20211114T074151_N0500_R092_T37PDK_2...,2021-11-14 07:57:40.238000+00:00,s3://EODATA/Sentinel-2/MSI/L1C_N0500/2021/11/1...,/gpfs/data1/cmongp2/sasirajann/fetch_satdata/d...,"B10.jp2,B09.jp2,B04.jp2,B8A.jp2,B05.jp2,B01.jp...",2024-08-20 13:54:40.025000+00:00,7.87,"MULTIPOLYGON (((38.16657 9.04579, 39.0888 9.04..."
3,S2A_MSIL1C_20211204T074301_N0500_R092_T37NEJ_2...,2021-12-04 07:57:49.492000+00:00,s3://EODATA/Sentinel-2/MSI/L1C_N0500/2021/12/0...,/gpfs/data1/cmongp2/sasirajann/fetch_satdata/d...,"B10.jp2,B09.jp2,B04.jp2,B8A.jp2,B05.jp2,B01.jp...",2024-08-20 13:54:40.027000+00:00,0.14,"MULTIPOLYGON (((38.99983 8.14203, 39.99648 8.1..."
4,S2B_MSIL1C_20211129T074149_N0500_R092_T37NDJ_2...,2021-11-29 07:57:48.816000+00:00,s3://EODATA/Sentinel-2/MSI/L1C_N0500/2021/11/2...,/gpfs/data1/cmongp2/sasirajann/fetch_satdata/d...,"B10.jp2,B09.jp2,B04.jp2,B8A.jp2,B05.jp2,B01.jp...",2024-08-20 13:54:40.028000+00:00,4.87,"MULTIPOLYGON (((38.09192 8.14101, 39.08859 8.1..."
...,...,...,...,...,...,...,...,...
364,S2B_MSIL1C_20210429T075609_N0500_R035_T36PZU_2...,2021-04-29 08:16:35.068000+00:00,s3://EODATA/Sentinel-2/MSI/L1C_N0500/2021/04/2...,/gpfs/data1/cmongp2/sasirajann/fetch_satdata/d...,"B8A.jp2,B04.jp2,B11.jp2,B02.jp2,B07.jp2,B05.jp...",2024-08-22 04:57:50.104000+00:00,66.62,"MULTIPOLYGON (((35.76151 12.65022, 36.68453 12..."
365,S2B_MSIL1C_20190305T073809_N0500_R092_T37PDK_2...,2019-03-05 07:57:36.552000+00:00,s3://EODATA/Sentinel-2/MSI/L1C_N0500/2019/03/0...,/gpfs/data1/cmongp2/sasirajann/fetch_satdata/d...,"B01.jp2,B02.jp2,B04.jp2,B07.jp2,B11.jp2,B09.jp...",2024-09-09 08:28:18.355000+00:00,34.40,"MULTIPOLYGON (((38.17604 9.04581, 39.0888 9.04..."
366,S2B_MSIL1C_20190305T073809_N0500_R092_T37NEJ_2...,2019-03-05 07:57:47.839000+00:00,s3://EODATA/Sentinel-2/MSI/L1C_N0500/2019/03/0...,/gpfs/data1/cmongp2/sasirajann/fetch_satdata/d...,"B01.jp2,B02.jp2,B04.jp2,B07.jp2,B11.jp2,B09.jp...",2024-09-09 08:28:36.212000+00:00,64.97,"MULTIPOLYGON (((38.99983 8.14203, 39.99648 8.1..."
367,S2B_MSIL1C_20190305T073809_N0500_R092_T37PEK_2...,2019-03-05 07:57:33.460000+00:00,s3://EODATA/Sentinel-2/MSI/L1C_N0500/2019/03/0...,/gpfs/data1/cmongp2/sasirajann/fetch_satdata/d...,"B01.jp2,B02.jp2,B04.jp2,B07.jp2,B11.jp2,B09.jp...",2024-09-09 08:28:52.547000+00:00,28.32,"MULTIPOLYGON (((38.99983 9.04673, 39.99885 9.0..."


In [13]:
catalog_gdf.shape

(48, 9)

In [None]:
catalog_gdf

In [None]:
selected_s2grid_gdf

In [None]:
# Run the cluster download with the shared run configuration and keep the
# returned table as `band_filepaths_df`.
band_filepaths_df = fetch_from_cluster.download_intersecting_sentinel2_tiles_from_cluster(
    shapes_gdf = shapes_gdf,
    startdate = startdate,
    enddate = enddate,
    sshcreds = sshcreds,
    satellite_folderpath = satellite_folderpath,
    bands = bands,
    overwrite_catalog = overwrite_catalog,
    njobs = njobs,
)

In [None]:
band_filepaths_df

In [None]:
# Peek at the first row's folder path. `.iloc[0]` selects by position; a plain
# `[0]` looks up the index *label* 0 and raises KeyError when the filtered
# catalog has no row with that label.
catalog_gdf['local_folderpath'].iloc[0]

In [None]:
# Re-root every `local_folderpath` from the cluster's satellite folder onto the
# local `satellite_folderpath`.
# NOTE(review): `create_stack` is not imported in any visible cell, so this
# raises NameError on a fresh kernel — confirm the intended module.
# NOTE(review): the column is overwritten with a transform of itself, so
# re-running this cell applies the re-rooting to already re-rooted paths.
catalog_gdf['local_folderpath'] = catalog_gdf['local_folderpath'].apply(
    lambda folderpath: create_stack.change_parent_folderpath(
        filepath = folderpath,
        parent_folderpath = fetch_from_cluster.FOLDERPATH_SATELLITE,
        new_parent_folderpath = satellite_folderpath,
    )
)

In [None]:
# Build the path for the locally rewritten catalog: re-root the remote catalog
# path onto `satellite_folderpath`, then pass it through `modify_filepath` with
# prefix 'local_'.
# NOTE(review): `create_stack` is not imported in any visible cell — confirm.
# NOTE(review): presumably the prefix is applied to the file name; verify in
# `rsutils.utils.modify_filepath`.
local_catalog_filepath = rsutils.utils.modify_filepath(
    filepath = create_stack.change_parent_folderpath(
        filepath = fetch_from_cluster.FILEPATH_SENTINEL2_CATALOG,
        parent_folderpath = fetch_from_cluster.FOLDERPATH_SATELLITE,
        new_parent_folderpath = satellite_folderpath,
    ),
    prefix = 'local_'
)
# Display the resulting path as the cell output.
local_catalog_filepath

In [None]:
# Peek at the first row's folder path. `.iloc[0]` selects by position; a plain
# `[0]` looks up the index *label* 0 and raises KeyError when the filtered
# catalog has no row with that label.
catalog_gdf['local_folderpath'].iloc[0]

In [None]:
catalog_gdf.to_file(local_catalog_filepath)

In [None]:
# zip_filepath = crop_and_zip.crop_and_zip(
#     shapes_gdf = shapes_gdf,
#     catalog_filepath = local_catalog_filepath,
#     startdate = startdate,
#     enddate = enddate,
#     bands = bands,
#     zip_filepath = '../data/testing_crop_and_zip',
#     satellite_folderpath = satellite_folderpath,
# )

In [None]:
# zip_filepath

In [None]:
out_folderpath = '../data/testing_create_stack/resample/'

In [None]:
rsutils.utils.get_all_files_in_folder('../data', keep_extensions=['.jp2.aux.xml'])

In [None]:
# Delete every file that `get_all_files_in_folder` returns for '../data' with
# the '.jp2.aux.xml' extension filter. A plain loop replaces the list
# comprehension so the cell does not build (and display) a throwaway list of
# `None` values just to trigger the deletions.
for filepath in rsutils.utils.get_all_files_in_folder('../data', keep_extensions=['.jp2.aux.xml']):
    os.remove(filepath)

In [None]:
# Build the datacube from the locally rewritten catalog using the shared run
# configuration. The output and working directories are the same folder here.
# NOTE(review): `out_folderpath` defined in an earlier cell
# ('../data/testing_create_stack/resample/') is not used; a different literal
# is passed instead — confirm which is intended.
create_datacube.create_datacube(
    shapes_gdf = shapes_gdf,
    catalog_filepath = local_catalog_filepath,
    startdate = startdate,
    enddate = enddate,
    bands = bands,
    out_folderpath = '../data/testing_create_stack',
    nodata = 0,
    working_dir = '../data/testing_create_stack',
    njobs = njobs,
    dst_crs = None,
    resampling = rasterio.warp.Resampling.nearest,
    resampling_ref_band = 'B08',
)

In [None]:
datacube, metadata = create_datacube.load_datacube(folderpath='../data/testing_create_stack')

In [None]:
datacube.shape

In [None]:
metadata

In [None]:
def get_shape(filepath):
    """Return the `(height, width)` recorded in the raster's metadata.

    The file at `filepath` is opened with rasterio only for the duration of
    the lookup.
    """
    with rasterio.open(filepath) as dataset:
        meta = dataset.meta
        return (meta['height'], meta['width'])

In [None]:
band_filepaths_df['new_shape'] = band_filepaths_df['filepath'].apply(get_shape)

In [None]:
band_filepaths_df['new_shape'].value_counts()

In [None]:
# Distinct timestamps in `band_filepaths_df`, in ascending order; the cell
# output is how many there are.
timestamps = sorted(band_filepaths_df['timestamp'].unique().tolist())
len(timestamps)

In [None]:
# Nested lookup: timestamp -> {band: filepath}. Each timestamp group's
# (band, filepath) rows are turned into the key/value pairs of one dict.
timestamp_band_filepaths_dict = (
    band_filepaths_df
    .groupby(['timestamp'])[['band', 'filepath']]
    .apply(lambda g: dict(map(tuple, g.values.tolist())))
    .to_dict()
)

In [None]:
bands

In [None]:
import numpy as np

In [None]:
# Read every requested band for every timestamp and assemble one array.
# Within a timestamp the band arrays are stacked on a new last axis; the
# per-timestamp arrays are then concatenated along axis 0. `meta` keeps a copy
# of the metadata of the first file opened.
stack = []
meta = None
for timestamp in timestamps:
    band_arrays = []
    for band in bands:
        with rasterio.open(timestamp_band_filepaths_dict[timestamp][band]) as src:
            if meta is None:
                meta = src.meta.copy()
            band_arrays.append(src.read())
    stack.append(np.stack(band_arrays, axis=-1))
    # Drop the per-timestamp list once it has been stacked.
    del band_arrays
stack = np.concatenate(stack, axis=0)

In [None]:
stack.shape

In [None]:
# Drop the 'driver' entry from the copied raster metadata. `pop` with a default
# (instead of `del`) keeps the cell re-runnable: a second run no longer raises
# KeyError once the key is gone. The trailing `;` stops the popped value from
# being shown as the cell output.
meta.pop('driver', None);

In [None]:
meta

In [None]:
band_filepaths_df['shape'] = band_filepaths_df['filepath'].apply(get_shape)

In [None]:
unique_shapes = band_filepaths_df['shape'].unique()

In [None]:
# unique_shapes = band_filepaths_df['shape'].value_counts().index

In [None]:
unique_shapes

In [None]:
# For each distinct image shape, take the first 'B08' file that has that shape.
filepaths_to_merge = [
    band_filepaths_df[
        (band_filepaths_df['band'] == 'B08')
        & (band_filepaths_df['shape'] == shape)
    ]['filepath'].tolist()[0]
    for shape in unique_shapes
]

In [None]:
filepaths_to_merge

In [None]:
# NOTE(review): this import sits mid-notebook; the other imports are in the
# first cells.
import rasterio.merge


# Merge the selected files, passing 0 as the nodata value; the call returns
# the merged array together with its transform.
out_image, out_transform = rasterio.merge.merge(
    filepaths_to_merge,
    nodata = 0,
)

In [None]:
out_image.shape

In [None]:
band_filepaths_df

In [None]:
band_filepaths_df['timestamp'].unique()

In [None]:
def get_image_crs(filepath:str):
    """Return the CRS of the raster at `filepath`, converted with `str()`."""
    with rasterio.open(filepath) as dataset:
        image_crs = dataset.crs
    return str(image_crs)

In [None]:
with rasterio.open(band_filepaths_df['filepath'][0]) as src:
    meta = src.meta.copy()

meta

In [None]:
band_filepaths_df['crs'] = band_filepaths_df['filepath'].apply(get_image_crs)

In [None]:
# CRS whose files have the highest *mean* `area_contribution`: average per
# CRS, sort descending, and take the first label.
max_area_contribution_crs = (
    band_filepaths_df
    .groupby(by='crs')['area_contribution']
    .mean()
    .sort_values(ascending=False)
    .index[0]
)
max_area_contribution_crs

In [None]:
# First 'B08' file whose CRS equals `max_area_contribution_crs`.
test_filepath = band_filepaths_df.loc[
    (band_filepaths_df['crs'] == max_area_contribution_crs)
    & (band_filepaths_df['band'] == 'B08'),
    'filepath',
].tolist()[0]

In [None]:
stack_name = '165bca4'

working_dir = f'../data/test_stack_creation/{stack_name}'
os.makedirs(working_dir, exist_ok=True)

In [None]:
# Run `test_filepath` through a three-step sequence — crop, reproject, crop —
# and write the result to a fixed path under '../data'.
# The reprojection target is `max_area_contribution_crs`, which is also the
# CRS `test_filepath` was selected by.
modify_images.modify_image(
    src_filepath = test_filepath,
    dst_filepath = '../data/test_out_crop_reproject_crop_samecrs_B08.jp2',
    sequence = [
        (modify_images.crop, dict(shapes_gdf=shapes_gdf, nodata=0)),
        (modify_images.reproject, dict(dst_crs=max_area_contribution_crs)),
        (modify_images.crop, dict(shapes_gdf=shapes_gdf, nodata=0)),
    ]
)

In [None]:
def change_parent_folderpath(
    filepath:str,
    parent_folderpath:str,
    new_parent_folderpath:str,
):
    """Re-root `filepath` from `parent_folderpath` onto `new_parent_folderpath`.

    `filepath` and `parent_folderpath` are made absolute, the path of the
    former relative to the latter is computed, and that relative path is
    joined onto `new_parent_folderpath` (which is used exactly as given).

    Returns
    -------
    str
        `new_parent_folderpath` joined with the relative path.
    """
    abs_filepath = os.path.abspath(filepath)
    abs_parent_folderpath = os.path.abspath(parent_folderpath)
    relative_path = os.path.relpath(abs_filepath, start=abs_parent_folderpath)
    return os.path.join(new_parent_folderpath, relative_path)

In [None]:
change_parent_folderpath(
    filepath = '../data/1/2/3/4/blah.gif',
    parent_folderpath = '../data/1/2',
    new_parent_folderpath = '../data/a/b/'
)

In [None]:
cropped_imgs_folderpath = os.path.join(working_dir, 'cropped')
os.makedirs(cropped_imgs_folderpath, exist_ok=True)

In [None]:
def generate_cropped_filepath(
    src_filepath:str,
    parent_folderpath:str,
    _id:str,
):
    """Build the destination path for the cropped copy of `src_filepath`.

    The destination folder is `parent_folderpath/<crs>/<_id>`, where `<crs>`
    is the image's CRS string lower-cased with ':' replaced by '-'. That
    folder is handed to `rsutils.utils.modify_filepath` as `new_folderpath`.
    NOTE(review): presumably `modify_filepath` keeps the file name and only
    swaps the folder — confirm in rsutils.
    """
    crs_foldername = get_image_crs(filepath=src_filepath).lower().replace(':', '-')
    cropped_folderpath = os.path.join(parent_folderpath, crs_foldername, _id)
    return rsutils.utils.modify_filepath(
        filepath = src_filepath,
        new_folderpath = cropped_folderpath,
    )

In [None]:
# Compute the cropped-image destination for every row from its source file
# path and its `id`.
band_filepaths_df['cropped_filepath'] = band_filepaths_df.apply(
    lambda row: generate_cropped_filepath(
        src_filepath = row['filepath'],
        parent_folderpath = cropped_imgs_folderpath,
        _id = row['id'],
    ),
    axis = 1,
)

In [None]:
band_filepaths_df

In [None]:
# Crop each source image with `shapes_gdf` and write it to its
# `cropped_filepath`; the return value is checked with `all(...)` in the next cell.
# NOTE(review): `create_stack` is not imported in any visible cell, so this
# raises NameError on a fresh kernel — confirm the intended module.
cropped_successes = create_stack.crop_and_save_images(
    src_filepaths = band_filepaths_df['filepath'],
    dst_filepaths = band_filepaths_df['cropped_filepath'],
    shapes_gdf = shapes_gdf,
)

In [None]:
if not all(cropped_successes):
    raise ValueError('Not all cropping was a success.')

In [None]:
max_area_contribution_crs

In [None]:
# Index labels of the rows whose CRS differs from `max_area_contribution_crs`.
indexes_to_reproject = band_filepaths_df.index[
    band_filepaths_df['crs'] != max_area_contribution_crs
]

In [None]:
indexes_to_reproject

In [None]:
def generate_reprojected_filepath(
    filepath,
    from_crs,
    to_crs,
    _id,
    parent_folderpath,
):
    """Build the destination path for the reprojected copy of `filepath`.

    The destination folder is `parent_folderpath/<from>_to_<to>/<_id>`, where
    `<from>` and `<to>` are the CRS values converted with `str()`, lower-cased,
    with ':' replaced by '-'. That folder is handed to
    `rsutils.utils.modify_filepath` as `new_folderpath`.
    NOTE(review): presumably `modify_filepath` keeps the file name and only
    swaps the folder — confirm in rsutils.
    """
    from_tag = str(from_crs).lower().replace(':', '-')
    to_tag = str(to_crs).lower().replace(':', '-')
    reprojected_folderpath = os.path.join(
        parent_folderpath,
        f"{from_tag}_to_{to_tag}",
        _id,
    )
    return rsutils.utils.modify_filepath(
        filepath = filepath,
        new_folderpath = reprojected_folderpath,
    )

In [None]:
reprojected_folderpath = os.path.join(working_dir, 'reprojected')
os.makedirs(reprojected_folderpath, exist_ok=True)

In [None]:
# Fill `reprojected_filepath` only for the rows listed in `indexes_to_reproject`.
# The right-hand `apply` runs over every row of the frame; the `.loc`
# assignment then keeps just the values whose index labels are selected on the
# left.
# NOTE(review): rows outside `indexes_to_reproject` presumably end up with a
# missing value in this column — confirm before using it downstream.
band_filepaths_df.loc[
    indexes_to_reproject,
    'reprojected_filepath'
] = band_filepaths_df.apply(
    lambda row: generate_reprojected_filepath(
        filepath = row['cropped_filepath'],
        from_crs = row['crs'],
        to_crs = max_area_contribution_crs,
        _id = row['id'],
        parent_folderpath = reprojected_folderpath,
    ),
    axis = 1,
)

In [None]:
band_filepaths_df

In [None]:
# Reproject the cropped images of the selected rows to
# `max_area_contribution_crs`, using nearest-neighbour resampling.
# NOTE(review): `create_stack` is not imported in any visible cell, so this
# raises NameError on a fresh kernel — confirm the intended module.
reproject_successes = create_stack.reproject_images(
    src_filepaths = band_filepaths_df.loc[indexes_to_reproject, 'cropped_filepath'],
    dst_filepaths = band_filepaths_df.loc[indexes_to_reproject, 'reprojected_filepath'],
    dst_crs = max_area_contribution_crs,
    resampling = rasterio.warp.Resampling.nearest,
)

In [None]:
# TODO: this call was left unfinished — `src_filepath =` had no value, which is
# a SyntaxError that prevents the cell from running at all. It is kept here as
# a commented-out draft; fill in the arguments and uncomment when ready.
# NOTE(review): `create_stack` is not imported in any visible cell — confirm
# the intended module before re-enabling.
# create_stack.reproject_image(
#     src_filepath = ...,
# )