In [1]:
import geopandas as gpd
import tqdm
import os
import rasterio
import rasterio.merge
import rasterio.warp
import pandas as pd
import datetime
import numpy as np

In [2]:
import sys
sys.path.append('..')

In [3]:
import rsutils.utils as utils
import rsutils.s2_grid_utils as s2_grid_utils

In [4]:
# Root folder for every artefact written by this notebook; created up front
# (no error if it already exists) so later cells can save into it directly.
outputs_folderpath = '../../data/outputs'
os.makedirs(outputs_folderpath, exist_ok=True)

In [5]:
# Input vector files: the study-area bounding geometry and the WorldCereal
# agro-ecological zone (AEZ) polygons. Paths are relative to the notebook.
bounding_filepath = '../../data/outputs/france_modis_bounding.geojson'
aez_geojson_filepath = '../../data/worldcereal/WorldCereal_AEZ.geojson'

In [6]:
# Load both inputs as GeoDataFrames.
bounds_gdf = gpd.read_file(bounding_filepath)
aez_gdf = gpd.read_file(aez_geojson_filepath)

In [7]:
# Spatially join the bounding geometry with the AEZ polygons, carrying over
# only the AEZ id and the maize season attributes.
aez_columns = [
    'geometry',
    'aez_id',
    'tc-maize-main_sos',
    'tc-maize-main_eos',
    'tc-maize-second_sos',
    'tc-maize-second_eos',
]
worldcereal_aez_in_bound_gdf = gpd.sjoin(bounds_gdf, aez_gdf[aez_columns])
worldcereal_aez_in_bound_gdf = worldcereal_aez_in_bound_gdf.reset_index(drop=True)
worldcereal_aez_in_bound_gdf.shape

(4, 7)

In [8]:
# AEZ ids as strings, so they compare equal to the ids parsed from the
# WorldCereal tif filenames further down.
aez_ids = worldcereal_aez_in_bound_gdf['aez_id'].astype(str).to_list()
aez_ids

['43153', '43170', '22190', '46172']

In [9]:
# Build a catalogue of every WorldCereal GeoTIFF found under the data folder.
# Each filename (without extension) is expected to consist of exactly six
# underscore-separated fields:
#   <aez>_<season>_<product>_<startdate>_<enddate>_<type>
# A .tif file that does not follow this pattern makes the unpacking below raise.
data_folderpath = '../../data/worldcereal/'

catalogue_fields = ('aez', 'season', 'product', 'startdate', 'enddate', 'type', 'filepath')
worldcereal_tif_catalogue_data = {field: [] for field in catalogue_fields}

for filepath in utils.get_all_files_in_folder(data_folderpath):
    filename = os.path.basename(filepath)
    if not filename.endswith('.tif'):
        continue

    aez, season, product, startdate, enddate, ftype = filename[:-4].split('_')
    parsed_values = (
        str(aez),
        season,
        product,
        datetime.datetime.strptime(startdate, '%Y-%m-%d'),
        datetime.datetime.strptime(enddate, '%Y-%m-%d'),
        ftype,
        filepath,
    )
    for field, value in zip(catalogue_fields, parsed_values):
        worldcereal_tif_catalogue_data[field].append(value)

worldcereal_tif_catalogue_df = pd.DataFrame(data=worldcereal_tif_catalogue_data)
worldcereal_tif_catalogue_df.shape

(233, 7)

In [10]:
# Keep only the classification rasters of the AEZs that intersect the bounds,
# ordered by season start date and then by product.
ftype = 'classification'
is_selected_type = worldcereal_tif_catalogue_df['type'] == ftype
is_in_bounds_aez = worldcereal_tif_catalogue_df['aez'].isin(aez_ids)
selected_WC_catalogue_df = (
    worldcereal_tif_catalogue_df
    .loc[is_selected_type & is_in_bounds_aez]
    .sort_values(by=['startdate', 'product'])
    .reset_index(drop=True)
)
selected_WC_catalogue_df.shape

(11, 7)

In [11]:
# Display the selected catalogue for a visual check.
selected_WC_catalogue_df

Unnamed: 0,aez,season,product,startdate,enddate,type,filepath
0,43153,tc-wintercereals,wintercereals,2020-10-27,2021-07-06,classification,../../data/worldcereal/WorldCereal_2021_tc-win...
1,43170,tc-wintercereals,wintercereals,2020-10-30,2021-07-16,classification,../../data/worldcereal/WorldCereal_2021_tc-win...
2,43170,tc-annual,temporarycrops,2020-10-31,2021-10-31,classification,../../data/worldcereal/WorldCereal_2021_tc-ann...
3,22190,tc-annual,temporarycrops,2020-11-01,2021-11-01,classification,../../data/worldcereal/WorldCereal_2021_tc-ann...
4,46172,tc-annual,temporarycrops,2020-11-06,2021-11-06,classification,../../data/worldcereal/WorldCereal_2021_tc-ann...
5,43153,tc-annual,temporarycrops,2020-11-09,2021-11-09,classification,../../data/worldcereal/WorldCereal_2021_tc-ann...
6,46172,tc-wintercereals,wintercereals,2020-12-31,2021-08-05,classification,../../data/worldcereal/WorldCereal_2021_tc-win...
7,22190,tc-wintercereals,wintercereals,2021-01-29,2021-08-12,classification,../../data/worldcereal/WorldCereal_2021_tc-win...
8,46172,tc-springcereals,springcereals,2021-03-28,2021-11-06,classification,../../data/worldcereal/WorldCereal_2021_tc-spr...
9,43170,tc-springcereals,springcereals,2021-03-31,2021-10-31,classification,../../data/worldcereal/WorldCereal_2021_tc-spr...


In [12]:
# Cover the first bounding geometry with S2 grid cells (res=4) and intersect
# the cells with the bounds.
bounds_geometry = bounds_gdf.loc[0, 'geometry']
bounds_s2_grids_gdf = s2_grid_utils.get_s2_grids_gdf(geojson_epsg_4326=bounds_geometry, res=4)
bounds_s2_grids_overlayed_gdf = gpd.overlay(bounds_gdf, bounds_s2_grids_gdf)
bounds_s2_grids_overlayed_gdf.shape

(11, 2)

In [13]:
# Persist the gridded bounds. The destination is derived from the bounding
# file path with an 's2gridded_' prefix.
# NOTE(review): presumably the prefix is applied to the filename - confirm in utils.modify_filepath.
bounds_s2_grids_overlayed_gdf.to_file(utils.modify_filepath(filepath=bounding_filepath, prefix='s2gridded_'))

In [14]:
def crop_tif_to_each_shape_and_save(
    src_filepath:str,
    shapes_gdf:gpd.GeoDataFrame,
    output_folderpath:str,
    id_col:str,
    overwrite:bool=False,
):
    """Crop one GeoTIFF to every geometry of ``shapes_gdf`` and save each crop.

    For every row, the crop is written to ``<output_folderpath>/<id>/`` under a
    filename derived from ``src_filepath`` with an ``'<id>_'`` prefix (via
    ``utils.modify_filepath``). Crops that already exist on disk are reused
    unless ``overwrite`` is True.

    Parameters
    ----------
    src_filepath : str
        Path of the raster to crop.
    shapes_gdf : gpd.GeoDataFrame
        One geometry per row; its CRS is passed on with each geometry.
    output_folderpath : str
        Root folder; one sub-folder per id is created inside it.
    id_col : str
        Column of ``shapes_gdf`` holding the identifier of each geometry.
    overwrite : bool, default False
        Re-crop even when the destination file already exists.

    Returns
    -------
    pd.DataFrame
        Columns ``[id_col, 'tif_filepath']``, one row per geometry.
        ``tif_filepath`` is None when cropping raised ``ValueError``.
    """
    ids = []
    tif_filepaths = []

    for _, row in tqdm.tqdm(shapes_gdf.iterrows(), total=shapes_gdf.shape[0]):
        _id = row[id_col]
        # str() lets non-string identifiers be used as folder names too.
        dst_folderpath = os.path.join(output_folderpath, str(_id))
        os.makedirs(dst_folderpath, exist_ok=True)
        dst_filepath = utils.modify_filepath(
            filepath=src_filepath,
            prefix=f'{_id}_',
            new_folderpath=dst_folderpath,
        )

        if overwrite or not os.path.exists(dst_filepath):
            try:
                out_image, out_meta = utils.crop_tif(
                    src_filepath=src_filepath,
                    shapes_gdf=gpd.GeoDataFrame(
                        data={'geometry': [row['geometry']]},
                        crs=shapes_gdf.crs,
                    ),
                )
                with rasterio.open(dst_filepath, 'w', **out_meta) as dst:
                    dst.write(out_image)
                del out_image, out_meta
            except ValueError:
                # Deliberate best-effort: a geometry that cannot be cropped is
                # recorded with a None path instead of aborting the whole run.
                # NOTE(review): presumably raised when the geometry does not
                # overlap the raster - confirm against utils.crop_tif.
                dst_filepath = None

        ids.append(_id)
        tif_filepaths.append(dst_filepath)

    # Key the id column by ``id_col``: the previous code appended to a
    # hard-coded 'id' key and raised KeyError for any other column name.
    tif_filepaths_df = pd.DataFrame(data={id_col: ids, 'tif_filepath': tif_filepaths})
    return tif_filepaths_df

In [15]:
# Crop every selected WorldCereal raster to each S2 grid cell. The resulting
# per-raster catalogues are keyed by (product, season, aez).
s2_grid_cropped_tifs_dfs = {}

# Loop-invariant destination: all crops share the same folder tree.
s2_grid_worldcereal_folderpath = os.path.join(outputs_folderpath, 's2_grid_level', 'worldcereal')

for index, row in tqdm.tqdm(
    selected_WC_catalogue_df.iterrows(),
    total=len(selected_WC_catalogue_df),
):
    product, season, aez = row['product'], row['season'], row['aez']

    print(product, season, aez)

    s2_grid_cropped_tifs_dfs[(product, season, aez)] = crop_tif_to_each_shape_and_save(
        src_filepath=row['filepath'],
        shapes_gdf=bounds_s2_grids_overlayed_gdf,
        output_folderpath=s2_grid_worldcereal_folderpath,
        id_col='id',
        overwrite=False,  # set to True to force re-cropping of existing files
    )

  0%|          | 0/11 [00:00<?, ?it/s]

wintercereals tc-wintercereals 43153


100%|██████████| 11/11 [00:00<00:00, 359.48it/s]


wintercereals tc-wintercereals 43170


100%|██████████| 11/11 [00:00<00:00, 506.21it/s]


temporarycrops tc-annual 43170


100%|██████████| 11/11 [00:00<00:00, 516.55it/s]


temporarycrops tc-annual 22190


100%|██████████| 11/11 [00:00<00:00, 676.80it/s]


temporarycrops tc-annual 46172


100%|██████████| 11/11 [00:00<00:00, 7278.33it/s]
 45%|████▌     | 5/11 [00:00<00:00, 48.80it/s]

temporarycrops tc-annual 43153


100%|██████████| 11/11 [00:00<00:00, 1096.94it/s]


wintercereals tc-wintercereals 46172


100%|██████████| 11/11 [00:00<00:00, 4711.26it/s]


wintercereals tc-wintercereals 22190


100%|██████████| 11/11 [00:00<00:00, 599.07it/s]


springcereals tc-springcereals 46172


100%|██████████| 11/11 [00:00<00:00, 7612.17it/s]


springcereals tc-springcereals 43170


100%|██████████| 11/11 [00:00<00:00, 507.43it/s]


springcereals tc-springcereals 22190


100%|██████████| 11/11 [00:00<00:00, 594.60it/s]
100%|██████████| 11/11 [00:00<00:00, 59.39it/s]


In [None]:
# Tag every per-raster crop catalogue with its (product, season, aez) key and
# drop the rows that have a missing value (crops recorded with a None path).
# .assign() returns a new frame, so the frames stored in
# s2_grid_cropped_tifs_dfs are no longer modified in place by this cell.
s2_grid_cropped_tifs_dfs_list = [
    _df.assign(product=product, season=season, aez=str(aez)).dropna()
    for (product, season, aez), _df in s2_grid_cropped_tifs_dfs.items()
]

In [None]:
# Stack all tagged crop catalogues into one frame with a fresh 0..n-1 index.
cropmask_s2grid_tifs_df = pd.concat(s2_grid_cropped_tifs_dfs_list).reset_index(drop=True)
cropmask_s2grid_tifs_df.shape

In [None]:
# Save the per-grid crop catalogue as CSV (without the index column).
cropmask_s2grid_tifs_catalogue_filepath = os.path.join(outputs_folderpath, 'cropmask_s2grid_tifs_catalogue.csv')
cropmask_s2grid_tifs_df.to_csv(cropmask_s2grid_tifs_catalogue_filepath, index=False)

In [None]:
# Aggregate, per (S2 grid id, product), the crops coming from different AEZs
# into one binary crop mask.

# Pixel values used in the masks.
CROPMASK_NODATA = 255
CROPMASK_ISCROP = 100
CROPMASK_ISNOTCROP = 0

resampling = rasterio.merge.Resampling.nearest

overwrite = False

aggregated_cropmask_folderpath = os.path.join(outputs_folderpath, 's2_grid_level', 'aggregated_worldcereal')
os.makedirs(aggregated_cropmask_folderpath, exist_ok=True)

aggregated_cropmask_records = []

# groupby visits each (id, product) pair once, in sorted order, so the
# catalogue row order is reproducible across kernel restarts (iterating a set
# of string tuples is not) and the frame is not re-filtered for every pair.
cropmask_groups = cropmask_s2grid_tifs_df.groupby(['id', 'product'])

for (_id, _product), _group_df in tqdm.tqdm(cropmask_groups, total=cropmask_groups.ngroups):
    _tif_filepaths = _group_df['tif_filepath'].to_list()

    dst_filepath = os.path.join(aggregated_cropmask_folderpath, f'{_id}_{_product}.tif')

    if overwrite or not os.path.exists(dst_filepath):
        out_image, out_transform = rasterio.merge.merge(
            _tif_filepaths,
            method=rasterio.merge.copy_max,
            resampling=resampling,
            nodata=CROPMASK_NODATA,
        )

        # Everything that is not flagged as crop (including nodata pixels)
        # becomes "not crop", leaving a strictly binary 0 / 100 mask.
        out_image[out_image != CROPMASK_ISCROP] = CROPMASK_ISNOTCROP

        # Remaining metadata (crs, dtype, driver) is taken from the first input.
        with rasterio.open(_tif_filepaths[0]) as ref:
            out_meta = ref.meta.copy()

        out_meta.update({
            'count': out_image.shape[0],
            'height': out_image.shape[1],
            'width': out_image.shape[2],
            'transform': out_transform,
            'nodata': CROPMASK_NODATA,
            'compress':'lzw',
        })

        with rasterio.open(dst_filepath, 'w', **out_meta) as dst:
            dst.write(out_image)

        del out_image

    aggregated_cropmask_records.append({
        'id': _id,
        'product': _product,
        'tif_filepath': dst_filepath,
    })

aggregated_cropmask_tif_filepaths_df = pd.DataFrame(
    aggregated_cropmask_records,
    columns=['id', 'product', 'tif_filepath'],
)
aggregated_cropmask_tif_filepaths_df.shape

In [None]:
# merging springcereals and wintercereals into cereal

# Merge springcereals and wintercereals into a single "cereals" mask per grid id.

pivoted_aggregated_cropmask_tif_filepaths_df = aggregated_cropmask_tif_filepaths_df.pivot(
    index=['id'], columns=['product'], values=['tif_filepath']
)

overwrite = False

# Order matters: the metadata of the first available raster is reused.
cereal_products = ('springcereals', 'wintercereals')

for index, row in tqdm.tqdm(
    pivoted_aggregated_cropmask_tif_filepaths_df.iterrows(),
    total=pivoted_aggregated_cropmask_tif_filepaths_df.shape[0],
):
    # The pivot holds NaN where a grid cell has no raster for a product (and
    # has no column at all for a product that never occurs). Keep only the
    # cereal rasters that actually exist for this grid cell.
    _component_tif_filepaths = [
        row[('tif_filepath', _cereal_product)]
        for _cereal_product in cereal_products
        if ('tif_filepath', _cereal_product) in row.index
        and pd.notna(row[('tif_filepath', _cereal_product)])
    ]
    if not _component_tif_filepaths:
        # Neither cereal product is available here: nothing to merge.
        continue

    _cereals_tif_filepath = os.path.join(aggregated_cropmask_folderpath, f'{index}_cereals.tif')

    if overwrite or not os.path.exists(_cereals_tif_filepath):
        with rasterio.open(_component_tif_filepaths[0]) as ref:
            out_meta = ref.meta.copy()

        out_image, out_transform = rasterio.merge.merge(
            _component_tif_filepaths,
            method=rasterio.merge.copy_max,
            resampling=resampling,
            nodata=CROPMASK_NODATA,
        )

        out_meta.update({
            'count': out_image.shape[0],
            'height': out_image.shape[1],
            'width': out_image.shape[2],
            'transform': out_transform,
            'nodata': CROPMASK_NODATA,
            'compress':'lzw',
        })

        with rasterio.open(_cereals_tif_filepath, 'w', **out_meta) as dst:
            dst.write(out_image)

        del out_image

    pivoted_aggregated_cropmask_tif_filepaths_df.loc[index, ('tif_filepath', 'cereals')] = _cereals_tif_filepath
    

In [None]:
# Back to long format: one row per (id, product) with its raster path.
# Melting the two-level columns yields one variable column per level; the
# unnamed outer level (always 'tif_filepath') appears as a column named None
# and is dropped.
aggregated_cropmask_tif_filepaths_df = \
pivoted_aggregated_cropmask_tif_filepaths_df.reset_index().melt(
    id_vars=[('id', '')]
).drop(
    columns=[None]
).rename(
    columns={('id',''): 'id', 'value': 'tif_filepath'}
).dropna(
    # The pivot holds NaN for (id, product) pairs that have no raster; those
    # rows must not reach the resampling step, which opens every path.
    subset=['tif_filepath']
).reset_index(drop=True)

In [None]:
# Save the aggregated crop-mask catalogue as CSV (without the index column).
aggregated_cropmask_tif_catalogue_filepath = os.path.join(outputs_folderpath, 'aggregated_cropmask_tif_filepaths_catalogue.csv')
aggregated_cropmask_tif_filepaths_df.to_csv(aggregated_cropmask_tif_catalogue_filepath, index=False)

In [None]:
def resample_WC_to_MODIS_and_save(
    ref_filepath:str,
    src_filepath:str,
    dst_filepath:str,
    resampling = rasterio.warp.Resampling.average,
    crop_value:int = 100,
):
    """Reproject a crop mask onto the grid of a reference raster and save it.

    Band 1 of ``src_filepath`` is binarised (``crop_value`` is kept, every
    other value becomes 0), reprojected onto the CRS / transform / shape of
    ``ref_filepath`` and written to ``dst_filepath`` as a single-band raster
    that otherwise reuses the reference metadata.

    Parameters
    ----------
    ref_filepath : str
        Raster defining the target grid (crs, transform, height, width).
    src_filepath : str
        Crop-mask raster to resample; only band 1 is read.
    dst_filepath : str
        Where the resampled raster is written.
    resampling : rasterio Resampling member, default ``Resampling.average``
        Resampling method passed to ``rasterio.warp.reproject``.
    crop_value : int, default 100
        Pixel value marking "crop" in the source mask.
    """
    # Zero doubles as the nodata value of the output.
    DST_NODATA = 0

    with rasterio.open(ref_filepath) as ref:
        ref_meta = ref.meta.copy()

    with rasterio.open(src_filepath) as src:
        src_meta = src.meta.copy()
        src_image = src.read(1)

    # Binarise before resampling so only crop pixels contribute a non-zero value.
    src_image[src_image != crop_value] = 0

    # NOTE(review): the buffer uses numpy's default float dtype while the file
    # dtype comes from the reference metadata - confirm the intended output dtype.
    dst_image = np.zeros((ref_meta['height'], ref_meta['width']))

    rasterio.warp.reproject(
        source = src_image,
        destination = dst_image,
        src_transform = src_meta['transform'],
        dst_transform = ref_meta['transform'],
        src_nodata = src_meta['nodata'],
        dst_nodata = DST_NODATA,
        src_crs = src_meta['crs'],
        dst_crs = ref_meta['crs'],
        resampling = resampling,
    )

    # Exactly one band is written, so the band count must not be inherited
    # from a (possibly multi-band) reference raster.
    ref_meta.update({
        'nodata': DST_NODATA,
        'count': 1,
    })

    with rasterio.open(dst_filepath, 'w', **ref_meta) as dst:
        dst.write(dst_image, 1)


In [None]:
# Resample every aggregated crop mask onto the grid of the reference raster
# and record where each result is stored.
reference_geotiff = '../../data/GEOGLAM-BACS_v1.0.0/Percent_Spring_Wheat.tif'

overwrite = False

resampled_cropmasks_folderpath = os.path.join(outputs_folderpath, 's2_grid_level', 'resampled_cropmasks')
os.makedirs(resampled_cropmasks_folderpath, exist_ok=True)

data = {'id': [], 'product': [], 'tif_filepath': []}

for index, row in tqdm.tqdm(
    aggregated_cropmask_tif_filepaths_df.iterrows(),
    total=len(aggregated_cropmask_tif_filepaths_df),
):
    s2_grid_id, product, cropmask_filepath = row['id'], row['product'], row['tif_filepath']

    resampled_cropmask_filepath = os.path.join(
        resampled_cropmasks_folderpath,
        f'resampled_{s2_grid_id}_{product}.tif',
    )

    # Existing outputs are reused unless overwrite is requested.
    already_resampled = os.path.exists(resampled_cropmask_filepath)
    if overwrite or not already_resampled:
        resample_WC_to_MODIS_and_save(
            ref_filepath=reference_geotiff,
            src_filepath=cropmask_filepath,
            dst_filepath=resampled_cropmask_filepath,
            resampling=rasterio.merge.Resampling.average,
        )

    for column, value in (
        ('id', s2_grid_id),
        ('product', product),
        ('tif_filepath', resampled_cropmask_filepath),
    ):
        data[column].append(value)

resampled_cropmask_s2grid_tifs_df = pd.DataFrame(data=data)

In [None]:
# Save the resampled crop-mask catalogue as CSV (without the index column).
resampled_cropmask_catalogue_filepath = os.path.join(outputs_folderpath, 'resampled_cropmask_catalogue.csv')
resampled_cropmask_s2grid_tifs_df.to_csv(resampled_cropmask_catalogue_filepath, index=False)

In [None]:
# Folder receiving one merged crop mask per product.
merged_cropmask_folderpath = os.path.join(outputs_folderpath, 'merged_WC_cropmask')
os.makedirs(merged_cropmask_folderpath, exist_ok=True)

In [None]:
def merge_tifs(
    tif_filepaths:list[str],
    dst_filepath:str,
    bounds:tuple=None,
    method=rasterio.merge.copy_max,
):
    """Mosaic several GeoTIFFs into one LZW-compressed GeoTIFF.

    Metadata (crs, dtype, nodata, ...) is taken from the first file; the
    height, width, band count and transform come from the merged array.

    Parameters
    ----------
    tif_filepaths : list[str]
        Paths of the rasters to merge; must not be empty.
    dst_filepath : str
        Where the mosaic is written.
    bounds : tuple, optional
        Output extent forwarded to ``rasterio.merge.merge``.
    method : callable or str, default ``rasterio.merge.copy_max``
        Merge method forwarded to ``rasterio.merge.merge``.

    Raises
    ------
    ValueError
        If ``tif_filepaths`` is empty.
    """
    if len(tif_filepaths) == 0:
        raise ValueError('tif_filepaths must contain at least one GeoTIFF path')

    with rasterio.open(tif_filepaths[0]) as src:
        out_meta = src.meta.copy()

    # The inputs are passed positionally, like the other merge calls in this
    # notebook, so the call does not depend on the keyword name of the first
    # parameter of rasterio.merge.merge.
    out_image, out_transform = rasterio.merge.merge(
        tif_filepaths,
        nodata=out_meta['nodata'],
        bounds=bounds,
        method=method,
    )

    out_meta.update({
        "driver": "GTiff",
        "count": out_image.shape[0],
        "height": out_image.shape[1],
        "width": out_image.shape[2],
        "transform": out_transform,
        "compress": "lzw",
    })

    with rasterio.open(dst_filepath, 'w', **out_meta) as dst:
        dst.write(out_image)

In [None]:
# Distinct products present in the resampled catalogue.
products = resampled_cropmask_s2grid_tifs_df['product'].unique()
products

In [None]:
# Bounding box of the first bounding geometry, used as the merge extent.
# NOTE(review): assumes the resampled rasters share the CRS of bounds_gdf - confirm.
bounds = tuple(bounds_gdf.bounds.iloc[0])
bounds

In [None]:
# merge resample modified cropmasks

# Merge the resampled crop masks of all grid cells into one raster per
# product, clipped to the study-area bounds.
merged_tif_filepaths = []

for product in tqdm.tqdm(products):
    is_product = resampled_cropmask_s2grid_tifs_df['product'] == product
    _tif_filepaths = resampled_cropmask_s2grid_tifs_df.loc[is_product, 'tif_filepath'].to_list()

    merged_tif_filepath = os.path.join(merged_cropmask_folderpath, f'{product}.tif')
    merge_tifs(
        tif_filepaths=_tif_filepaths,
        dst_filepath=merged_tif_filepath,
        bounds=bounds,
    )
    merged_tif_filepaths.append(merged_tif_filepath)

data = {
    'product': list(products),
    'tif_filepath': merged_tif_filepaths,
}
merged_resampled_cropmask_catalogue_df = pd.DataFrame(data=data)