In [None]:
import os
import posixpath

In [None]:
import geopandas as gpd
import numpy as np
import pandas as pd
from eolearn.core import EOPatch
from fs_s3fs import S3FS
from sentinelhub import CRS, SHConfig
from tqdm.auto import tqdm

In [None]:
# AWS credentials are read from the standard AWS environment variables so that
# secrets never have to be typed into (and saved with) the notebook. If a
# variable is not set, the value falls back to an empty string.
config = SHConfig()
config.aws_access_key_id = os.environ.get('AWS_ACCESS_KEY_ID', '')
config.aws_secret_access_key = os.environ.get('AWS_SECRET_ACCESS_KEY', '')

In [None]:
# Handle to the S3 bucket used for all reads and writes below.
# The bucket name is a placeholder and must be filled in before running.
filesystem = S3FS(
    bucket_name='',
    aws_access_key_id=config.aws_access_key_id,
    aws_secret_access_key=config.aws_secret_access_key,
)

In [None]:
# Load the per-sample metadata table. The context manager guarantees that the
# remote file handle is closed as soon as the parquet data has been read.
with filesystem.openbin('metadata/npz_info_small.pq') as metadata_file:
    data_df = pd.read_parquet(metadata_file)

In [None]:
# Display the metadata table that was just read from parquet.
data_df

In [None]:
# Directory on `filesystem` whose entries are each loaded as an EOPatch further
# below. Empty placeholder — set it before running the notebook.
DIR_DEIMOS = ''

In [None]:
# Cloud-coverage limit passed to `cloudy_idxs_deimos`: a time frame counts as
# cloudy when the mean of its cloud mask over valid pixels exceeds this value.
MAX_CC = 0.05

In [None]:
def cloudy_idxs_deimos(eop, max_cc, threshold=100):
    """Return the indices of time frames whose cloud coverage exceeds ``max_cc``.

    For every entry of ``eop.timestamp`` the cloud coverage is the mean of
    ``eop.mask['CLM']`` over the pixels flagged in ``eop.mask['IS_DATA']``.

    Parameters
    ----------
    eop : object
        EOPatch-like object exposing ``timestamp`` and a ``mask`` mapping with
        ``'IS_DATA'`` and ``'CLM'`` arrays indexed by time frame on axis 0.
    max_cc : float
        Coverage limit; a frame is reported when its coverage is strictly
        greater than this value.
    threshold : int, optional
        Unused. Kept only so that existing calls remain valid.

    Returns
    -------
    list of int
        Positions (into ``eop.timestamp``) of the cloudy frames, ascending.
    """
    cloudy = []
    for i, _ in enumerate(eop.timestamp):
        # Force a boolean mask: an integer-typed IS_DATA would otherwise be
        # interpreted as fancy (positional) indexing and give a wrong mean.
        valid = np.asarray(eop.mask['IS_DATA'][i]).squeeze().astype(bool)

        # A frame without any valid pixel has no defined coverage; treat it as
        # not cloudy instead of computing the mean of an empty selection.
        if not valid.any():
            continue

        cloud_coverage = np.asarray(eop.mask['CLM'][i])[valid].mean()
        if cloud_coverage > max_cc:
            cloudy.append(i)

    return cloudy

In [None]:
# Scan every entry of DIR_DEIMOS, load it as an EOPatch and record its bounding
# box together with the timestamps whose cloud coverage exceeds MAX_CC.
cloud_info = []
for eop_name in tqdm(filesystem.listdir(DIR_DEIMOS)):
    # Paths on a PyFilesystem object always use forward slashes, so they are
    # joined with posixpath; os.path would insert backslashes on Windows.
    eop_path = posixpath.join(DIR_DEIMOS, eop_name)
    eop = EOPatch.load(eop_path, filesystem=filesystem, lazy_loading=True)
    cloudy = cloudy_idxs_deimos(eop, MAX_CC)
    info = dict(eop_name=eop_name,
                bbox=eop.bbox,
                geometry=eop.bbox.geometry,
                crs=eop.bbox.crs,
                cloudy_timestamps=np.array(eop.timestamp)[cloudy])
    cloud_info.append(info)

In [None]:
# One row per EOPatch; a local pickle copy is kept so the scan over the bucket
# does not have to be repeated.
df = pd.DataFrame(data=cloud_info)
df.to_pickle(path='cloud_info.pkl')

In [None]:
# Split the patches by the CRS of their bounding box: EPSG:32636 rows form the
# Cyprus frame, EPSG:32634 rows the Lithuania frame.
gdf_cyprus = gpd.GeoDataFrame(
    df[df['crs'] == CRS('32636')],
    crs='EPSG:32636',
)
gdf_lithuania = gpd.GeoDataFrame(
    df[df['crs'] == CRS('32634')],
    crs='EPSG:32634',
)

In [None]:
# Display the Cyprus subset (rows whose CRS is EPSG:32636).
gdf_cyprus

In [None]:
def get_neighbouring_eops(gdf):
    """Add a ``neighbouring_eops`` column listing the patches adjacent to each row.

    For every row, all rows whose geometry is not disjoint from the row's own
    geometry are collected, the row's own ``eop_name`` is dropped, and the
    remaining names are stored as one ``", "``-separated string.

    The frame is modified in place and also returned.
    """
    for idx, patch in gdf.iterrows():
        # Rows whose geometry is not disjoint from this patch's geometry.
        not_disjoint = ~gdf.geometry.disjoint(patch.geometry)

        # The patch is never disjoint from itself, so drop its own name.
        other_names = [
            name
            for name in gdf[not_disjoint].eop_name.tolist()
            if patch.eop_name != name
        ]

        gdf.at[idx, "neighbouring_eops"] = ", ".join(other_names)
    return gdf

In [None]:
# Attach the neighbouring-patch names to both regional frames. The two calls
# are independent of each other.
gdf_cyprus = get_neighbouring_eops(gdf_cyprus)
gdf_lithuania = get_neighbouring_eops(gdf_lithuania)

In [None]:
# Show up to the first 300 Cyprus rows, now including `neighbouring_eops`.
gdf_cyprus.head(300)

In [None]:
def get_cloudy_eop_timestamps(gdf):
    """Collect ``(timestamp, neighbour_name)`` pairs for all cloudy timestamps.

    For each row, every value in ``cloudy_timestamps`` is paired with every
    name found in the comma-separated ``neighbouring_eops`` string.

    Parameters
    ----------
    gdf : DataFrame-like
        Must provide the columns ``cloudy_timestamps`` (an iterable per row)
        and ``neighbouring_eops`` (a comma-separated string per row).

    Returns
    -------
    set of tuple
        Unique ``(cloudy_timestamp, neighbour_name)`` pairs. Rows without
        neighbours contribute nothing.
    """
    cloudy_eop_timestamps = set()
    for _, row in gdf.iterrows():
        # An empty neighbour string splits into [''], which would otherwise
        # produce meaningless (timestamp, '') entries; drop blank names.
        stripped = (name.strip() for name in row.neighbouring_eops.split(','))
        neighbours = [name for name in stripped if name]

        for cloudy_timestamp in row.cloudy_timestamps:
            for neighbour in neighbours:
                cloudy_eop_timestamps.add((cloudy_timestamp, neighbour))
    return cloudy_eop_timestamps

In [None]:
# Per-region lookup sets of (timestamp, patch name) pairs, consulted by
# `is_shadow_v2`. The two calls are independent of each other.
cloudy_lithuania = get_cloudy_eop_timestamps(gdf_lithuania)
cloudy_cyprus = get_cloudy_eop_timestamps(gdf_cyprus)

In [None]:
def is_shadow_v2(eopatch, timestamp_deimos, country):
    """Tell whether ``(timestamp_deimos, eopatch)`` is in the country's cloudy set.

    ``country`` selects the module-level lookup set: ``'Lithuania'`` uses
    ``cloudy_lithuania`` and ``'Cyprus'`` uses ``cloudy_cyprus``. The timestamp
    is converted with ``to_pydatetime()`` before the membership test.

    Raises
    ------
    ValueError
        If ``country`` is neither ``'Lithuania'`` nor ``'Cyprus'``.
    """
    # Pick the lookup set first; unknown countries fail before anything else.
    if country == 'Lithuania':
        cloudy_pairs = cloudy_lithuania
    elif country == 'Cyprus':
        cloudy_pairs = cloudy_cyprus
    else:
        raise ValueError("Wrong country")

    key = (timestamp_deimos.to_pydatetime(), eopatch)
    return key in cloudy_pairs

In [None]:
# Display the metadata table again before the `is_shadow_v2` column is added.
data_df

In [None]:
# Evaluate `is_shadow_v2` row by row and store the result in a new column.
data_df['is_shadow_v2'] = data_df.apply(
    lambda row: is_shadow_v2(row['eopatch'], row['timestamp_deimos'], row['countries']),
    axis='columns',
)

In [None]:
# Count how many samples fall into each value of the new column.
data_df['is_shadow_v2'].value_counts()

In [None]:
# Write the table, now including `is_shadow_v2`, back to the bucket.
# NOTE: this is the same path the table was read from, so the original file is
# overwritten in place.
with filesystem.openbin('metadata/npz_info_small.pq', 'wb') as f:
    data_df.to_parquet(f)