In [None]:
import os
from pathlib import Path
import shutil
import logging
import re
import tempfile

import boto3
import netCDF4
import pandas as pd
import geopandas as gpd
import yaml

import s3vtdatamgt_EUMETSATftp as eufr

### Logging configuration for the Notebook

In [None]:
# Configure logging for the notebook: basicConfig targets 'eumetsat_process.log'
# at INFO level (it is a no-op when the root logger already has handlers), and
# the root logger is kept in `logger` with its level forced to INFO.
logging.basicConfig(filename='eumetsat_process.log', level=logging.INFO)
logger = logging.getLogger()
logger.setLevel(logging.INFO)

### Configure Parameters from the configuration file
The first configuration detail is used as a default

In [None]:
# Location of the YAML file that holds the FTP and AWS settings.
config_file = "/home/jovyan/s3vt/config.yaml"

# NOTE(review): yaml.load with FullLoader is used on this file; if the file can
# come from an untrusted source, consider yaml.safe_load instead.
with open(config_file) as fid:
    configs = yaml.load(fid, Loader=yaml.FullLoader)

# Only the first entry of the "configurations" list is used.
configs = configs["configurations"][0]

# EUMETSAT FTP settings.
ftp_username = configs["eumetsatftpusername"]
ftp_password = configs["eumetsatftppassword"]
ftp_url = configs["eumetsatftpurl"]
ftp_directory = configs["eumetsatftpdirectory"]

# Area-of-interest entry, read verbatim from the configuration.
aoi = configs["aoi"]

# AWS credentials and target bucket.
aws_access_key_id = configs["awskeyid"]
aws_secret_access_key = configs["awskeypass"]
s3_bucket_name = configs["awss3bucket"]

In [None]:
# Show the active configuration as a sanity check, but mask credential values so
# the FTP password and AWS keys never end up in the saved notebook output.
_secret_config_keys = {"eumetsatftppassword", "awskeyid", "awskeypass"}
print({
    key: ("***" if key in _secret_config_keys else value)
    for key, value in configs.items()
})

### Create a workdir
Creates a working directory `workdir_s3vt` inside the current working directory (unless a different location is explicitly specified) and changes into it.\
The outputs from this exercise will be written inside this working directory.

In [None]:
# Create the working directory "<cwd>/workdir_s3vt" and change into it.
# The cell changes the process working directory, so a naive re-run would create
# a nested "workdir_s3vt/workdir_s3vt". When the current directory is already
# the working directory it is reused, which makes the cell safe to re-run.
_cwd = Path(os.getcwd())
workdir = _cwd if _cwd.name == "workdir_s3vt" else _cwd.joinpath("workdir_s3vt")
workdir.mkdir(exist_ok=True)
os.chdir(str(workdir))

### Listing of FRP products from the Eumetsat's FTP site
If a `.csv` file containing the listing of FRP products with the `.SEN3` suffix from Eumetsat's FTP site is available, provide its full path in place
of `None` in the `eumetsat_ftp_frp_list_file` variable. Listing the FTP site takes a while, so you can use the existing `eumetsat_ftp_frp_list.csv` to skip
this part.

In [None]:
# Optional pre-built listing (.csv) of the FRP products on Eumetsat's FTP site.
# Use None, or keep /home/jovyan/s3vt/eumetsat_ftp_frp_list.csv when the FTP
# site has not been updated since that listing was made.
eumetsat_ftp_frp_list_file = "/home/jovyan/s3vt/eumetsat_ftp_frp_list.csv"

# When the listing exists, stage a copy in the working directory and point the
# variable at that copy; a listing that already lives there is used in place.
if eumetsat_ftp_frp_list_file is not None and Path(eumetsat_ftp_frp_list_file).exists():
    try:
        shutil.copy(eumetsat_ftp_frp_list_file, workdir)
    except shutil.SameFileError:
        logger.info(f"{eumetsat_ftp_frp_list_file} points to same file: will read from same file")
    eumetsat_ftp_frp_list_file = Path(workdir).joinpath(Path(eumetsat_ftp_frp_list_file).name)

# Get the FTP listing of Eumetsat's FRP products.
# NOTE(review): presumably the helper reuses the listing file when it exists and
# otherwise queries the FTP site — confirm in s3vtdatamgt_EUMETSATftp.
eumetsat_ftp_frp_list_file = eufr._get_eumetsat_ftp_listing(
    ftp_username, ftp_password, ftp_url, ftp_directory, eumetsat_ftp_frp_list_file
)

### Listing of ESA's FRP products from AWS S3
If a `.csv` file containing the listing of ESA's FRP products from AWS S3 exists, provide its full path in place of `None` in the `esa_s3_sen3_frp_list_file` variable.

In [None]:
# Optional pre-built listing (.csv) of ESA's FRP products in S3; None when no
# listing is available.
esa_s3_sen3_frp_list_file = None

# When the listing exists, stage a copy in the working directory and point the
# variable at that copy; a listing that already lives there is used in place.
if esa_s3_sen3_frp_list_file is not None and Path(esa_s3_sen3_frp_list_file).exists():
    try:
        shutil.copy(esa_s3_sen3_frp_list_file, workdir)
    except shutil.SameFileError:
        logger.info(f"{esa_s3_sen3_frp_list_file} points to same file: will read from same file")
    esa_s3_sen3_frp_list_file = Path(workdir).joinpath(Path(esa_s3_sen3_frp_list_file).name)

# Get the S3 listing of ESA's FRP products with the .SEN3 extension.
# Eumetsat's FRP products are assumed to be stored in the same bucket under
# s3://{bucket_name}/eumetsat_data/, so that key is excluded from the listing.
esa_s3_sen3_frp_list_file = eufr._get_esa_s3_listing(
    aws_access_key_id,
    aws_secret_access_key,
    s3_bucket_name,
    esa_s3_sen3_frp_list_file,
    exclude_s3_key='eumetsat_data/',
    match_suffix='.SEN3',
)

### Listing of Eumetsat's FRP products from AWS S3
If a `.csv` file containing the listing of Eumetsat's FRP products with the `.SEN3` suffix from AWS S3 exists, set its full path in `eumetsat_s3_sen3_frp_list_file`.\
The `.csv` file listing Eumetsat's `.geojson` files is then searched for in the same folder, under the same filename with `_geojson_` in place of `_sen3_`.
For example, `eumetsat_s3_frp_geojson_list.csv` (listing the `.geojson` files) and `eumetsat_s3_frp_sen3_list.csv` (listing the `.SEN3` files) should reside within the same folder.


In [None]:
# Optional pre-built listings (.csv) of the Eumetsat products already stored in S3.
# Set `eumetsat_s3_sen3_frp_list_file` to the full path of the .SEN3 listing, or
# leave it as None when no listing is available.
eumetsat_s3_sen3_frp_list_file = None
eumet_s3_geojson_list_file = None
if eumetsat_s3_sen3_frp_list_file is not None:
    _sen3_list_path = Path(eumetsat_s3_sen3_frp_list_file)
    if _sen3_list_path.exists():
        # Stage the .SEN3 listing in the working directory.
        try:
            shutil.copy(_sen3_list_path, workdir)
        except shutil.SameFileError:
            logger.info(f"{eumetsat_s3_sen3_frp_list_file} points to same file: will read from same file")

        # The .geojson listing is expected next to the .SEN3 listing, under the
        # same file name with `_geojson_` in place of `_sen3_`.
        _geojson_list_name = re.sub("_sen3_", "_geojson_", _sen3_list_path.name)
        # Only look for a sibling when the substitution actually changed the
        # name; otherwise the "geojson" listing would be the .SEN3 listing itself.
        if _geojson_list_name != _sen3_list_path.name:
            eumet_s3_geojson_list_file = _sen3_list_path.parent.joinpath(_geojson_list_name)
            if eumet_s3_geojson_list_file.exists():
                try:
                    shutil.copy(eumet_s3_geojson_list_file, workdir)
                except shutil.SameFileError:
                    logger.info(f"{eumet_s3_geojson_list_file} points to same file: will read from same file")
                eumet_s3_geojson_list_file = Path(workdir).joinpath(_geojson_list_name)

        # From here on, use the copy staged in the working directory.
        eumetsat_s3_sen3_frp_list_file = Path(workdir).joinpath(_sen3_list_path.name)


In [None]:
# Get the S3 listing of Eumetsat's FRP products with the .SEN3 extension and
# rebind `eumetsat_s3_sen3_frp_list_file` to whatever the helper returns.
# NOTE(review): presumably the helper reuses the listing file passed in when it
# exists and otherwise lists the bucket — confirm in s3vtdatamgt_EUMETSATftp.
# ESA's FRP products are assumed to be in the same bucket at s3://{bucket_name}/data/
eumetsat_s3_sen3_frp_list_file = eufr._get_eumetsat_s3_listing(
    aws_access_key_id,
    aws_secret_access_key,
    s3_bucket_name,
    eumetsat_s3_sen3_frp_list_file,
    exclude_s3_key='/data/', # key to exclude the S3 location where ESA's FRP products are stored
    match_suffix='.SEN3'
)

In [None]:
# Get the S3 listing of Eumetsat's FRP products with the .geojson extension and
# rebind `eumet_s3_geojson_list_file` to whatever the helper returns.
# NOTE(review): presumably the helper reuses the listing file passed in when it
# exists and otherwise lists the bucket — confirm in s3vtdatamgt_EUMETSATftp.
# ESA's FRP products are assumed to be in the same bucket at s3://{bucket_name}/data/
eumet_s3_geojson_list_file = eufr._get_eumetsat_s3_listing(
    aws_access_key_id,
    aws_secret_access_key,
    s3_bucket_name,
    eumet_s3_geojson_list_file,
    exclude_s3_key='/data/', # key to exclude the S3 location where ESA's FRP products are stored
    match_suffix='.geojson'
)

### Subsetting Eumetsat's FRP product using ESA's S3 listing of .SEN3 files
ESA's listing of `.SEN3` files is used to subset Eumetsat's FRP products. Eumetsat's FRP archive has approximately 117000 files with the `.SEN3` extension.\
The subsetting limits Eumetsat's FRP products to those matching ESA's FRP products over the Australasian bounds. It is done by matching the\
acquisition date, sensor (S3A/B) and relative orbit number. We verified that the FRP products from both ESA and Eumetsat maintain the same filename\
attributes (acquisition date, sensor and relative orbit).

In [None]:
# A .pkl file with the same name as a listing (suffix replaced by .pkl) holds
# the pandas dataframe with the filename attributes already parsed. When such a
# file exists next to BOTH the Eumetsat FTP listing and the ESA S3 listing,
# reading the pickles saves the time needed to regenerate the attributes.
_ftp_pickle_exists = Path(eumetsat_ftp_frp_list_file).with_suffix(".pkl").exists()
_esa_pickle_exists = Path(esa_s3_sen3_frp_list_file).with_suffix(".pkl").exists()
read_pickle = _ftp_pickle_exists and _esa_pickle_exists

# Subset of FRP products present in both ESA's and Eumetsat's listings.
eumet_subset_df = eufr.subset_eumetsat_frp_list(
    esa_s3_sen3_frp_list_file, eumetsat_ftp_frp_list_file, read_pickle=read_pickle
)

#### Create dataframe with list of .SEN3 filenames from Eumetsat's FRP products

In [None]:
# Names (without extension) of every Eumetsat FRP product in the subset; these
# are the candidates for upload to the S3 bucket and/or .geojson processing.
_subset_titles = eumet_subset_df['title_x']
eumet_subset_all_names = _subset_titles.apply(lambda title: Path(title).stem)
print(eumet_subset_all_names)

#### Create dataframe with list of .SEN3 filenames from Eumetsat's FRP in AWS S3

In [None]:
# Read the S3 listing of Eumetsat's .SEN3 products: a headerless, single-column
# file whose column is labelled 'name'.
eumet_subset_s3_sen3_df = pd.read_csv(eumetsat_s3_sen3_frp_list_file, header=None, names=['name'])

# Unique product names with the final extension removed.
eumet_subset_s3_sen3_names = {
    Path(entry).stem for entry in eumet_subset_s3_sen3_df['name']
}
for item in eumet_subset_s3_sen3_names:
    print(item)

#### Create dataframe with list of .geojson filename from Eumetsat's FRP in AWS S3

In [None]:
# Read the S3 listing of Eumetsat's .geojson files: a headerless, single-column
# file whose column is labelled 'name'.
eumet_subset_s3_geojson_df = pd.read_csv(eumet_s3_geojson_list_file, header=None, names=['name'])

# Unique product names with the last two extensions removed
# (e.g. "<product>.FRP.geojson" -> "<product>").
eumet_subset_s3_geojson_names = {
    Path(Path(entry).stem).stem for entry in eumet_subset_s3_geojson_df['name']
}
for item in eumet_subset_s3_geojson_names:
    print(item)

#### Initialize AWS session to be used processing stream ahead

In [None]:
# eumetsat_s3_sen3_frp_list_file

In [None]:
# AWS session built from the credentials read from the configuration file; the
# S3 client created from it is used by the processing loop for S3 transfers.
aws_session = boto3.Session(aws_access_key_id=aws_access_key_id, aws_secret_access_key=aws_secret_access_key)
s3_client = aws_session.client(service_name="s3")

### Main workflow to process .geojson files and upload EUMETSAT's FRP to AWS S3
The processing logic implemented here uses the subset of Eumetsat's FRP products that share filename attributes with ESA's FRP products in S3:\
    1. If both the .SEN3 and .geojson files of a Eumetsat FRP product are available in AWS S3, skip processing but download the .geojson file to the local file system for use in analysis.\
    2. If only the .SEN3 product is available in AWS S3, fetch the FRP product from the FTP site, generate the .geojson file, upload it to S3 and move it to the local file system.\
    3. If only the .geojson file is available in AWS S3, fetch the .SEN3 FRP product from the FTP site and upload it to AWS S3, and download the .geojson file to the local file system.\
    4. If neither is available in AWS S3, fetch the FRP product from the FTP site, upload the .SEN3 product, generate and upload the .geojson file, and move the .geojson file to the local file system.

In [None]:
# Main workflow: for every product in the ESA/Eumetsat subset, make sure the
# .SEN3 product and its .geojson hotspot file are in S3 (when s3_upload is True)
# and that a local copy of the .geojson file ends up under
# {workdir}/eumetsat_data/<YYYY-MM-DD>/.
s3_upload = True # set to True if files are to be uploaded to s3
for _, row in eumet_subset_df.iterrows():
    _ftp_url = f"ftp://{ftp_url}/{row['title_x']}"  # ftp url for the eumetsat FRP
    _sen3_frp_name = Path(_ftp_url).name # last path component of the url (FRP name with .SEN3 extension)
    _sen3_frp_stem = Path(_ftp_url).stem # same name with its final extension removed
    print(_sen3_frp_stem)
    # first <8 digits>T<6 digits> timestamp found in the product name;
    # raises IndexError when the name contains no such timestamp
    acq_date = re.findall(r"[0-9]{8}T[0-9]{6}" , _sen3_frp_name)[0]
    s3_sen3_exist = False
    s3_geojson_exist = False  # both S3 existence flags (.SEN3 and .geojson) start as False
    # relative path "eumetsat_data/YYYY-MM-DD" built from the first 8 digits of acq_date;
    # used both as the S3 prefix and as the sub-directory of workdir for .geojson files
    _frp_dir_name = f"eumetsat_data/{acq_date[0:4]}-{acq_date[4:6]}-{acq_date[6:8]}"

    # staging is done in tmpdir to avoid storage limitation of sandbox;
    # the temporary directory is removed when the with-block exits (including via `continue`)
    with tempfile.TemporaryDirectory() as tmpdir:   
        _sen3_dir = Path(tmpdir).joinpath(_sen3_frp_name)  # path inside tmpdir where the FRP product is expected after the FTP fetch
        # look the product up in the name sets built from the S3 listings
        if _sen3_frp_stem in eumet_subset_s3_sen3_names:
            s3_sen3_exist = True
        if _sen3_frp_stem in eumet_subset_s3_geojson_names:
            s3_geojson_exist = True

        # if .geojson exists in s3 bucket then download the .geojson to the local file system
        if s3_geojson_exist:
            # make directory to store the file if downloading
            Path(f"{workdir}/{_frp_dir_name}").mkdir(parents=True, exist_ok=True)
            eufr.s3_download_file(
                s3_bucket_name,
                s3_client,
                filename=f"{_sen3_frp_stem}.FRP.geojson",
                out_dir=Path(f"{workdir}/{_frp_dir_name}"),
                prefix=_frp_dir_name
            )
           
        # if .geojson and .SEN3 both exist in s3 then skip the FTP fetch and processing
        print(s3_geojson_exist, s3_sen3_exist)
        if (s3_geojson_exist) & (s3_sen3_exist):
            continue
        # if either .SEN3 or .geojson file does not exist in s3
        # then download from the ftp site and process .geojson file if it does not 
        # exist in s3. Upload the file if s3_upload flag is True. Finally
        # store the .geojson file to a local file system.
        # NOTE(review): presumably get_eumetsat_dir downloads the product directory
        # into tmpdir as {tmpdir}/{_sen3_frp_name} — confirm in s3vtdatamgt_EUMETSATftp.
        eufr.get_eumetsat_dir(
            ftp_username,
            ftp_password,
            _ftp_url,
            tmpdir,
        )
        
        # walk the entries of the fetched product directory
        for item in _sen3_dir.iterdir():
            # upload every entry of the product when the .SEN3 product is missing in s3
            if s3_upload:
                if not s3_sen3_exist:
                    eufr.s3_upload_file(
                        item,
                        s3_client,
                        s3_bucket_name,
                        prefix=f"{_frp_dir_name}/{_sen3_frp_name}"
                    )
            # if .geojson file does not exist then process the file and upload to s3 bucket.
            if not s3_geojson_exist:
                # create .geojson file for FRP file; only the entry named FRP_in.nc is processed
                if item.name == "FRP_in.nc":
                    # replaces the final suffix of the product path with ".FRP.geojson",
                    # so the file is created in tmpdir next to the product directory
                    gpd_hotspotfile = _sen3_dir.with_suffix(".FRP.geojson")
                    s3hotspotsgpd = eufr.IPF_FRP_read(item)
                    # nothing is written, uploaded or moved when the result is empty
                    if len(s3hotspotsgpd) > 0:
                        s3hotspotsgpd.to_file(gpd_hotspotfile, driver='GeoJSON')
                        if s3_upload:
                            print(gpd_hotspotfile)
                            eufr.s3_upload_file(gpd_hotspotfile, s3_client, s3_bucket_name, prefix=_frp_dir_name )

                        # move the .geojson file to local file system for analysis
                        # make the target directory first if it does not exist yet
                        Path(f"{workdir}/{_frp_dir_name}").mkdir(parents=True, exist_ok=True)
                        shutil.move(gpd_hotspotfile.as_posix(), f"{workdir}/{_frp_dir_name}/{gpd_hotspotfile.name}")

### Concatenate .geojson Eumetsat's FRP to an Eumetsat's hotspot file
The .geojson files generated from Eumetsat's FRP products are read from the `eumetsat_data` directory (`eumet_geojson_dir`). The individual dataframes parsed from the .geojson files are concatenated and additional attributes
are added to form the equivalent of the `s3vt_hotspot.geojson` file, which is written to the working directory as `s3vt_eumetsat_hotspots.geojson`.\
After you have finished running through this notebook you will see the `s3vt_eumetsat_hotspots.geojson` file in the `workdir`. Use the `s3vt_eumetsat_hotspots.geojson` file for analysis with other hotspot products.

In [None]:
# Directory holding the per-product .geojson files. It is derived from `workdir`
# because the processing loop stores the files under {workdir}/eumetsat_data/;
# a hard-coded absolute path breaks as soon as the sandbox home differs.
eumet_geojson_dir = workdir.joinpath("eumetsat_data")

# Collect the hotspots of every .geojson file, tagged with satellite and sensor.
# The files are visited in sorted order so the output is reproducible.
df_eumetsat = []
for fp in sorted(eumet_geojson_dir.glob("**/*.geojson")):
    df1 = gpd.read_file(fp.as_posix())
    df1["satellite"] = f"SENTINEL_{fp.name[0:3]}"  # first three characters of the file name, e.g. S3A / S3B
    df1['sensor'] = 'SLSTR'
    df2 = df1.query("FRP_MWIR>0")  # keep only rows with a positive FRP_MWIR value
    if len(df2) > 0:
        df_eumetsat.append(df2)

# pd.concat raises an opaque ValueError on an empty list; fail with a message
# that says what is actually missing instead.
if not df_eumetsat:
    raise ValueError(
        f"No hotspots with FRP_MWIR > 0 found in .geojson files under {eumet_geojson_dir}"
    )

s3vthotspot_eumetsat = gpd.GeoDataFrame(pd.concat(df_eumetsat, ignore_index=True))
# The `time` column is interpreted as microseconds since 2000-01-01T00:00:00Z.
s3vthotspot_eumetsat["date"] = pd.to_datetime(netCDF4.num2date(s3vthotspot_eumetsat.time, units='microseconds since 2000-01-01T00:00:00Z', only_use_cftime_datetimes=False, only_use_python_datetimes=True))
# solar_day is computed per row from the acquisition date and the longitude.
s3vthotspot_eumetsat["solar_day"] = s3vthotspot_eumetsat.apply(lambda row: eufr.solar_day(row.date, row.longitude), axis=1)
s3vthotspot_eumetsat.to_file(workdir.joinpath("s3vt_eumetsat_hotspots.geojson"), driver='GeoJSON')

In [None]:
# Bounding box used to spatially filter the hotspots while reading; geopandas
# expects the order (minx, miny, maxx, maxy).
bbox = (110.0, -50.0, 160.0, -10)
# Read the file from the exact location the concatenation step wrote it to
# (`workdir`), instead of a hard-coded path that may not match the working
# directory (the previous literal pointed at a nested workdir_s3vt/workdir_s3vt).
eumetsat_geojson_file = workdir.joinpath("s3vt_eumetsat_hotspots.geojson").as_posix()
df_subset = gpd.read_file(eumetsat_geojson_file, bbox=bbox)

In [None]:
# Quick-look plot of the hotspot geometries loaded into `df_subset`.
df_subset.plot()