In [None]:
import os
import sys
from pathlib import Path
import logging
import time
import pandas as pd
import geopandas as gpd
import seaborn
import dask
import numpy as np
import dask.dataframe as dd
from dask.distributed import Client

sys.path.insert(0, '..')
import src.hotspot_utils as util

In [None]:
# Send log records of every level (DEBUG and above) to stdout, stamped with
# the date/time, level and logger name.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.DEBUG,
    datefmt='%Y-%m-%d %H:%M:%S',
    format='%(asctime)s [%(levelname)s] %(name)s - %(message)s',
)
# Notebook-level logger.
_LOG = logging.getLogger(__name__)

In [None]:
# Start a local Dask cluster. The number of workers should be equal to or less
# than the number of cores, so the requested 8 workers are capped at the core
# count reported by the operating system.
# To visualise the Dask status, add /user/<username>/proxy/8787/status to the URL.
client = Client(n_workers=min(8, os.cpu_count() or 1))
client

# Persistent Hotspot Comparison
Landgate developed a set of persistent hotspots (v5.2) by associating recurrent hotspots with known heat sources (industrial activity). Knowledge of persistent hotspots is used here to provide a baseline for the ability of a given sensor to detect hotspots.

Persistent hotspots, together with high-resolution hotspot sources from the Sentinel 2 MSI and the Landsat Enhanced Thematic Mapper and Operational Land Imager sensors, are used here to validate hotspots from the AVHRR, VIIRS, MODIS and SLSTR instruments.

In [None]:
# Output directory where outputs and intermediary files from this notebook are stored.
outdir =  Path("/home/jovyan/s3vt_dask/s3vtdata/workdir_test3")

# Data directory where the .geojson files and the persistent hotspots shapefile are located.
data_dir = Path("/home/jovyan/s3vt_dask/s3vtdata/workdir_test1")

In [None]:
# Load the persistent hotspots shapefile straight out of the zip archive
# stored in the data directory.
persistent_zip_path = data_dir.joinpath('Known non FHS - Version 5.2.zip')
persistent_hotspots_gdf = gpd.GeoDataFrame.from_file(
    f"zip://{persistent_zip_path}/Version 5.2/known_non_FHS.shp"
)

In [None]:
# Lower-case the coordinate column names.
coordinate_column_names = {'Latitude': 'latitude', 'Longitude': 'longitude'}
persistent_hotspots_gdf = persistent_hotspots_gdf.rename(columns=coordinate_column_names)

In [None]:
# Display the persistent hotspots table for inspection.
persistent_hotspots_gdf

In [None]:
# Processing parameters used to load hotspots from the .geojson files within
# the temporal and spatial bounds provided.
# NOTE(review): the next cell assigns `processing_parameters` again, so this
# definition is overwritten before anything reads it; this is the only
# definition that carries a "bbox" key.
processing_parameters = {
    "nasa_frp": "s3://s3vtaustralia/nasa_hotspots_gdf.geojson",
    "esa_frp": "s3://s3vtaustralia/s3vt_hotspots.geojson",
    "eumetsat_frp": "s3://s3vtaustralia/s3vt_eumetsat_hotspots.geojson",
    "landgate_frp": "s3://s3vtaustralia/landgate_hotspots_gdf.geojson",
    "dea_frp": None,
    "start_date": "2019-11-01",
    "end_date": "2020-10-08",
    "bbox": (113.0, -44.0, 154.0, -10.0),
    "chunks": 300,
    "outdir": outdir,
}

In [None]:
# Processing parameters: hotspot and swath sources, spatial extent, date range
# and solar time window.
# NOTE(review): this replaces the `processing_parameters` defined in the
# previous cell and has no "bbox" key; the extent is given by the separate
# lon_west/lat_south/lon_east/lat_north entries instead.
processing_parameters = {
    "nasa_frp": "s3://s3vtaustralia/nasa_hotspots_gdf.geojson",
    "esa_frp": "s3://s3vtaustralia/s3vt_hotspots.geojson",
    "eumetsat_frp": "s3://s3vtaustralia/s3vt_eumetsat_hotspots.geojson",
    "landgate_frp": "s3://s3vtaustralia/landgate_hotspots_gdf.geojson",
    "sentinel3_swath_geojson": "s3://s3vtaustralia/sentinel3_swath_gdfs.geojson",
    "dea_frp": None,
    "lon_west": 113.0, #147.0,
    "lat_south": -44, #-38.0,
    "lon_east": 154.0,
    "lat_north": -10, #-27.,
    "start_date":  "2020-02-02", #"2019-11-01",
    "end_date": "2020-10-08",
    "start_time": "20:00",
    "end_time": "03:00",
    "chunks": 250,
    "compare_field": "solar_night",
    "swath_config_file": Path("/home/jovyan/s3vt_dask/s3vtdata/configs/s3vtconfig.yaml"),
    "outdir": outdir,
    "test": False
}

In [None]:
# Path of the cached hotspots pickle. The file name encodes the eastern and
# western longitude limits followed by the start and end dates.
# The longitude limits are taken from a "bbox" tuple (west, south, east, north)
# when present, otherwise from the separate "lon_west"/"lon_east" entries.
# The active parameter dictionary has no "bbox" key, so indexing it
# unconditionally raised a KeyError.
_bbox = processing_parameters.get("bbox")
if _bbox is not None:
    _lon_west, _lon_east = _bbox[0], _bbox[2]
else:
    _lon_west = processing_parameters["lon_west"]
    _lon_east = processing_parameters["lon_east"]
_start_tag = processing_parameters['start_date'].replace('-', '')
_end_tag = processing_parameters['end_date'].replace('-', '')
hotspots_pkl_file = Path(data_dir).joinpath(
    f"all_hotspots_{int(_lon_east)}_{int(_lon_west)}_{_start_tag}_{_end_tag}.pkl"
)

In [None]:
# Reuse the cached hotspots pickle when it exists; otherwise build the
# hotspots table from the processing parameters and cache it for next time.
if hotspots_pkl_file.exists():
    hotspots_gdf = pd.read_pickle(hotspots_pkl_file)
else:
    hotspots_gdf = util.process_hotspots_gdf(**processing_parameters)
    hotspots_gdf.to_pickle(hotspots_pkl_file)

In [None]:
# Number of hotspot records loaded.
len(hotspots_gdf)

In [None]:
# Pair the hotspots with the persistent hotspots via util.ckdnearest.
# NOTE(review): presumably a nearest-neighbour join whose right-hand columns are
# prefixed with "2_" (later cells read '2_geometry' and '2_Comment') - confirm in util.
persistent_hotspots_nearest_df = util.ckdnearest(hotspots_gdf, persistent_hotspots_gdf)

In [None]:
# Constant column of ones, summed later in the pivot tables to count rows.
persistent_hotspots_nearest_df["count"] = 1

In [None]:
# Remove both geometry columns before handing the table to Dask.
geometry_columns = ['geometry', '2_geometry']
persistent_hotspots_nearest_df = persistent_hotspots_nearest_df.drop(columns=geometry_columns)

In [None]:
# Split the table into 100 partitions as a Dask dataframe.
dd_persistent_hotspots_nearest_df = dd.from_pandas(persistent_hotspots_nearest_df, npartitions=100)

In [None]:
# Row count of the Dask dataframe.
len(dd_persistent_hotspots_nearest_df)

In [None]:
# Apply util._distance to every partition and store the result as "dist_m".
# NOTE(review): `meta` is given as the dtypes of the whole input frame although
# the result is assigned to a single column - confirm this matches what
# util._distance returns for a partition.
dd_persistent_hotspots_nearest_df["dist_m"] = dd_persistent_hotspots_nearest_df.map_partitions(util._distance, meta=persistent_hotspots_nearest_df.dtypes)

In [None]:
# Execute the Dask graph and rebind the name to the computed result; from here
# on the "dd_"-prefixed name no longer refers to a lazy Dask dataframe.
dd_persistent_hotspots_nearest_df = dd_persistent_hotspots_nearest_df.compute()

In [None]:
# Count matched hotspots per persistent-hotspot comment (rows) and per
# satellite/sensor/product (columns) by summing the "count" column.
persistentcount = util.pandas_pivot_table(
    dd_persistent_hotspots_nearest_df,
    values=["count"],
    index=["2_Comment"],
    columns=["satellite_sensor_product"],
    aggfunc={"count": np.sum},
)

In [None]:
# Display the count table.
persistentcount

In [None]:
# Same pivot as the overall count, restricted to rows whose "dist_m" value is
# below 5000.
closer_than_5000 = dd_persistent_hotspots_nearest_df['dist_m'] < 5000
persistentcount_5000 = util.pandas_pivot_table(
    dd_persistent_hotspots_nearest_df[closer_than_5000],
    values=["count"],
    index=["2_Comment"],
    columns=["satellite_sensor_product"],
    aggfunc={"count": np.sum},
)

In [None]:
# Display the count table restricted to dist_m < 5000.
persistentcount_5000

In [None]:
# Keep only the matches whose "dist_m" value is below 5000.
dist_below_5000 = dd_persistent_hotspots_nearest_df['dist_m'] < 5000
persistent5km = dd_persistent_hotspots_nearest_df[dist_below_5000]

In [None]:
# Index the filtered matches by the persistent hotspot comment. The result is
# rebound to the name rather than modified in place, because `persistent5km`
# is a filtered selection of another frame.
persistent5km = persistent5km.set_index('2_Comment')
#persistent5km.set_index('solar_night', inplace=True)

In [None]:
# which persistent hotspots were imaged by a target sensor? on any given day?
#for index_a, gdf_ra in persistent5km.resample("D", on='solar_night'):
#    print(index_a, gdf_ra)

In [None]:
# A per-day count of intersections between each persistent point and the image
# swaths is needed. Which dates do we have hotspots for?
# 1. Load the swaths.
# NOTE(review): the path is relative to the notebook's working directory rather
# than built from `data_dir` - confirm that both refer to the same folder.
# NOTE(review): pickle files should only be loaded from trusted sources.
swaths = pd.read_pickle('../workdir_test1/swaths_154_113_20191101_20201008.pkl')

In [None]:
# Keep only the swaths with a valid geometry. The selection is copied so that
# later modifications of `swath_gdf` act on an independent frame, not on a
# slice of `swaths`.
swath_gdf = swaths[swaths['geometry'].is_valid].copy()

In [None]:
# Convert the solar time window to UTC. The longitudes and times are read from
# `processing_parameters` rather than repeated as literals, so the window
# follows the configured extent and hours.
start_time_utc, end_time_utc = util.convert_solar_time_to_utc(
    processing_parameters["lon_east"],
    processing_parameters["lon_west"],
    processing_parameters["start_time"],
    processing_parameters["end_time"],
)

In [None]:
# Move the current index of the hotspots table into ordinary columns (in place).
# NOTE(review): a later cell calls between_time on this frame, which needs a
# DatetimeIndex - confirm that resetting the index here does not break it.
hotspots_gdf.reset_index(inplace=True)

In [None]:
# Display the hotspots table for inspection.
hotspots_gdf

In [None]:
# Restrict hotspots and swaths to the same UTC time-of-day window. The hotspot
# window was previously hard-coded as '09:44'-'19:28'; both filters now use the
# values returned by util.convert_solar_time_to_utc so they cannot drift apart.
# NOTE(review): between_time requires a DatetimeIndex - confirm hotspots_gdf
# still has one at this point.
hotspots_gdf = hotspots_gdf.between_time(start_time_utc, end_time_utc)
# Index the swaths by acquisition time (returns a new frame instead of
# modifying a filtered selection in place), then apply the same window.
swath_gdf = (
    swath_gdf
    .set_index('AcquisitionOfSignalUTC')
    .between_time(start_time_utc, end_time_utc)
)

In [None]:
# Simplify matching between swaths and hotspots: rewrite the swath satellite
# names to the spelling compared against the hotspots' 'satellite' column.
# The mapping is applied to the whole column at once. The previous row-by-row
# assignment addressed rows by index label, which is the acquisition time and
# may not be unique, so one row's new name could be written to other rows.
satellite_name_map = {
    'NPP': 'SUOMI NPP',
    'NOAA_20': 'NOAA 20',
    'NOAA_19': 'NOAA-19',
    'Sentinel_3A': 'SENTINEL_3A',
    'Sentinel_3B': 'SENTINEL_3B',
}
swath_gdf = swath_gdf.assign(
    Satellite=swath_gdf['Satellite'].replace(satellite_name_map)
)

In [None]:
# Distinct satellite/sensor/product identifiers present in the hotspots table.
satellite_sensor_product = hotspots_gdf['satellite_sensor_product'].unique()

In [None]:
# Display the list of products.
satellite_sensor_product

In [None]:
# Scratch cell: wrap a single row in a GeoDataFrame.
# NOTE(review): `row` is only ever assigned as a loop variable in other cells,
# so this cell depends on which loop ran last (hidden state).
persistent_row = gpd.GeoDataFrame(row)

In [None]:
# Transpose the frame built from the single row.
persistent_row = persistent_row.transpose()

In [None]:
#util.ckdnearest(gpd.GeoDataFrame(row, geometry='geometry'), gdf_ra)
#gpd.GeoDataFrame(row)
# Scratch cell: match the single persistent row against one group of hotspots.
# NOTE(review): `gdf_ra` is only assigned as a loop variable in cells further
# down, so this cell fails on a fresh top-to-bottom run.
matched_hotspot = util.ckdnearest(persistent_row, gdf_ra.reset_index())

In [None]:
# Show the 'Comment' value of the single persistent row.
persistent_row['Comment'].iloc[0]

In [None]:
# Scratch cell: show the first distance value.
# NOTE(review): `dist_m` is only assigned inside the comparison loop in a later
# cell, so this cell fails on a fresh top-to-bottom run.
dist_m[0]

In [None]:
# Build one result row for every (product, solar-night day, persistent hotspot)
# combination in which the persistent hotspot lies inside that day's swath
# footprint, and flag whether the matched hotspot is within the detection
# distance.
DETECTION_DISTANCE_M = 5000.0

persistent_results_list = []
# For each hotspot product
for product in satellite_sensor_product:
    product_hotspots_gdf = hotspots_gdf[hotspots_gdf['satellite_sensor_product'] == product]
    # For each solar-night day and the group of hotspots for that day
    for index_a, gdf_ra in product_hotspots_gdf.resample("D", on='solar_night'):
        # Daily resampling also yields days without hotspots; skip them
        # explicitly instead of relying on an exception.
        if gdf_ra.empty:
            continue
        try:
            # Swaths acquired on this day. The comparison uses a plain date on
            # both sides: recent pandas versions never treat a Timestamp as
            # equal to a datetime.date, which would leave the subset empty.
            subset_swath_gdf = swath_gdf[swath_gdf.index.date == index_a.date()]
            # Keep the swaths of the satellite that produced these hotspots.
            subset_swath_gdf = subset_swath_gdf[
                subset_swath_gdf['Satellite'] == gdf_ra['satellite'].iloc[0]
            ]
            if subset_swath_gdf.empty:
                continue
            # Combined footprint of the satellite's passes for the day.
            sensor_geom = subset_swath_gdf.unary_union
            # Loop-invariant: the day's hotspots with the index as columns.
            day_hotspots_gdf = gdf_ra.reset_index()
            # Persistent hotspots inside the footprint, found in one
            # vectorised call rather than one intersection per row.
            imaged_persistent_gdf = persistent_hotspots_gdf[
                persistent_hotspots_gdf.intersects(sensor_geom)
            ]
            for index, row in imaged_persistent_gdf.iterrows():
                # Single-row frame for the persistent hotspot.
                persistent_row = gpd.GeoDataFrame(row).transpose()
                gpd_result = util.ckdnearest(persistent_row, day_hotspots_gdf)
                dist_m = util._distance(gpd_result)

                gpd_result['dist_m'] = dist_m[0]
                # 1 when the matched hotspot is within the detection distance, else 0.
                gpd_result['detected5km'] = int(dist_m[0] < DETECTION_DISTANCE_M)

                persistent_results_list.append(gpd_result)
        except Exception:
            # Stay best-effort (one failing day must not abort the run) but
            # record the failure instead of hiding it.
            _LOG.exception("Persistent hotspot comparison failed for %s on %s", product, index_a)

if not persistent_results_list:
    raise ValueError(
        "No persistent hotspot intersected any swath; check the swath dates, "
        "satellite names and time window."
    )
persistent_results_gpd = pd.concat(persistent_results_list)

In [None]:
# Constant column of ones, summed later to count the result rows.
persistent_results_gpd['count'] = 1

In [None]:
# Denominator: number of result rows per persistent hotspot comment (rows) and
# hotspot product (columns). Each result row is one case where the persistent
# hotspot fell inside a swath footprint.
# The aggregation is named by the string "sum"; passing np.sum gives the same
# result but triggers a deprecation warning in recent pandas versions.
persistent_results_denominator = persistent_results_gpd.pivot_table(
    index=["Comment"],
    columns=["2_satellite_sensor_product"],
    values=["count"],
    aggfunc={"count": "sum"},
)

In [None]:
# Numerator: number of result rows flagged as detected (detected5km == 1) per
# persistent hotspot comment (rows) and hotspot product (columns).
# The aggregation is named by the string "sum"; passing np.sum gives the same
# result but triggers a deprecation warning in recent pandas versions.
persistent_results_numerator = persistent_results_gpd.pivot_table(
    index=["Comment"],
    columns=["2_satellite_sensor_product"],
    values=["detected5km"],
    aggfunc={"detected5km": "sum"},
)

In [None]:
# Inspect the row labels (persistent hotspot comments) of the numerator table.
persistent_results_numerator.index

In [None]:
# Percentage of swath intersections in which the persistent hotspot was
# detected, per comment and product.
# Both pivot tables have two-level columns whose top level is the value name
# ("detected5km" and "count"). Dividing the tables directly aligns on those
# differing labels and yields only NaN, so the top level is selected first and
# the division then aligns on the product names.
percentage_persistent = (
    persistent_results_numerator["detected5km"]
    / persistent_results_denominator["count"]
) * 100

In [None]:
# Display the numerator table.
persistent_results_numerator

In [None]:
# Display the denominator table.
persistent_results_denominator

In [None]:
# Recompute the count table restricted to dist_m < 5000. This repeats the
# computation of `persistentcount_5000` from an earlier cell with the same
# inputs.
under_5000 = dd_persistent_hotspots_nearest_df['dist_m'] < 5000
persistentcount_5000 = util.pandas_pivot_table(
    dd_persistent_hotspots_nearest_df[under_5000],
    values=["count"],
    index=["2_Comment"],
    columns=["satellite_sensor_product"],
    aggfunc={"count": np.sum},
)

In [None]:
# Scratch cell: show the swath footprint left over from the last loop iteration.
sensor_geom

In [None]:
# Scratch cell: add the hour (characters 0-1) and minute (characters 3-4) of
# start_time_utc to the last resample day left over from the loop.
index_a + pd.DateOffset(hours=int(start_time_utc[0:2])) + pd.DateOffset(minutes=int(start_time_utc[3:5]))

In [None]:
# Scratch cell: column-wise minimum of the swath table.
swath_gdf.min()

In [None]:
# Scratch cell: union of all swaths (no date filter) for the satellite of the
# hotspot group left over from the loop.
# NOTE(review): this overwrites the `sensor_geom` produced by the comparison loop.
sensor_geom = swath_gdf[swath_gdf['Satellite'] == gdf_ra['satellite'].iloc[0]].unary_union

In [None]:
# Show the recomputed footprint.
sensor_geom

In [None]:
    # NOTE(review): incomplete fragment. The cell starts indented and its `try`
    # has no `except`/`finally`, so it cannot run; it repeats part of the
    # comparison loop in an earlier cell.
    # For each night_pass date and group of hotspots for that date
    for index_a, gdf_ra in hotspots_gdf[hotspots_gdf['satellite_sensor_product'] == product].resample("D", on='solar_night'):
        # Get the corresponding geometry for the satellite pass
        try:
            sensor_geom = swath_gdf[swath_gdf['Satellite'] == gdf_ra['satellite'].iloc[0]].unary_union

In [None]:
# NOTE(review): `persistent_hotspot_results` is never assigned in the visible
# part of this notebook; possibly `persistent_results_gpd` was meant - confirm.
persistent_hotspot_results

In [None]:
# Scratch cell: swaths of the satellite of the hotspot group left over from the loop.
swath_gdf[swath_gdf['Satellite'] == gdf_ra['satellite'].iloc[0]]

In [None]:
# Scratch cell: satellite name of the first swath row.
swath_gdf['Satellite'].iloc[0]

In [None]:
# Scratch cell: first 'satellite' entry of the hotspot group left over from the loop.
gdf_ra['satellite'][0:1]

In [None]:
# Scratch cell: column names of the swath table.
swath_gdf.columns

In [None]:
# Distinct satellite names in the hotspots table (for comparison with the swath names).
hotspots_gdf['satellite'].unique()

In [None]:
# Distinct satellite names in the swath table (for comparison with the hotspot names).
swath_gdf['Satellite'].unique()

In [None]:
# NOTE(review): pseudo-code sketch of the comparison loop, not runnable Python:
#   - `start_date`, `end_date`, `ckdnearest` and `nearest` are not defined as
#     bare names in the visible notebook;
#   - `if nearest < 5km = True:` is a syntax error;
#   - `deteced5km` in the else branch is a misspelling of `detected5km`.
# The working version of this logic is the loop that builds persistent_results_list.
for day in range (start_date,end_date):
    hotspot_subset = hotspots_gdf.between_time(start_time_utc, end_time_utc)
    
    swath_subset = swath_gdf.between_time(start_time_utc, end_time_utc)
    for index, row in persistent_hotspots_gdf.iterrows():
        intersection = row.geometry.intersection(swath_subset)
        if intersection:
            intersection = 1
            ckdnearest(row, hotspot_subset)
            if nearest < 5km = True:
                detected5km = 1
            else:
                deteced5km = 0
        else:
            intersection = 0
        

In [None]:
# Scratch cell: show the last value assigned to `intersection`.
intersection

In [None]:
# Scratch cell: merge a swath subset into one geometry and intersect the
# persistent hotspots with a sensor footprint.
# NOTE(review): `swath_gdf_subset` and `sensor_b_geom` are never assigned in the
# visible part of this notebook, so this cell fails on a fresh run.
swath_gdf_subset = swath_gdf_subset.unary_union
intersection = persistent_hotspots_gdf.intersection(sensor_b_geom)