In [1]:
import os
import sys
from pathlib import Path
import logging
import time
import pandas as pd
import geopandas as gpd
import seaborn
import dask
import numpy as np
import dask.dataframe as dd
from dask.distributed import Client
import hotspot_utils as util
import process_nearest_hotspots as nearest_process
import logging

In [2]:
logging.basicConfig(
    format='%(asctime)s [%(levelname)s] %(name)s - %(message)s',
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S',
    stream=sys.stdout,
)
_LOG = logging.getLogger(__name__)

In [3]:
client = Client(processes=False)

# Processing Parameter used in Sub-setting Spatial Extent and Temporal Range for Area of Interest
##### The FRP data from nasa, esa, eumetsat and landgate are merged, sub-setted and neareast hotspots csv files are generated based on the parameters in `processing_parameters`  
##### The parameter `chunks` in blocking FRP data to enable multi-processing. If you encounter memory issues then higher the number.
##### The parameter `compare_field` is the name of column that will be used to generate nearest hotspots, the data in the column should be of datetime type.

In [None]:
processing_parameters = {
    "nasa_frp": "s3://s3vtaustralia/nasa_hotspots_gdf.geojson",
    "esa_frp": "s3://s3vtaustralia/s3vt_hotspots.geojson",
    "eumetsat_frp": "s3://s3vtaustralia/s3vt_eumetsat_hotspots.geojson",
    "landgate_frp": "s3://s3vtaustralia/landgate_hotspots_gdf.geojson",
    "dea_frp": None,
    "lon_west": 147.0,
    "lat_south": -38.0,
    "lon_east": 154.0,
    "lat_north": -27.,
    "start_date": "2019-11-01",
    "end_date": "2020-10-08",
    "start_time": "21:00",
    "end_time": "3:00",
    "chunks": 300,
    "outdir": Path(os.getcwd()).joinpath("workdir"),
    "compare_field": "solar_night"  # solar_day or solar_night
}

In [5]:
# This is to generate nearest .csv files. If .csv files already exists then skip this process. Takes around ~5-6 hours in this sandbox environment with 2-core and 16 GB RAM
nearest_hotspots_product_files = nearest_process.process_nearest_points(**processing_parameters)

2021-03-15 00:21:02 [INFO] process_nearest_hotspots - Processing Neareast Hotspots...
2021-03-15 00:21:02 [INFO] botocore.credentials - Found credentials in environment variables.
2021-03-15 00:21:02 [INFO] hotspot_utils - s3://s3vtaustralia/nasa_hotspots_gdf.geojson exists: skipped download
2021-03-15 00:21:02 [INFO] hotspot_utils - s3://s3vtaustralia/s3vt_hotspots.geojson exists: skipped download
2021-03-15 00:21:02 [INFO] hotspot_utils - s3://s3vtaustralia/s3vt_eumetsat_hotspots.geojson exists: skipped download
2021-03-15 00:21:02 [INFO] hotspot_utils - s3://s3vtaustralia/landgate_hotspots_gdf.geojson exists: skipped download
2021-03-15 00:21:02 [INFO] hotspot_utils - dea Hotspots FRP  is None. excluding from analysis.
2021-03-15 00:21:02 [INFO] process_nearest_hotspots - Reading spatial and temporal subsets of all hotspots dataframes...
2021-03-15 00:21:02 [INFO] hotspot_utils - reading and subsetting GeoDataFrame for nasa: /home/jovyan/s3vt_dask/s3vtdata/workdir/nasa_hotspots_gdf.

## Nearest Hotspots DataFrame merged from neareast hotspots csv files

In [6]:
# csv directory is where nearest hotspots csv files are stored. 
csv_directory = Path(os.getcwd()).joinpath("workdir")
# This is read all the .csv files if name starts with `nearest_points` and ends with `compare_field` value from processing parameters.
nearest_hotspots_csv_files = [
    fp for fp in csv_directory.iterdir()
    if (fp.name.startswith("nearest_points"))
    and (fp.name.endswith(f"{processing_parameters['compare_field']}.csv"))
]

In [7]:
# nearest points csv files that will be used analysis from here on.
nearest_hotspots_csv_files

[PosixPath('/home/jovyan/s3vt_dask/s3vtdata/workdir/nearest_points_SENTINEL_3B_SLSTR_EUMETSAT_solar_night.csv'),
 PosixPath('/home/jovyan/s3vt_dask/s3vtdata/workdir/nearest_points_SENTINEL_3B_SLSTR_ESA_solar_night.csv'),
 PosixPath('/home/jovyan/s3vt_dask/s3vtdata/workdir/nearest_points_SENTINEL_3A_SLSTR_ESA_solar_night.csv'),
 PosixPath('/home/jovyan/s3vt_dask/s3vtdata/workdir/nearest_points_TERRA_MODIS_NASA6.03_solar_night.csv'),
 PosixPath('/home/jovyan/s3vt_dask/s3vtdata/workdir/nearest_points_AQUA_MODIS_LANDGATE_solar_night.csv'),
 PosixPath('/home/jovyan/s3vt_dask/s3vtdata/workdir/nearest_points_AQUA_MODIS_NASA6.03_solar_night.csv'),
 PosixPath('/home/jovyan/s3vt_dask/s3vtdata/workdir/nearest_points_NOAA 20_VIIRS_LANDGATE_solar_night.csv'),
 PosixPath('/home/jovyan/s3vt_dask/s3vtdata/workdir/nearest_points_SUOMI NPP_VIIRS_NASA1_solar_night.csv'),
 PosixPath('/home/jovyan/s3vt_dask/s3vtdata/workdir/nearest_points_NOAA 20_VIIRS_NASA2.0NRT_solar_night.csv'),
 PosixPath('/home/jovyan

In [8]:
# returns a dask DataFrame with index set at column `compare_field` from processing_parameters.
nearest_points_ddf = util.csv_to_dataframe(nearest_hotspots_csv_files, processing_parameters["compare_field"])

In [9]:
nearest_points_ddf.head()

Unnamed: 0_level_0,Unnamed: 0,latitude,longitude,satellite,sensor,confidence,power,datetime,satellite_sensor_product,geometry,...,2_power,2_datetime,2_solar_day,2_satellite_sensor_product,2_geometry,2_solar_night,dist,dist_m,timedelta,count
solar_day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-03-10 22:08:59,0,-33.477442,147.76937,SENTINEL_3B,SLSTR,-1.0,0.26,2020-03-10 12:17:55,SENTINEL_3B_SLSTR_EUMETSAT,POINT (147.7693696269182 -33.47744191547758),...,1.1,2020-03-10 15:30:00,2020-03-11 01:21:05,NOAA 20_VIIRS_NASA2.0NRT,POINT (147.77135 -33.48451),2020-03-10 13:21:05,0.00734,805.260796,03:12:05,1
2020-03-10 22:08:59,10221,-33.477442,147.76937,SENTINEL_3B,SLSTR,-1.0,0.27,2020-03-10 12:17:55,SENTINEL_3B_SLSTR_ESA,POINT (147.7693696269182 -33.47744191547758),...,-1.0,2020-03-10 15:28:00,2020-03-11 01:31:31,NOAA 20_VIIRS_LANDGATE,POINT (150.88068 -34.45438),2020-03-10 13:31:31,3.261083,307288.638357,03:10:05,1
2020-03-10 22:08:59,8669,-33.477442,147.76937,SENTINEL_3B,SLSTR,-1.0,0.27,2020-03-10 12:17:55,SENTINEL_3B_SLSTR_ESA,POINT (147.7693696269182 -33.47744191547758),...,-1.0,2020-03-10 14:37:00,2020-03-11 00:27:40,SUOMI NPP_VIIRS_LANDGATE,POINT (147.66688 -33.26938),2020-03-10 12:27:40,0.231935,25004.323299,02:19:05,1
2020-03-10 22:08:59,7653,-33.477442,147.76937,SENTINEL_3B,SLSTR,-1.0,0.27,2020-03-10 12:17:55,SENTINEL_3B_SLSTR_ESA,POINT (147.7693696269182 -33.47744191547758),...,0.26,2020-03-10 12:17:55,2020-03-10 22:08:59,SENTINEL_3B_SLSTR_EUMETSAT,POINT (147.7693696269182 -33.47744191547758),2020-03-10 10:08:59,0.0,0.0,00:00:00,1
2020-03-10 22:08:59,7133,-33.477442,147.76937,SENTINEL_3B,SLSTR,-1.0,0.27,2020-03-10 12:17:55,SENTINEL_3B_SLSTR_ESA,POINT (147.7693696269182 -33.47744191547758),...,0.21,2020-03-10 12:55:52,2020-03-10 22:50:46,SENTINEL_3A_SLSTR_EUMETSAT,POINT (148.7281626244975 -37.77098190463744),2020-03-10 10:50:46,4.399292,484221.886602,00:37:57,1


# Results
## Co-occurrence metrics

In [22]:
region_alias = "nsw"
output_directory = processing_parameters["outdir"]
comparison_prefix = (
    f"{processing_parameters['compare_field']}"
    f"_{processing_parameters['start_date'].replace('-', '')}"
    f"_{processing_parameters['end_date'].replace('-', '')}"
    f"_{region_alias}"
)

In [17]:
# set the nearest distance threshold between two hotspots to confine the analysis within the distance threshold.  
dist_threshold = 5000  # units in meters

In [18]:
nearest_ddf_dist_subset = client.persist(nearest_points_ddf[nearest_points_ddf["dist_m"] < dist_threshold])

In [19]:
# Count of hotspot matches < dist_threshold
numerator = util.dask_pivot_table(
    nearest_ddf_dist_subset,
    index="2_satellite_sensor_product",
    column="satellite_sensor_product",
    values="count",
    aggfunc="count"
).compute()

In [25]:
numerator.astype(int).to_csv(output_directory.joinpath(f"{comparison_prefix}_matches_{dist_threshold}.csv"))
numerator.astype(int)

satellite_sensor_product,SENTINEL_3B_SLSTR_EUMETSAT,SENTINEL_3B_SLSTR_ESA,SENTINEL_3A_SLSTR_EUMETSAT,SENTINEL_3A_SLSTR_ESA,TERRA_MODIS_NASA6.03,TERRA_MODIS_LANDGATE,NOAA 20_VIIRS_NASA2.0NRT,SUOMI NPP_VIIRS_LANDGATE,SUOMI NPP_VIIRS_NASA1,AQUA_MODIS_LANDGATE,NOAA 20_VIIRS_LANDGATE,AQUA_MODIS_NASA6.03
2_satellite_sensor_product,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
NOAA 20_VIIRS_NASA2.0NRT,1682,1168,1808,1222,287,1721,14518,23575,4853,1134,25034,94
SENTINEL_3B_SLSTR_EUMETSAT,2166,1008,426,166,83,432,2461,4779,1299,297,3781,29
SENTINEL_3B_SLSTR_ESA,1008,1565,168,236,121,411,1744,2542,1818,246,2554,35
SUOMI NPP_VIIRS_NASA1,853,1243,740,1235,247,924,4760,9310,6139,681,6898,84
SUOMI NPP_VIIRS_LANDGATE,1524,1012,1643,1036,253,1561,9560,29057,4714,1109,18250,93
NOAA 20_VIIRS_LANDGATE,1355,981,1561,975,261,1652,11863,21469,4021,1058,26570,84
SENTINEL_3A_SLSTR_EUMETSAT,596,254,2257,867,81,534,2913,5545,1176,334,5064,26
SENTINEL_3A_SLSTR_ESA,256,354,873,1517,126,448,1805,3077,1842,336,2674,38
TERRA_MODIS_NASA6.03,232,318,177,280,317,748,1222,2666,1003,310,2602,40
TERRA_MODIS_LANDGATE,531,482,669,463,297,1865,3849,8343,1753,582,7850,54


In [24]:
# Count of hotspot matches - total  
denominator = util.dask_pivot_table(
        nearest_points_ddf,
        index="2_satellite_sensor_product",
        column="satellite_sensor_product",
        values="count",
        aggfunc="count",
    ).compute()

In [26]:
denominator.astype(int).to_csv(output_directory.joinpath(f"{comparison_prefix}_matches_count.csv"))
denominator.astype(int)

satellite_sensor_product,SENTINEL_3B_SLSTR_EUMETSAT,SENTINEL_3B_SLSTR_ESA,SENTINEL_3A_SLSTR_EUMETSAT,SENTINEL_3A_SLSTR_ESA,TERRA_MODIS_NASA6.03,TERRA_MODIS_LANDGATE,NOAA 20_VIIRS_NASA2.0NRT,SUOMI NPP_VIIRS_LANDGATE,SUOMI NPP_VIIRS_NASA1,AQUA_MODIS_LANDGATE,NOAA 20_VIIRS_LANDGATE,AQUA_MODIS_NASA6.03
2_satellite_sensor_product,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
NOAA 20_VIIRS_NASA2.0NRT,2061,1460,2232,1492,310,1815,14518,28793,5943,1149,26295,97
NOAA 20_VIIRS_LANDGATE,2043,1565,2147,1500,317,1863,13860,28746,6131,1172,26570,98
SUOMI NPP_VIIRS_LANDGATE,2000,1552,2161,1517,305,1816,13669,29057,6080,1171,25986,98
SENTINEL_3B_SLSTR_EUMETSAT,2166,1016,724,284,101,573,4421,8240,2369,450,6856,34
SENTINEL_3A_SLSTR_EUMETSAT,1145,520,2257,874,100,651,4690,8895,2092,425,7979,31
SENTINEL_3B_SLSTR_ESA,1022,1565,297,483,159,590,3558,4759,3599,374,4847,46
SUOMI NPP_VIIRS_NASA1,1022,1565,898,1517,264,979,6073,9787,6139,689,8879,84
SENTINEL_3A_SLSTR_ESA,527,796,898,1517,155,565,3505,5256,3501,416,4980,45
TERRA_MODIS_NASA6.03,710,1095,625,1055,317,1078,5324,9832,4458,504,9481,63
TERRA_MODIS_LANDGATE,1474,1300,1729,1236,315,1865,11155,23265,5095,936,22121,68


In [28]:
# Difference of matched points closer than 5000m
difference = denominator - numerator

In [29]:
difference.astype(int).to_csv(output_directory.joinpath(f"{comparison_prefix}_count_difference.csv"))
difference.astype(int)

satellite_sensor_product,SENTINEL_3B_SLSTR_EUMETSAT,SENTINEL_3B_SLSTR_ESA,SENTINEL_3A_SLSTR_EUMETSAT,SENTINEL_3A_SLSTR_ESA,TERRA_MODIS_NASA6.03,TERRA_MODIS_LANDGATE,NOAA 20_VIIRS_NASA2.0NRT,SUOMI NPP_VIIRS_LANDGATE,SUOMI NPP_VIIRS_NASA1,AQUA_MODIS_LANDGATE,NOAA 20_VIIRS_LANDGATE,AQUA_MODIS_NASA6.03
2_satellite_sensor_product,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
NOAA 20_VIIRS_NASA2.0NRT,379,292,424,270,23,94,0,5218,1090,15,1261,3
NOAA 20_VIIRS_LANDGATE,688,584,586,525,56,211,1997,7277,2110,114,0,14
SUOMI NPP_VIIRS_LANDGATE,476,540,518,481,52,255,4109,0,1366,62,7736,5
SENTINEL_3B_SLSTR_EUMETSAT,0,8,298,118,18,141,1960,3461,1070,153,3075,5
SENTINEL_3A_SLSTR_EUMETSAT,549,266,0,7,19,117,1777,3350,916,91,2915,5
SENTINEL_3B_SLSTR_ESA,14,0,129,247,38,179,1814,2217,1781,128,2293,11
SUOMI NPP_VIIRS_NASA1,169,322,158,282,17,55,1313,477,0,8,1981,0
SENTINEL_3A_SLSTR_ESA,271,442,25,0,29,117,1700,2179,1659,80,2306,7
TERRA_MODIS_NASA6.03,478,777,448,775,0,330,4102,7166,3455,194,6879,23
TERRA_MODIS_LANDGATE,943,818,1060,773,18,0,7306,14922,3342,354,14271,14


In [31]:
# Percentage of matched points closer than dist_threshold
percentage = (numerator / denominator) * 100
percentage = np.round(percentage, 2)

In [32]:
percentage.to_csv(output_directory.joinpath(f"{comparison_prefix}_percentage.csv"))
percentage

satellite_sensor_product,SENTINEL_3B_SLSTR_EUMETSAT,SENTINEL_3B_SLSTR_ESA,SENTINEL_3A_SLSTR_EUMETSAT,SENTINEL_3A_SLSTR_ESA,TERRA_MODIS_NASA6.03,TERRA_MODIS_LANDGATE,NOAA 20_VIIRS_NASA2.0NRT,SUOMI NPP_VIIRS_LANDGATE,SUOMI NPP_VIIRS_NASA1,AQUA_MODIS_LANDGATE,NOAA 20_VIIRS_LANDGATE,AQUA_MODIS_NASA6.03
2_satellite_sensor_product,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
NOAA 20_VIIRS_NASA2.0NRT,81.61,80.0,81.0,81.9,92.58,94.82,100.0,81.88,81.66,98.69,95.2,96.91
SENTINEL_3B_SLSTR_EUMETSAT,100.0,99.21,58.84,58.45,82.18,75.39,55.67,58.0,54.83,66.0,55.15,85.29
SENTINEL_3B_SLSTR_ESA,98.63,100.0,56.57,48.86,76.1,69.66,49.02,53.41,50.51,65.78,52.69,76.09
SUOMI NPP_VIIRS_NASA1,83.46,79.42,82.41,81.41,93.56,94.38,78.38,95.13,100.0,98.84,77.69,100.0
SUOMI NPP_VIIRS_LANDGATE,76.2,65.21,76.03,68.29,82.95,85.96,69.94,100.0,77.53,94.71,70.23,94.9
NOAA 20_VIIRS_LANDGATE,66.32,62.68,72.71,65.0,82.33,88.67,85.59,74.69,65.58,90.27,100.0,85.71
SENTINEL_3A_SLSTR_EUMETSAT,52.05,48.85,100.0,99.2,81.0,82.03,62.11,62.34,56.21,78.59,63.47,83.87
SENTINEL_3A_SLSTR_ESA,48.58,44.47,97.22,100.0,81.29,79.29,51.5,58.54,52.61,80.77,53.69,84.44
TERRA_MODIS_NASA6.03,32.68,29.04,28.32,26.54,100.0,69.39,22.95,27.12,22.5,61.51,27.44,63.49
TERRA_MODIS_LANDGATE,36.02,37.08,38.69,37.46,94.29,100.0,34.5,35.86,34.41,62.18,35.49,79.41


In [33]:
# Maximum time between match points < dist_threshold
timemax = util.pandas_pivot_table(
    nearest_ddf_dist_subset.compute(),
    index=["satellite_sensor_product"],
    columns=["2_satellite_sensor_product"],
    values=["timedelta"],
    aggfunc={"timedelta": np.max}
    
)

In [35]:
timemax.to_csv(output_directory.joinpath(f"{comparison_prefix}_max_time_matched_points.csv"))
timemax

Unnamed: 0_level_0,timedelta,timedelta,timedelta,timedelta,timedelta,timedelta,timedelta,timedelta,timedelta,timedelta,timedelta,timedelta
2_satellite_sensor_product,AQUA_MODIS_LANDGATE,AQUA_MODIS_NASA6.03,NOAA 20_VIIRS_LANDGATE,NOAA 20_VIIRS_NASA2.0NRT,SENTINEL_3A_SLSTR_ESA,SENTINEL_3A_SLSTR_EUMETSAT,SENTINEL_3B_SLSTR_ESA,SENTINEL_3B_SLSTR_EUMETSAT,SUOMI NPP_VIIRS_LANDGATE,SUOMI NPP_VIIRS_NASA1,TERRA_MODIS_LANDGATE,TERRA_MODIS_NASA6.03
satellite_sensor_product,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
AQUA_MODIS_LANDGATE,00:17:00,00:11:00,01:25:00,01:49:00,03:48:41,03:46:01,03:37:23,03:47:38,01:13:00,01:57:00,03:25:00,03:20:00
AQUA_MODIS_NASA6.03,00:11:00,00:00:00,01:06:00,01:30:00,03:37:40,03:37:40,03:09:54,03:09:54,00:55:00,01:03:00,03:20:00,03:14:00
NOAA 20_VIIRS_LANDGATE,01:24:00,01:06:00,01:43:00,01:45:00,03:52:38,03:56:31,03:55:51,03:58:44,00:59:00,00:58:00,03:57:00,04:02:00
NOAA 20_VIIRS_NASA2.0NRT,01:49:00,01:30:00,01:45:00,00:00:00,03:50:38,03:48:00,03:53:51,03:57:44,00:56:00,00:55:00,03:55:00,04:00:00
SENTINEL_3A_SLSTR_ESA,03:42:30,03:37:40,03:50:38,03:50:38,00:00:01,00:00:01,00:40:09,00:40:05,03:59:44,04:05:53,01:25:58,01:05:00
SENTINEL_3A_SLSTR_EUMETSAT,03:46:02,03:37:40,03:55:31,03:48:00,00:00:01,00:00:01,00:40:05,00:40:05,03:56:59,04:05:53,01:42:36,01:05:00
SENTINEL_3B_SLSTR_ESA,03:29:15,03:09:54,03:55:51,03:53:49,00:40:09,00:40:05,00:00:01,00:00:01,04:02:25,03:50:02,01:14:07,01:00:34
SENTINEL_3B_SLSTR_EUMETSAT,03:35:38,03:09:54,03:58:45,03:57:44,00:40:05,00:40:05,00:00:01,00:00:01,03:56:53,03:56:53,01:20:38,01:00:34
SUOMI NPP_VIIRS_LANDGATE,01:11:00,00:58:00,00:58:00,00:56:00,04:00:00,04:03:31,04:02:25,04:06:33,01:43:00,01:41:00,03:45:00,03:50:00
SUOMI NPP_VIIRS_NASA1,01:57:00,01:51:00,00:58:00,00:55:00,04:05:53,04:05:53,04:02:25,03:56:53,01:40:00,00:00:00,03:45:00,03:50:00


In [36]:
# Minimum time between match points < dist_threshold
timemin = util.pandas_pivot_table(
    nearest_ddf_dist_subset.compute(),
    index=["satellite_sensor_product"],
    columns=["2_satellite_sensor_product"],
    values=["timedelta"],
    aggfunc={"timedelta": np.min}
    
)

In [38]:
timemin.to_csv(output_directory.joinpath(f"{comparison_prefix}_min_time_matched_points.csv"))
timemin

Unnamed: 0_level_0,timedelta,timedelta,timedelta,timedelta,timedelta,timedelta,timedelta,timedelta,timedelta,timedelta,timedelta,timedelta
2_satellite_sensor_product,AQUA_MODIS_LANDGATE,AQUA_MODIS_NASA6.03,NOAA 20_VIIRS_LANDGATE,NOAA 20_VIIRS_NASA2.0NRT,SENTINEL_3A_SLSTR_ESA,SENTINEL_3A_SLSTR_EUMETSAT,SENTINEL_3B_SLSTR_ESA,SENTINEL_3B_SLSTR_EUMETSAT,SUOMI NPP_VIIRS_LANDGATE,SUOMI NPP_VIIRS_NASA1,TERRA_MODIS_LANDGATE,TERRA_MODIS_NASA6.03
satellite_sensor_product,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
AQUA_MODIS_LANDGATE,00:00:00,00:00:00,00:01:00,00:04:00,01:36:05,01:54:30,01:46:56,01:50:21,00:01:00,00:00:00,01:16:00,01:26:00
AQUA_MODIS_NASA6.03,00:00:00,00:00:00,00:03:00,00:01:00,02:00:30,02:00:30,01:54:21,01:54:21,00:04:00,00:01:00,01:24:00,01:33:00
NOAA 20_VIIRS_LANDGATE,00:00:00,00:03:00,00:00:00,00:00:00,01:40:10,01:15:25,01:29:29,01:29:29,00:45:00,00:41:00,00:32:00,01:29:00
NOAA 20_VIIRS_NASA2.0NRT,00:04:00,00:01:00,00:00:00,00:00:00,00:57:25,00:49:51,00:46:35,00:49:02,00:40:00,00:46:00,00:20:00,00:30:00
SENTINEL_3A_SLSTR_ESA,01:36:05,02:00:30,01:40:10,00:58:21,00:00:00,00:00:00,00:38:54,00:38:54,01:31:37,01:22:18,00:01:14,00:00:01
SENTINEL_3A_SLSTR_EUMETSAT,01:54:30,02:00:30,01:16:38,00:49:51,00:00:00,00:00:00,00:38:54,00:38:50,01:35:41,01:28:15,00:00:09,00:00:01
SENTINEL_3B_SLSTR_ESA,01:46:56,01:54:21,01:29:29,00:46:35,00:38:54,00:38:54,00:00:00,00:00:00,01:22:31,00:47:34,00:00:25,00:03:31
SENTINEL_3B_SLSTR_EUMETSAT,01:50:21,01:54:21,01:29:29,00:49:02,00:38:54,00:38:49,00:00:00,00:00:00,01:06:31,00:47:34,00:01:34,00:03:31
SUOMI NPP_VIIRS_LANDGATE,00:01:00,00:04:00,00:45:00,00:40:00,01:31:37,01:35:41,01:22:31,01:06:31,00:00:00,00:00:00,00:33:00,01:04:00
SUOMI NPP_VIIRS_NASA1,00:00:00,00:01:00,00:41:00,00:46:00,01:11:29,01:28:15,00:47:34,00:47:34,00:00:00,00:00:00,00:24:00,00:46:00


In [39]:
# Average distance (m) between matched points < dist_threshold
averagedist = util.dask_pivot_table(
    nearest_ddf_dist_subset,
    index="2_satellite_sensor_product",
    column="satellite_sensor_product",
    values="dist_m",
    aggfunc="mean",
).compute()

In [41]:
averagedist = np.round(averagedist, 2)
averagedist.to_csv(output_directory.joinpath(f"{comparison_prefix}_average_distance_{dist_threshold}m.csv"))
averagedist

satellite_sensor_product,SENTINEL_3B_SLSTR_EUMETSAT,SENTINEL_3B_SLSTR_ESA,SENTINEL_3A_SLSTR_EUMETSAT,SENTINEL_3A_SLSTR_ESA,TERRA_MODIS_NASA6.03,TERRA_MODIS_LANDGATE,NOAA 20_VIIRS_NASA2.0NRT,SUOMI NPP_VIIRS_LANDGATE,SUOMI NPP_VIIRS_NASA1,AQUA_MODIS_LANDGATE,NOAA 20_VIIRS_LANDGATE,AQUA_MODIS_NASA6.03
2_satellite_sensor_product,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
NOAA 20_VIIRS_NASA2.0NRT,778.63,766.78,607.15,623.99,346.88,642.52,0.0,323.49,265.92,667.61,167.22,329.62
SENTINEL_3B_SLSTR_EUMETSAT,0.0,1.62,715.7,684.7,639.55,849.83,595.91,702.59,625.79,1085.63,617.53,823.87
SENTINEL_3B_SLSTR_ESA,2.1,0.0,683.11,669.57,636.97,859.24,603.11,722.13,617.65,1221.72,665.67,820.02
SUOMI NPP_VIIRS_NASA1,800.46,774.96,618.55,635.31,374.93,621.54,275.51,229.77,0.0,711.52,299.81,404.85
SUOMI NPP_VIIRS_LANDGATE,848.98,962.06,627.05,693.43,352.41,550.58,313.61,0.0,279.34,622.58,237.1,385.15
NOAA 20_VIIRS_LANDGATE,789.9,842.24,594.11,687.17,334.52,573.96,167.38,247.98,344.06,599.94,0.0,317.77
SENTINEL_3A_SLSTR_EUMETSAT,1001.92,1036.41,0.0,3.12,567.95,778.07,537.82,555.75,554.81,674.37,537.57,571.36
SENTINEL_3A_SLSTR_ESA,1035.91,997.66,6.82,0.0,534.1,789.76,530.57,552.0,550.06,707.86,548.82,525.27
TERRA_MODIS_NASA6.03,1404.8,1403.36,1045.93,989.78,0.0,550.45,730.39,718.11,701.48,1061.78,726.47,717.82
TERRA_MODIS_LANDGATE,1347.41,1341.93,1092.62,1028.9,340.75,0.0,829.96,698.34,828.94,830.42,748.0,686.53


# Persistent Hotspot Compasiron

In [49]:
persistent_hotspots = gpd.GeoDataFrame.from_file(f"zip://{output_directory.joinpath('Known non FHS - Version 5.2.zip')}/Version 5.2/known_non_FHS.shp")

2021-03-15 01:15:35 [ERROR] fiona._env - Unable to open EPSG support file gcs.csv.  Try setting the GDAL_DATA environment variable to point to the directory containing EPSG csv files.


In [51]:
persistent_hotspots.head()

Unnamed: 0,Longitude,Latitude,Comment,geometry
0,114.996,-21.697,"LNG Plant - Wheatstone, WA",POINT (114.99600 -21.69700)
1,115.439,-20.782,"LNG Plant - Gorgon, WA",POINT (115.43900 -20.78200)
2,116.781,-20.596,"LNG Plant - Karratha, WA",POINT (116.78100 -20.59600)
3,121.484,-30.873,"Nickel Smelter - Kalgoorlie, WA",POINT (121.48400 -30.87300)
4,136.859,-30.446,"Uranium Mine - Olympic Dam, SA",POINT (136.85899 -30.44600)


In [None]:
nearest_persistent = ckdnearest( hotspots_gdf, persistent_hotspots)