In [1]:
import os
import sys
from pathlib import Path
import logging
import time
import pandas as pd
import geopandas as gpd
import seaborn
import dask
import numpy as np
import dask.dataframe as dd
from dask.distributed import Client

sys.path.insert(0, '..')
import src.hotspot_utils as util

In [2]:
# Root logger configuration: DEBUG and above go to stdout, each record
# prefixed with a timestamp, the level and the logger name.
log_config = {
    "level": logging.DEBUG,
    "stream": sys.stdout,
    "format": '%(asctime)s [%(levelname)s] %(name)s - %(message)s',
    "datefmt": '%Y-%m-%d %H:%M:%S',
}
logging.basicConfig(**log_config)

# Logger used by this notebook.
_LOG = logging.getLogger(__name__)

In [3]:
# Start a Dask distributed client for the rest of the notebook.
# n_workers should be equal to or less than the number of cores, so the
# requested 8 workers are capped by the core count reported for this host
# (os.cpu_count() may return None, in which case a single worker is used).
# To visualise status in Dask add /user/<username>/proxy/8787/status
client = Client(n_workers=min(8, os.cpu_count() or 1))
client

0,1
Client  Scheduler: tcp://127.0.0.1:34639  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 8  Cores: 16  Memory: 66.57 GB


# Persistent Hotspot Comparison
Landgate developed a set of persistent hotspots (v5.2) by associating recurrent hotspots with known heat sources (industrial activity). Knowledge of persistent hotspots is used here to provide a baseline for the ability of a given sensor to detect hotspots.

Persistent hotspots, together with high-resolution hotspot sources from the Sentinel-2 MSI and the Landsat Enhanced Thematic Mapper and Operational Land Imager sensors, are used here to validate hotspots from the AVHRR, VIIRS, MODIS and SLSTR instruments.

In [4]:
# Data directory where the .geojson files and the persistent hotspots shapefile are located.
data_dir = Path("/home/jovyan/s3vt_dask/s3vtdata")

# Output directory where outputs and intermediary files from this notebook will be stored.
outdir = Path("/home/jovyan/s3vt_dask/s3vtdata/workdir_test3")

In [5]:
# Read the persistent hotspots from the shapefile stored inside the zip
# archive under data_dir (the zip:// URI addresses a member of the archive).
known_non_fhs_zip = data_dir.joinpath('Known non FHS - Version 5.2.zip')
persistent_hotspots_gdf = gpd.GeoDataFrame.from_file(
    f"zip://{known_non_fhs_zip}/Version 5.2/known_non_FHS.shp"
)

In [6]:
# Use lower-case names for the coordinate columns of the persistent hotspots.
coordinate_column_names = {'Latitude': 'latitude', 'Longitude': 'longitude'}
persistent_hotspots_gdf = persistent_hotspots_gdf.rename(columns=coordinate_column_names)

In [7]:
# Display the persistent hotspots table after the column rename.
persistent_hotspots_gdf

Unnamed: 0,longitude,latitude,Comment,geometry
0,114.996,-21.697,"LNG Plant - Wheatstone, WA",POINT (114.99600 -21.69700)
1,115.439,-20.782,"LNG Plant - Gorgon, WA",POINT (115.43900 -20.78200)
2,116.781,-20.596,"LNG Plant - Karratha, WA",POINT (116.78100 -20.59600)
3,121.484,-30.873,"Nickel Smelter - Kalgoorlie, WA",POINT (121.48400 -30.87300)
4,136.859,-30.446,"Uranium Mine - Olympic Dam, SA",POINT (136.85899 -30.44600)
5,137.579,-33.011,"Steel Works Blast Furnace - Whyalla, SA",POINT (137.57899 -33.01100)
6,139.481,-20.729,"Copper Smelter - Mount Isa, Qld",POINT (139.48100 -20.72900)
7,144.789,-37.83,"Steel Mill - Laverton, Victoria",POINT (144.79000 -37.83000)
8,145.379,-40.852,"Iron Ore Plant - Port Latta, Tasmania",POINT (145.37900 -40.85200)
9,146.852,-41.129,"Alumina Refinery - Bell Bay, Tasmania",POINT (146.85200 -41.12900)


In [8]:
# Locations of the hotspot (FRP) .geojson files, one entry per provider.
# A value of None leaves that provider without a source.
frp_sources = {
    "nasa_frp": "s3://s3vtaustralia/nasa_hotspots_gdf.geojson",
    "esa_frp": "s3://s3vtaustralia/s3vt_hotspots.geojson",
    "eumetsat_frp": "s3://s3vtaustralia/s3vt_eumetsat_hotspots.geojson",
    "landgate_frp": "s3://s3vtaustralia/landgate_hotspots_gdf.geojson",
    "dea_frp": None,
}

# Processing parameters used to load hotspots from the .geojson files within
# the temporal and spatial bounds provided.
processing_parameters = {
    **frp_sources,
    "start_date": "2019-11-01",
    "end_date": "2020-10-08",
    "bbox": (147.0, -38.0, 154.0, -27.0),
    "chunks": 300,
    "outdir": outdir,
}

In [11]:
# Name of the pickle used to cache the loaded hotspots in outdir.
# NOTE(review): the name only encodes bbox[2], bbox[0] and the two dates, so
# changing any other processing parameter keeps pointing at the same file.
cache_bbox = processing_parameters['bbox']
cache_start = processing_parameters['start_date'].replace('-','')
cache_end = processing_parameters['end_date'].replace('-','')
hotspots_pkl_file = Path(outdir).joinpath(
    f"all_hotspots_{int(cache_bbox[2])}_{int(cache_bbox[0])}_{cache_start}_{cache_end}.pkl"
)

In [13]:
# Reuse the cached hotspots when the pickle already exists; otherwise build
# them from the processing parameters and write the cache for the next run.
# Pickle files should only be loaded when they were written by this notebook.
if hotspots_pkl_file.exists():
    hotspots_gdf = pd.read_pickle(hotspots_pkl_file)
else:
    hotspots_gdf = util.process_hotspots_gdf(**processing_parameters)
    hotspots_gdf.to_pickle(hotspots_pkl_file)

2021-10-25 01:22:55,962: INFO: Found credentials in environment variables.
2021-10-25 01:22:55,998: INFO: Fetching FRP datasets...
2021-10-25 01:22:55,998: INFO: s3://s3vtaustralia/nasa_hotspots_gdf.geojson exists: skipped download
2021-10-25 01:22:55,999: INFO: s3://s3vtaustralia/s3vt_hotspots.geojson exists: skipped download
2021-10-25 01:22:55,999: INFO: s3://s3vtaustralia/s3vt_eumetsat_hotspots.geojson exists: skipped download
2021-10-25 01:22:56,000: INFO: s3://s3vtaustralia/landgate_hotspots_gdf.geojson exists: skipped download
2021-10-25 01:22:56,000: INFO: dea Hotspots FRP  is None. excluding from analysis.
2021-10-25 01:22:56,000: INFO: Reading...
2021-10-25 01:22:56,000: INFO: reading and subsetting GeoDataFrame for nasa: /home/jovyan/s3vt_dask/s3vtdata/workdir_test3/nasa_hotspots_gdf.geojson
2021-10-25 01:24:43,064: INFO: reading and subsetting GeoDataFrame for esa: /home/jovyan/s3vt_dask/s3vtdata/workdir_test3/s3vt_hotspots.geojson
2021-10-25 01:25:55,826: INFO: reading and

In [14]:
# Number of hotspot rows loaded for the configured bounds and dates.
len(hotspots_gdf)

3160676

In [15]:
# Pair the hotspots with the persistent hotspots via util.ckdnearest.
# The result keeps the hotspot columns and adds `2_`-prefixed columns plus `dist`.
# NOTE(review): presumably each hotspot is matched to its nearest persistent
# hotspot and `dist` is in the units of the input coordinates — confirm in
# src/hotspot_utils.
persistent_hotspots_nearest_df = util.ckdnearest(hotspots_gdf, persistent_hotspots_gdf)

In [16]:
# Give every row a count of 1 so that summing "count" in a pivot table yields
# the number of hotspots per group.
persistent_hotspots_nearest_df["count"] = 1

In [17]:
# Drop both geometry columns before the frame is converted to a Dask DataFrame.
# errors="ignore" keeps the cell re-runnable: the result overwrites its own
# input, so on a second run the columns are already gone.
persistent_hotspots_nearest_df = persistent_hotspots_nearest_df.drop(
    columns=['geometry', '2_geometry'],
    errors="ignore",
)

In [18]:
# Split the pandas frame into partitions so later work can be spread over the
# Dask workers.
n_partitions = 100
dd_persistent_hotspots_nearest_df = dd.from_pandas(
    persistent_hotspots_nearest_df,
    npartitions=n_partitions,
)

In [19]:
# Sanity check: the Dask frame should report the same number of rows as the
# hotspot frame it was built from.
len(dd_persistent_hotspots_nearest_df)

3160676

In [20]:
# Inspect the column names and dtypes of the pandas frame behind the Dask frame.
persistent_hotspots_nearest_df.dtypes

latitude                           float64
longitude                          float64
satellite                           object
sensor                              object
confidence                         float64
power                              float64
datetime                    datetime64[ns]
solar_day                   datetime64[ns]
satellite_sensor_product            object
solar_night                 datetime64[ns]
2_longitude                        float64
2_latitude                         float64
2_Comment                           object
dist                               float64
count                                int64
dtype: object

In [21]:
# Compute the distance column partition by partition with util._distance.
# `meta` describes the OUTPUT of the mapped function (name and dtype of the
# resulting column), not the dtypes of the input frame.
# NOTE(review): assumes util._distance returns one float per row (the column
# is later compared against 5000) and that the values are metres, as the
# column name suggests — confirm in src/hotspot_utils.
dd_persistent_hotspots_nearest_df["dist_m"] = dd_persistent_hotspots_nearest_df.map_partitions(
    util._distance,
    meta=("dist_m", "float64"),
)

In [22]:
# Run the pending Dask work and keep the computed result.
# NOTE(review): the `dd_`-prefixed name is rebound from the lazy Dask frame to
# the computed result, so this cell presumably cannot be re-run on its own
# (the result is not expected to offer .compute()) — re-run from the
# dd.from_pandas cell instead.
dd_persistent_hotspots_nearest_df = dd_persistent_hotspots_nearest_df.compute()

In [23]:
# Hotspot counts per persistent hotspot ("2_Comment", rows) and per
# satellite/sensor/product combination (columns), summing the "count" column.
count_pivot_options = {
    "index": ["2_Comment"],
    "columns": ["satellite_sensor_product"],
    "values": ["count"],
    "aggfunc": {"count": np.sum},
}
persistentcount = util.pandas_pivot_table(
    dd_persistent_hotspots_nearest_df,
    **count_pivot_options,
)

In [24]:
# Display the hotspot counts for every persistent hotspot and product.
persistentcount

Unnamed: 0_level_0,count,count,count,count,count,count,count,count,count,count,count,count,count
satellite_sensor_product,AQUA_MODIS_LANDGATE,AQUA_MODIS_NASA6.03,NOAA 20_VIIRS_LANDGATE,NOAA 20_VIIRS_NASA2.0NRT,NOAA-19_AVHRR_LANDGATE,SENTINEL_3A_SLSTR_ESA,SENTINEL_3A_SLSTR_EUMETSAT,SENTINEL_3B_SLSTR_ESA,SENTINEL_3B_SLSTR_EUMETSAT,SUOMI NPP_VIIRS_LANDGATE,SUOMI NPP_VIIRS_NASA1,TERRA_MODIS_LANDGATE,TERRA_MODIS_NASA6.03
2_Comment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
"Alumina Refinery - Gladstone, Qld",5494,846,16689,8751,7275,1775,811,3227,2586,22370,8834,4205,768
"Coal Mine - Mandalong, NSW",1906,521,5263,4076,1799,657,327,639,573,5643,5355,1584,361
"Coal Mine - Mt Thorley, NSW",25592,5920,49431,34934,13837,8484,5371,11190,8462,65342,56674,22732,5069
"Coal Mine - Muswellbrook, NSW",82739,19084,186976,82795,56567,34390,13282,49736,22490,224241,176426,60871,15086
"Coal Mine - Oakey Creek, Qld",876,118,3200,1264,1521,215,181,277,214,4379,1055,781,153
"Coal Mine - Singleton, NSW",15557,3473,29483,8498,8976,3957,971,6138,1231,34938,32780,11203,2583
"Coal Mine - Tahmoor, NSW",36582,9809,82605,79479,22026,13680,13363,15204,15332,102557,80817,27266,7242
"Gas Plant - Longford, Victoria",42634,12323,126110,127540,31756,13349,13032,23260,22624,141003,129292,36489,10125
"LNG Plant - Gladstone, Qld",496,63,2068,986,798,111,144,87,128,2621,548,567,67
"Steel Works - Port Kembla, NSW",16454,4901,69005,46223,10554,5686,5591,7073,6956,55226,47687,13338,3721


In [25]:
# Same pivot as for all hotspots, restricted to rows whose dist_m is below 5000.
within_5000 = dd_persistent_hotspots_nearest_df['dist_m'] < 5000
hotspots_within_5000_df = dd_persistent_hotspots_nearest_df[within_5000]
persistentcount_5000 = util.pandas_pivot_table(
    hotspots_within_5000_df,
    index=["2_Comment"],
    columns=["satellite_sensor_product"],
    values=["count"],
    aggfunc={"count": np.sum},
)

In [26]:
# Display the counts for hotspots with dist_m below 5000.
# Combinations without any hotspot appear as NaN in the recorded output, and
# the remaining counts are shown as floats.
persistentcount_5000

Unnamed: 0_level_0,count,count,count,count,count,count,count,count,count,count,count,count,count
satellite_sensor_product,AQUA_MODIS_LANDGATE,AQUA_MODIS_NASA6.03,NOAA 20_VIIRS_LANDGATE,NOAA 20_VIIRS_NASA2.0NRT,NOAA-19_AVHRR_LANDGATE,SENTINEL_3A_SLSTR_ESA,SENTINEL_3A_SLSTR_EUMETSAT,SENTINEL_3B_SLSTR_ESA,SENTINEL_3B_SLSTR_EUMETSAT,SUOMI NPP_VIIRS_LANDGATE,SUOMI NPP_VIIRS_NASA1,TERRA_MODIS_LANDGATE,TERRA_MODIS_NASA6.03
2_Comment,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
"Coal Mine - Mandalong, NSW",,,168.0,97.0,,,,,,168.0,55.0,,
"Coal Mine - Mt Thorley, NSW",11.0,3.0,188.0,141.0,5.0,6.0,6.0,1.0,1.0,168.0,102.0,5.0,1.0
"Coal Mine - Muswellbrook, NSW",1.0,,243.0,120.0,11.0,10.0,2.0,11.0,7.0,346.0,148.0,4.0,
"Coal Mine - Singleton, NSW",,,272.0,158.0,50.0,11.0,17.0,7.0,12.0,297.0,116.0,,
"Coal Mine - Tahmoor, NSW",48.0,13.0,69.0,174.0,87.0,48.0,48.0,23.0,23.0,281.0,171.0,18.0,6.0
"Steel Works - Port Kembla, NSW",515.0,59.0,3257.0,1879.0,773.0,22.0,20.0,35.0,33.0,4040.0,1299.0,612.0,102.0
