In [127]:
import matplotlib
import boto3
from botocore.exceptions import ClientError
from netCDF4 import Dataset
import datetime as dt
import json
import geopandas as gpd
import pandas as pd
from shapely.geometry import Polygon
import xmltodict
import yaml
import os
import subprocess
from datetime import date
import logging as logger
import os
import netCDF4

In [138]:
# Parse config.yaml from the working directory into `configuration`.
# NOTE(review): yaml.load is used with FullLoader; if this file can ever come from
# an untrusted source, confirm whether yaml.safe_load would be sufficient.
with open(r'config.yaml') as config_stream:
    configuration = yaml.load(config_stream, Loader=yaml.FullLoader)

In [81]:
def filter(sensors):
    """Build the URL-encoded CQL sensor/product clause for a hotspots WFS request.

    Parameters
    ----------
    sensors : iterable of dict
        Each dict maps a sensor name to a list of product names.

    Returns
    -------
    str
        One ``(sensor='X' AND (product='a' OR product='b'))`` clause per sensor,
        joined with ``OR`` and percent-encoded (``%27`` for quotes, ``%20`` for
        spaces). Sensors from every dict in ``sensors`` are included. A sensor
        with no products yields ``(sensor='X')``. Empty input yields ``''``.
    """
    clauses = []
    for sensordict in sensors:
        for sensor, products in sensordict.items():
            if products:
                product_clause = '%20OR%20'.join(
                    'product=%27' + product + '%27' for product in products
                )
                clauses.append(
                    '(sensor=%27' + sensor + '%27%20AND%20(' + product_clause + '))'
                )
            else:
                # No products listed: match on the sensor alone so the
                # parentheses stay balanced.
                clauses.append('(sensor=%27' + sensor + '%27)')

    return '%20OR%20'.join(clauses)

In [5]:
def get_polygon_from_gml(gml_dict):
    """Convert a GML coordinate string into a list of coordinate tuples.

    Parameters
    ----------
    gml_dict : str
        Whitespace-separated ``a,b`` pairs (despite the parameter name, this is
        a string, not a dict). Leading, trailing and repeated whitespace is
        tolerated.

    Returns
    -------
    list of tuple of float
        One ``(b, a)`` tuple per pair, i.e. the two components are swapped.
        An empty or whitespace-only string gives ``[]``.
    """
    listoftuples = []
    # split() with no argument collapses runs of whitespace and ignores the ends
    for token in gml_dict.split():
        parts = token.split(',')
        listoftuples.append((float(parts[1]), float(parts[0])))
    return listoftuples

In [122]:
def load_hotspots(filter_string, time_period, bbox, max_features, min_confidence, to_date):
    """Query the DEA Hotspots WFS endpoint and return the result as a GeoDataFrame.

    Parameters
    ----------
    filter_string : str
        URL-encoded CQL clause, inserted inside parentheses in ``CQL_FILTER``.
    time_period : int or float
        Number of days before ``to_date`` at which the query window starts.
    bbox : sequence
        Read positionally as ``[y_max, x_min, y_min, x_max]``.
    max_features : int
        Value sent as the WFS ``maxFeatures`` parameter.
    min_confidence : number
        Rows with ``confidence`` below this value are dropped, unless the first
        row's confidence is missing (then the filter is skipped entirely).
    to_date : datetime-like, str or None
        End of the query window. ``None`` means now; strings are parsed with
        ``pandas.to_datetime``.

    Returns
    -------
    geopandas.GeoDataFrame
        Sorted by ``datetime`` and restricted to the columns selected below.
        If the service returns no features, the empty frame from
        ``gpd.read_file`` is returned unchanged.
    """
    y_max = bbox[0]
    x_min = bbox[1]
    y_min = bbox[2]
    x_max = bbox[3]

    if to_date is None:
        to_date = dt.datetime.now()
    elif isinstance(to_date, str):
        to_date = pd.to_datetime(to_date)

    logger.info(str(to_date)+' '+str(type(to_date)))
    from_date = (to_date - dt.timedelta(days=time_period)).strftime('%Y-%m-%d')

    # trim datetime to enable WFS
    to_date = to_date.strftime('%Y-%m-%d')

    # TODO - sort out paging - looks like there is a limit to WFS requests number returned per query
    # Build the request once so the logged URL is always the one that is fetched.
    url = f"https://hotspots.dea.ga.gov.au/geoserver/public/wfs?service=WFS&version=1.1.0&request=GetFeature&typeName=public:hotspots&outputFormat=application/json&CQL_FILTER=({filter_string})%20AND%20datetime%20%3E%20%27{from_date}%27%20AND%20datetime%20%3C%20%27{to_date}%27%20AND%20INTERSECTS(location,%20POLYGON(({y_max}%20{x_min},%20{y_max}%20{x_max},%20{y_min}%20{x_max},%20{y_min}%20{x_min},%20{y_max}%20{x_min})))&maxFeatures={max_features}&startIndex=0&sortBy=sensor%20A"
    logger.info(url)

    hotspots_gdf = gpd.read_file(url)
    if hotspots_gdf.empty:
        # Nothing to filter or reformat; the first-row checks below would fail.
        logger.info('No hotspots returned for the requested filter, period and area')
        return hotspots_gdf

    logger.info(str(hotspots_gdf['stop_dt']))

    # TODO - improved None value handling  -currently just look at first and apply that to all
    # iloc is positional; pd.isna treats both None and NaN as "not populated".
    if pd.isna(hotspots_gdf['confidence'].iloc[0]):
        logger.info('Skipping confidence filter as confidence not populated')
    else:
        # Filter by confidence; copy so the column assignment below does not
        # write into a slice of the original frame.
        hotspots_gdf = hotspots_gdf.loc[hotspots_gdf.confidence >= min_confidence].copy()

    # Fix datetime: prefer start_dt when the first remaining row has it.
    # The confidence filter may have removed every row, so guard the lookup.
    if hotspots_gdf.empty or pd.isna(hotspots_gdf['start_dt'].iloc[0]):
        logger.info('Start date field is not populated')
        hotspots_gdf['datetime'] = pd.to_datetime(hotspots_gdf['datetime'])
    else:
        hotspots_gdf['datetime'] = pd.to_datetime(hotspots_gdf['start_dt'])

    # Extract required columns
    hotspots_gdf = hotspots_gdf.loc[:, [
            'datetime', 'latitude', 'longitude', 'confidence', 'geometry', 'product', 'satellite', 'sensor', 'power'
            ]]
    hotspots_gdf = hotspots_gdf.sort_values('datetime', ascending=True)
    logger.info('Hotspots loaded successfully '+str(hotspots_gdf.geometry.total_bounds))

    return hotspots_gdf

In [None]:
# Unpack connection settings from each configuration entry.
# Every pass rebinds the same names, so after the loop they hold the values
# of the last entry.
for config in configuration['configurations']:
    # Mask credential values before echoing the entry, so they are not stored
    # in the notebook's saved cell output.
    print({key: ('***' if key in ('password', 'awskeyid', 'awskeypass') else value)
           for key, value in config.items()})
    username = config['username']
    password = config['password']
    url = config['url']
    aoi = config['aoi']
    awss3bucket = config['awss3bucket']
    awskeyid = config['awskeyid']
    awskeypass = config['awskeypass']

In [15]:
# Assess inventory against AWS bucket listing
#
# Walk every object in the bucket and look at the third path segment of its key:
#   - segments containing '.SEN3' are recorded in s3folderlist
#   - segments containing '.FRP.geojson' have their full key recorded in s3geojsonlist

s3 = boto3.resource('s3', aws_access_key_id=awskeyid,
                    aws_secret_access_key=awskeypass)

s3folderlist = []
s3geojsonlist = []
s3bucket = s3.Bucket('s3vtaustralia')

for bucket_object in s3bucket.objects.all():
    key_parts = str(bucket_object.key).split("/")
    if len(key_parts) < 3:
        # Keys with fewer than three segments have no third component to inspect.
        continue
    s3bucketobject = key_parts[2]
    if '.SEN3' in s3bucketobject:
        s3folderlist.append(s3bucketobject)
    if '.FRP.geojson' in s3bucketobject:
        s3geojsonlist.append(bucket_object.key)

print(len(s3geojsonlist), "S3 Hotspot files From ESA available in AWS S3")

184 S3 Hotspot files From ESA available in AWS S3


In [11]:
# Read inventory to geopandas - write to geojson
#
# Each line of s3vt_inventory.json is parsed as a JSON list of pages. Every item in
# page['feed']['entry'] becomes a GeoDataFrame whose geometry is the polygon built
# from the GML coordinates held in the third element of the entry's 'str' list.

with open('s3vt_inventory.json') as inventory:
    frames = []
    for inventory_line in inventory:
        for page in json.loads(inventory_line):
            for entry in page['feed']['entry']:
                gml_footprint = xmltodict.parse(entry['str'][2]['content'])
                ring_coordinates = gml_footprint['gml:Polygon']['gml:outerBoundaryIs']['gml:LinearRing']['gml:coordinates']

                df = pd.DataFrame.from_dict(entry, orient='index').transpose()
                df['Coordinates'] = Polygon(get_polygon_from_gml(ring_coordinates))

                # Copy the selected named attributes into columns of their own.
                for field in entry['str']:
                    if field['name'] in ('orbitdirection', 'platformidentifier', 'filename',
                                         'instrumentshortname', 'passnumber'):
                        df[field['name']] = field['content']

                s3vtdf = gpd.GeoDataFrame(df, geometry='Coordinates')
                frames.append(s3vtdf)

# Combine the per-entry frames and replace their individual index labels with 0..n-1.
s3vtgpd = pd.concat(frames).reset_index(drop=True)

# The date is the second space-separated token of the text before the first comma in 'summary'.
summary_head = s3vtgpd.summary.str.split(",", expand= True)[0]
s3vtgpd['date'] = pd.to_datetime(summary_head.str.split(' ', expand=True)[1])

# Some fields are lists and geojson translation doesn't like it
s3vtgpd = s3vtgpd.drop(columns=['link', 'int', 'str', 'summary'])
s3vtgpd.to_file('s3vt_geometry.geojson', driver='GeoJSON')

In [12]:
# Add field to enable monitoring: three flag columns, every row starting at 0.
for flag_column in ('hotspot', 'download', 's3bucket'):
    s3vtgpd[flag_column] = 0

# Row count, used by later cells to walk the frame by integer label.
dataframelength = len(s3vtgpd)

s3vthostpotsgpdlist = []

In [13]:
# Check if folder already downloaded and flag in gpd
# Build the lookups once rather than once per row.
downloaded_folders = set(s3folderlist)
# s3geojsonlist holds full object keys, so index them by their final path segment.
geojson_keys_by_name = {key.split("/")[-1]: key for key in s3geojsonlist}

for i in range(dataframelength):
    title = s3vtgpd.at[i, 'title']
    if title + '.SEN3' in downloaded_folders:
        s3vtgpd.at[i, 'download'] = 1
    geojson_key = geojson_keys_by_name.get(title + '.FRP.geojson')
    if geojson_key is not None:
        s3vtgpd.at[i, 'hotspot'] = 1
        # NOTE(review): records the S3 key of the matching hotspot file; confirm
        # whether this list is meant to hold keys or loaded GeoDataFrames.
        s3vthostpotsgpdlist.append(geojson_key)

In [16]:
# Display the object keys collected for '.FRP.geojson' entries in the bucket listing.
s3geojsonlist

['data/2020-02-27/S3A_SL_2_FRP____20200227T021848_20200227T022148_20200228T070639_0179_055_217_2880_LN2_O_NT_004.FRP.geojson',
 'data/2020-02-27/S3A_SL_2_FRP____20200227T035947_20200227T040247_20200228T084304_0179_055_218_2880_LN2_O_NT_004.FRP.geojson',
 'data/2020-02-27/S3A_SL_2_FRP____20200227T112244_20200227T112544_20200228T151814_0179_055_222_5220_LN2_O_NT_004.FRP.geojson',
 'data/2020-02-27/S3A_SL_2_FRP____20200227T112844_20200227T113144_20200228T151832_0179_055_222_5580_LN2_O_NT_004.FRP.geojson',
 'data/2020-02-27/S3A_SL_2_FRP____20200227T130643_20200227T130943_20200228T165244_0180_055_223_5400_LN2_O_NT_004.FRP.geojson',
 'data/2020-02-27/S3A_SL_2_FRP____20200227T130943_20200227T131243_20200228T165252_0179_055_223_5580_LN2_O_NT_004.FRP.geojson',
 'data/2020-02-27/S3A_SL_2_FRP____20200227T131543_20200227T131742_20200228T165310_0119_055_223_5940_LN2_O_NT_004.FRP.geojson',
 'data/2020-02-27/S3A_SL_2_FRP____20200227T131742_20200227T132042_20200228T165319_0179_055_224_0000_LN2_O_NT_00

In [29]:
# Copy all S3 geojson files locally and load to GPD
# Each object key is also used as the local relative destination path.

for i in s3geojsonlist:
    returncode = subprocess.call(['aws', 's3', 'cp', 's3://s3vtaustralia/'+i, i])
    if returncode != 0:
        # Report failed copies instead of letting them pass silently; the loop
        # continues so one bad object does not stop the remaining downloads.
        logger.warning('aws s3 cp failed for %s (exit code %s)', i, returncode)

In [37]:
# Read every downloaded hotspot GeoJSON file, keep the rows where FRP_MWIR > 0,
# and combine the non-empty results into a single frame.
frames = []
for i in s3geojsonlist:
    df1 = gpd.read_file(i)
    df2 = df1.query("FRP_MWIR>0")
    if len(df2) > 0:
        frames.append(df2)

s3vthotspots = pd.concat(frames)

In [149]:
# Convert the 'time' column (microseconds since 2000-01-01T00:00:00Z) into datetimes.
s3vthotspots['date'] = pd.to_datetime(netCDF4.num2date(s3vthotspots.time, units='microseconds since 2000-01-01T00:00:00Z'))

In [150]:
# Display the combined hotspot frame, including the derived 'date' column.
s3vthotspots

Unnamed: 0,F1_Fire_pixel_radiance,FRP_MWIR,FRP_SWIR,FRP_uncertainty_MWIR,FRP_uncertainty_SWIR,Glint_angle,IFOV_area,Radiance_window,S7_Fire_pixel_radiance,TCWV,...,n_SWIR_fire,n_cloud,n_water,n_window,time,transmittance_MWIR,transmittance_SWIR,used_channel,geometry,date
0,0.58,356.144925,-1.000000,152.598082,0.000000,9.241301,0.000,0.57,0.72,50.234912,...,-1,0,18,7,636085201255007,0.661942,0.000000,1,POINT (118.58512 4.50691),2020-02-27 02:20:01.255007
1,0.47,35.027970,-1.000000,6.717106,0.000000,17.112879,0.000,0.52,0.74,57.753596,...,-1,0,4,5,636085216245857,0.647840,0.000000,1,POINT (117.34304 3.54743),2020-02-27 02:20:16.245857
0,4.05,93.995624,282.135233,13.802148,346.134798,4.808986,1623262.500,0.58,0.75,47.466614,...,4,0,0,7,636091307791212,0.662275,2.878000,1,POINT (93.43890 1.81046),2020-02-27 04:01:47.791212
1,4.05,94.011671,295.641637,13.913531,362.610621,4.807887,1620098.875,0.58,0.75,47.457786,...,4,0,0,7,636091308091116,0.662431,2.878403,1,POINT (93.42778 1.80831),2020-02-27 04:01:48.091116
2,4.05,94.027479,302.251286,13.916051,370.836687,4.801874,1623262.500,0.58,0.75,47.485311,...,4,0,0,7,636091308091197,0.662212,2.878000,1,POINT (93.43890 1.81046),2020-02-27 04:01:48.091197
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72,0.52,4.309601,-1.000000,2.336096,0.000000,176.136222,1133830.625,0.39,0.41,54.391036,...,0,0,20,7,636388961074889,0.628570,0.000000,1,POINT (113.04858 3.28176),2020-03-01 14:42:41.074889
73,0.55,5.047471,-1.000000,3.649828,0.000000,176.062530,1133830.625,0.39,0.40,54.311248,...,0,0,20,7,636388961374888,0.628687,0.000000,1,POINT (113.05547 3.28816),2020-03-01 14:42:41.374888
74,0.43,1.511922,-1.000000,1.189147,0.000000,175.913558,1133830.625,0.39,0.40,54.227513,...,0,0,20,7,636388961674709,0.628659,0.000000,1,POINT (113.07543 3.29944),2020-03-01 14:42:41.674709
75,0.67,8.923738,-1.000000,6.957435,0.000000,176.064095,1133830.625,0.39,0.40,54.233352,...,0,0,20,7,636388961374806,0.628955,0.000000,1,POINT (113.05666 3.29624),2020-03-01 14:42:41.374806


In [41]:
# Write the combined hotspot frame to s3vt_hotspots.geojson using the GeoJSON driver.
s3vthotspots.to_file('s3vt_hotspots.geojson', driver='GeoJSON')

In [56]:
# Reorder the total bounds of the hotspot geometries into the positional layout
# that load_hotspots reads as [y_max, x_min, y_min, x_max].
bounds = list(s3vthotspots.geometry.total_bounds)
bbox = [bounds[position] for position in (3, 0, 1, 2)]
bbox

[11.751792970329575,
 -171.17838656781612,
 -44.941232886534294,
 176.88213855341553]

In [113]:
# Spot check: convert a single raw time value with the 'microseconds since 2000-01-01' units.
netCDF4.num2date(636085201255007, units='microseconds since 2000-01-01T00:00:00Z')

datetime.datetime(2020, 2, 27, 2, 20, 1, 255007)

In [142]:
# Query DEA Hotspots once per configuration entry over the bbox computed above.
# hotspots_gdf is rebound on every pass, so it ends up holding the result for
# the last entry only.
for config in configuration['configurations']:
    hotspots_gdf = load_hotspots(
        filter(config['sensors']),
        config['time_period'],
        bbox,
        config['max_features'],
        config['min_confidence'],
        # to_date may be omitted from the config; load_hotspots treats None as "now"
        config.get('to_date'),
    )

In [143]:
# Remove any previous export before writing; on a first run there is nothing to
# remove, so only delete the file when it is actually present.
if os.path.exists('DEAHotspots_hotspots.geojson'):
    os.remove('DEAHotspots_hotspots.geojson')
hotspots_gdf.to_file('DEAHotspots_hotspots.geojson', driver='GeoJSON')

In [144]:
# Scratch cell: a bare string literal; it assigns nothing and nothing in this notebook reads it.
'2018-01-01T00:00:00.000Z'

In [114]:
#!aws s3 cp 's3://s3vtaustralia/data/2020-02-29/S3B_SL_2_FRP____20200229T113450_20200229T113750_20200301T152757_0179_036_108_5400_LN2_O_NT_004.SEN3/FRP_in.nc' 'FRP_in.nc' 