#### Purpose: Derive a measure for when a flood event is sufficiently captured 

Creates and saves the dataframe "flooddays_with_sentinel_and_capture_df"

Method:

* metric based on overlap between flood extent and the ground-level swath
* we use the "point in polygon" analysis available from shapely (the same thing used by Geopandas)

Assumption made: the bounding boxes of the Sentinel ground coverage do not overlap one another. If they did, flood extent pixels lying inside the overlap would be counted more than once.

In [1]:
import pandas as pd
import pickle
import rasterio
import numpy as np
import shapely

# Folder containing the unzipped Global Flood Database TIF files; it is prepended to a
# row's 'tif_filename' when the raster is opened further down.
tif_root = "STEP 1 - Data Acquisition/Global Flood Database/TIF/unzipped/"

def wgs_flood_coordlist_from_img(img, transfm):
    """
    Convert the positive pixels of a binary image into WGS84 coordinates.

    img     : array-like image; every pixel with a value above zero counts as positive
    transfm : the (affine) transformation handed on to rasterio.transform.xy

    Returns a list with one (x, y) tuple per positive pixel, in the order in which
    np.where reports the pixels. The list is empty when no pixel is above zero.
    """

    def _pixel_to_wgs(row_idx, col_idx):
        # rasterio.transform.xy takes (transform, row, col) and hands back the x and y coordinate
        wgs_x, wgs_y = rasterio.transform.xy(transfm, row_idx, col_idx)
        return (wgs_x, wgs_y)

    # np.where yields one index array per image axis: [0] are the rows, [1] the columns
    positive = np.where(img > 0)

    return [_pixel_to_wgs(r, c) for r, c in zip(positive[0], positive[1])]

def capture_metrics(list_of_coords, bbox):
    """
    Given a list of coordinates in the WGS84 system and the corners of a bounding box, this function
    returns how many of the coords fall inside the bounding box.

    list_of_coords : sequence of (x, y) coordinates
    bbox           : sequence of corner points accepted by shapely.geometry.Polygon

    Returns a tuple (count, fraction):
      count    - number of coords that lie within the bounding box
      fraction - count divided by the total number of coords (a value between 0 and 1, not 0-100)

    An empty list_of_coords returns (0, 0.0) rather than raising ZeroDivisionError.
    """
    total = len(list_of_coords)
    if total == 0:
        # nothing to capture, and the fraction would otherwise be a division by zero
        return 0, 0.0

    polygon = shapely.geometry.Polygon(bbox)

    # "within" is True only for points in the interior of the polygon;
    # points lying exactly on the boundary are not counted
    count = sum(shapely.geometry.Point(wgs_point).within(polygon) for wgs_point in list_of_coords)

    return count, count / total

def bb2shapely(bbox):
    """
    Turn a [xmin, ymin, xmax, ymax] list into the four corner points of the box.

    The corners are returned as a list of (x, y) tuples in the order
    (xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax).
    """
    xmin, ymin = bbox[0], bbox[1]
    xmax, ymax = bbox[2], bbox[3]

    lower_edge = [(xmin, ymin), (xmax, ymin)]
    upper_edge = [(xmax, ymax), (xmin, ymax)]
    return lower_edge + upper_edge

Get the master dataframe 

In [2]:
# dataframe of daily flood images and sentinel coverage as bbox data
# NOTE: unpickling can execute arbitrary code - only load pickle files from a trusted source.
# The with-block closes the file on exit, so no explicit close() is needed.
with open('flooddays_with_sentinel_df.pkl', 'rb') as f:
    flooddays_with_sentinel_df = pickle.load(f)

flooddays_with_sentinel_df.head()

Unnamed: 0_level_0,DFO_id,flood_day,tif_filename,flood_year,flood_start,reported_duration,observed_total_duration,snapshot_date,snapshot_extent_img,snapshot_extent_km2,...,displaced_k,duration_days,exposed_mn,killed,start_date,end_date,dfo_severity,wsg84_bbox,sentinel_coverage,sentinel_coverage_Nboxes
DFO_day_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
DFO_4632_0,DFO_4632,0,DFO_4632_From_20180615_to_20180620.tif,2018,2018-06-15,5,8,2018-06-15,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",7513.3125,...,1000,5,1207989,1,2018-06-15,2018-06-20,1.5,"[90.63215190892367, 32.771664458785295, 108.43...","{0: [91.9456071436343, 29.77601126852889, 94.8...",5
DFO_4632_1,DFO_4632,1,DFO_4632_From_20180615_to_20180620.tif,2018,2018-06-15,5,8,2018-06-16,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",5362.25,...,1000,5,1207989,1,2018-06-15,2018-06-20,1.5,"[90.63215190892367, 32.771664458785295, 108.43...",{},0
DFO_4632_2,DFO_4632,2,DFO_4632_From_20180615_to_20180620.tif,2018,2018-06-15,5,8,2018-06-17,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",2904.8125,...,1000,5,1207989,1,2018-06-15,2018-06-20,1.5,"[90.63215190892367, 32.771664458785295, 108.43...","{0: [95.9592518987471, 29.26117989441572, 98.8...",5
DFO_4632_3,DFO_4632,3,DFO_4632_From_20180615_to_20180620.tif,2018,2018-06-15,5,8,2018-06-18,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",1152.75,...,1000,5,1207989,1,2018-06-15,2018-06-20,1.5,"[90.63215190892367, 32.771664458785295, 108.43...",{},0
DFO_4632_4,DFO_4632,4,DFO_4632_From_20180615_to_20180620.tif,2018,2018-06-15,5,8,2018-06-19,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",652.375,...,1000,5,1207989,1,2018-06-15,2018-06-20,1.5,"[90.63215190892367, 32.771664458785295, 108.43...","{0: [100.28209710959871, 30.251160866016946, 1...",5


In [3]:
# Capture metrics per flood day, collected column-wise and assembled into a dataframe below
DFO_day_id = []
capture_pixel_count = []
capture_percent = []
capture_area_km = []
captured_any = []

for index, row in flooddays_with_sentinel_df.iterrows():

    # Only the affine transform is needed from the TIF; the with-block closes the dataset
    # again so that no file handle is left open for every row.
    with rasterio.open(tif_root + row['tif_filename']) as raster:
        spatial_transform = raster.meta['transform']
    flood_extent_coords = wgs_flood_coordlist_from_img(row['snapshot_extent_img'], spatial_transform)

    # loop over 0, 1 or more than 1 sentinel bbox that has been identified to have spatio-temporal overlap with the corresponding GFD image
    # sum the overlaps within one or more Sentinel bounding boxes.
    # NOTE: Assumption made here: the bounding boxes do not overlap in such a way that counts the same coverage of the same flood extent pixels more than once
    n_pix = 0
    percent_of_extent = 0
    # A snapshot without any flood pixels has nothing to capture and no defined capture
    # fraction, so the bounding boxes are skipped and the metrics stay at zero.
    n_boxes = row['sentinel_coverage_Nboxes'] if flood_extent_coords else 0
    for i in range(n_boxes):
        bbox = row['sentinel_coverage'][i]
        n, pc = capture_metrics(flood_extent_coords, bb2shapely(bbox))
        n_pix += n
        percent_of_extent += pc

    DFO_day_id.append(index)
    capture_pixel_count.append(n_pix)
    capture_percent.append(percent_of_extent)  # stored as a fraction of the extent, summed over the boxes
    capture_area_km.append(n_pix*0.0625) # each pixel is 250mx250m = one sixteenth of a km2
    captured_any.append(min(1,n_pix)) # truncate the number of captured pixels to 0 or 1 only



capture_df = pd.DataFrame({'DFO_day_id' :DFO_day_id,
                           'capture_pixel_count':capture_pixel_count,
                           'capture_percent':capture_percent,
                           'capture_area_km':capture_area_km,
                           'captured_any':captured_any}).set_index('DFO_day_id')

capture_df.head()

Unnamed: 0_level_0,capture_pixel_count,capture_percent,capture_area_km,captured_any
DFO_day_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
DFO_4632_0,3389,0.028192,211.8125,1
DFO_4632_1,0,0.0,0.0,0
DFO_4632_2,63,0.001356,3.9375,1
DFO_4632_3,0,0.0,0.0,0
DFO_4632_4,0,0.0,0.0,0


... join on to the master data

In [4]:
# Attach the capture metrics to the master data. Both dataframes carry the unique
# DFO_day_id as their index, so an index-on-index (left) join is all that is needed.
shape_before = flooddays_with_sentinel_df.shape
print("shape before :", shape_before)

flooddays_with_sentinel_df = flooddays_with_sentinel_df.join(capture_df, how='left')

shape_after = flooddays_with_sentinel_df.shape
print("shape after :", shape_after)
flooddays_with_sentinel_df.head()

shape before : (61, 21)
shape after : (61, 25)


Unnamed: 0_level_0,DFO_id,flood_day,tif_filename,flood_year,flood_start,reported_duration,observed_total_duration,snapshot_date,snapshot_extent_img,snapshot_extent_km2,...,start_date,end_date,dfo_severity,wsg84_bbox,sentinel_coverage,sentinel_coverage_Nboxes,capture_pixel_count,capture_percent,capture_area_km,captured_any
DFO_day_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
DFO_4632_0,DFO_4632,0,DFO_4632_From_20180615_to_20180620.tif,2018,2018-06-15,5,8,2018-06-15,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",7513.3125,...,2018-06-15,2018-06-20,1.5,"[90.63215190892367, 32.771664458785295, 108.43...","{0: [91.9456071436343, 29.77601126852889, 94.8...",5,3389,0.028192,211.8125,1
DFO_4632_1,DFO_4632,1,DFO_4632_From_20180615_to_20180620.tif,2018,2018-06-15,5,8,2018-06-16,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",5362.25,...,2018-06-15,2018-06-20,1.5,"[90.63215190892367, 32.771664458785295, 108.43...",{},0,0,0.0,0.0,0
DFO_4632_2,DFO_4632,2,DFO_4632_From_20180615_to_20180620.tif,2018,2018-06-15,5,8,2018-06-17,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",2904.8125,...,2018-06-15,2018-06-20,1.5,"[90.63215190892367, 32.771664458785295, 108.43...","{0: [95.9592518987471, 29.26117989441572, 98.8...",5,63,0.001356,3.9375,1
DFO_4632_3,DFO_4632,3,DFO_4632_From_20180615_to_20180620.tif,2018,2018-06-15,5,8,2018-06-18,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",1152.75,...,2018-06-15,2018-06-20,1.5,"[90.63215190892367, 32.771664458785295, 108.43...",{},0,0,0.0,0.0,0
DFO_4632_4,DFO_4632,4,DFO_4632_From_20180615_to_20180620.tif,2018,2018-06-15,5,8,2018-06-19,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",652.375,...,2018-06-15,2018-06-20,1.5,"[90.63215190892367, 32.771664458785295, 108.43...","{0: [100.28209710959871, 30.251160866016946, 1...",5,0,0.0,0.0,0


In [5]:
# Save the master dataframe, which now includes the capture metrics.
# The with-block closes the file on exit, so no explicit close() is needed.
with open('flooddays_with_sentinel_and_capture_df.pkl', 'wb') as f:
    pickle.dump(flooddays_with_sentinel_df, f)