In [8]:
import ee
import pandas as pd
import asset_paths_to_cop_data as fc_paths
import make_difference_images as di_module

# Initialise the Earth Engine client for this kernel session.
# NOTE(review): presumably relies on credentials that were stored earlier
# (e.g. via an authentication step outside this notebook) — confirm.
ee.Initialize()

In [9]:
def import_FCs():
    """Load the ROI and flood-polygon feature collections for every subsite.

    ``fc_paths.ALL_FILENAMES`` is flattened by one level; each resulting
    entry is indexed as ``entry[0]`` (ROI asset path) and ``entry[1]``
    (flood-polygon asset path).

    Returns:
        list: one ``[subsite_name, roi, flood_poly]`` list per entry, where
        ``subsite_name`` is the fourth "/"-separated component of the ROI
        asset path and the other two items are ``ee.FeatureCollection``
        objects built from the two asset paths.
    """
    def _load(asset_pair):
        # Index 0 is the ROI asset path, index 1 the flood-polygon asset path.
        roi_path, flood_path = asset_pair[0], asset_pair[1]
        return [
            roi_path.split("/")[3],
            ee.FeatureCollection(roi_path),
            ee.FeatureCollection(flood_path),
        ]

    return [_load(pair) for group in fc_paths.ALL_FILENAMES for pair in group]

In [10]:
def get_S1_images(all_geometries):
    """Return the VV band of the 'COPERNICUS/S1_GRD' images over a region.

    The collection is narrowed, in order, to images whose
    ``instrumentMode`` is ``'IW'``, whose
    ``transmitterReceiverPolarisation`` list contains ``'VV'``, whose
    ``orbitProperties_pass`` is ``'DESCENDING'``, and which intersect
    ``all_geometries``.

    Args:
        all_geometries: geometry / feature collection passed to
            ``filterBounds``.

    Returns:
        ee.ImageCollection: the filtered collection with only the ``VV``
        band selected.
    """
    grd = ee.ImageCollection('COPERNICUS/S1_GRD')
    iw_only = grd.filter(ee.Filter.eq('instrumentMode', 'IW'))
    with_vv = iw_only.filter(
        ee.Filter.listContains('transmitterReceiverPolarisation', 'VV')
    )
    descending = with_vv.filter(
        ee.Filter.eq('orbitProperties_pass', 'DESCENDING')
    )
    in_region = descending.filterBounds(all_geometries)
    return in_region.select(['VV'])

In [11]:
def create_labelled_dataset(poly_and_di_df, num_points=3000, scale=10,
                            projection='EPSG:4326'):
    """Attach a stratified sample of labelled pixels to every subsite row.

    For each row, a zero-valued byte image clipped to the bounding box of
    the row's geometry is painted with value 1 where the flood polygons
    are, added as an extra band to the row's difference image, and the
    result is sampled with ``stratifiedSample`` using that painted band
    (referred to by the name ``'constant'``) as the class band.

    Args:
        poly_and_di_df (pandas.DataFrame): must provide the columns
            ``'Subsites'``, ``'Geometry'``, ``'Flood Poly'`` and
            ``'Difference Image'``.
        num_points (int): ``numPoints`` passed to ``stratifiedSample``.
            Defaults to 3000, the value previously hard-coded.
        scale (int | float): ``scale`` passed to ``stratifiedSample``.
            Defaults to 10.
        projection (str): ``projection`` passed to ``stratifiedSample``.
            Defaults to ``'EPSG:4326'``.

    Returns:
        pandas.DataFrame: ``poly_and_di_df`` merged with a new
        ``'Labelled Data'`` column holding the sampled collection for each
        subsite. The merge uses pandas' default key selection (the columns
        the two frames have in common).
    """
    labelled_data = []
    for _, row in poly_and_di_df.iterrows():
        geom, flood, di = row['Geometry'], row['Flood Poly'], row['Difference Image']
        bounds = ee.FeatureCollection(geom.geometry().bounds())
        # Label band: 0 everywhere inside the bounds, 1 where a flood polygon is painted.
        empty = ee.Image.constant(0).byte().clip(bounds)
        painted = empty.paint(flood, 1)
        # Clip again after painting so the label band stays limited to the bounds.
        di_with_flood = di.addBands(painted.clip(bounds))
        training = di_with_flood.stratifiedSample(
            numPoints=num_points,
            classBand='constant',
            region=bounds,
            scale=scale,
            projection=projection,
        )
        labelled_data.append([row['Subsites'], training])
    labels_df = pd.DataFrame(labelled_data, columns=['Subsites', 'Labelled Data'])
    return poly_and_di_df.merge(labels_df)

In [12]:
def batch_export_tfrecords_to_GCS(collection_to_export, file_name_prefix, output_features,
                                  bucket='labelled_data'):
    """Start an Earth Engine batch export of a collection to Cloud Storage as TFRecord.

    Args:
        collection_to_export: the collection passed as ``collection`` to
            ``ee.batch.Export.table.toCloudStorage``.
        file_name_prefix (str): passed as ``fileNamePrefix``.
        output_features (list): property names passed as ``selectors``.
        bucket (str): destination bucket name. Defaults to
            ``'labelled_data'``, the value previously hard-coded.

    Returns:
        The export task after ``start()`` has been called on it, so the
        caller can inspect its progress. Earlier versions returned ``None``;
        callers that ignore the return value are unaffected.
    """
    task = ee.batch.Export.table.toCloudStorage(
        collection=collection_to_export,
        bucket=bucket,
        fileFormat='TFRecord',
        fileNamePrefix=file_name_prefix,
        selectors=output_features
    )
    task.start()
    return task

In [13]:
# Load the [subsite name, ROI collection, flood-polygon collection] triples
# returned by import_FCs() and tabulate them, one row per subsite.
poly_data = import_FCs()
poly_df = pd.DataFrame(poly_data, columns=['Subsites', 'Geometry', 'Flood Poly'])
# Wrap every ROI collection in one collection and flatten it; the result is
# what get_S1_images() receives for its filterBounds() step.
all_geometries = ee.FeatureCollection(poly_df['Geometry'].tolist()).flatten()

In [14]:
# Filtered VV-band image collection covering all subsite geometries.
images = get_S1_images(all_geometries)
# NOTE(review): add_DIs lives in make_difference_images and is not visible here;
# presumably it returns poly_df with an added 'Difference Image' column, since
# create_labelled_dataset() reads row['Difference Image'] — confirm.
poly_and_di_df = di_module.add_DIs(poly_df, images)

In [15]:
# Add a 'Labelled Data' column holding the stratified sample for each subsite.
final_df = create_labelled_dataset(poly_and_di_df)

In [27]:
# Combine the per-subsite samples into one flat collection and call
# randomColumn() with its default arguments; the train/test split in the
# following cell filters on a property named 'random'.
labelled_data = ee.FeatureCollection(final_df['Labelled Data'].tolist()).flatten().randomColumn()


In [29]:
# Share of the samples that goes to the training partition. Both filters read
# this single value so the two partitions stay complementary (no overlap and
# no gap) if the split is ever changed.
TRAIN_FRACTION = 0.7
training_partition = labelled_data.filter(ee.Filter.lt('random', TRAIN_FRACTION))
test_partition = labelled_data.filter(ee.Filter.gte('random', TRAIN_FRACTION))

In [30]:

# Export the training partition first, then the test partition, each under its
# own file-name prefix and with the same two selected properties.
for partition, prefix in (
    (training_partition, 'py6_training_'),
    (test_partition, 'py6_test_'),
):
    batch_export_tfrecords_to_GCS(partition, prefix, ['VV', 'constant'])