In [25]:
import ee
from ee import batch
import pandas as pd

In [26]:
# Initialize the Earth Engine client library. Every ee.* call further down
# in this script runs after this point.
# NOTE(review): presumably relies on credentials already stored on this
# machine (e.g. from a prior ee.Authenticate()) — confirm for new setups.
ee.Initialize()

In [27]:
# Keep only the first row of the label table; its centroid is the single
# location processed by the rest of this script.
df = pd.read_csv('../data/label.csv').head(1)

# Location of interest (centroid of the first labelled row).
latitude = df['centroid_latitude'].to_numpy()[0]
longitude = df['centroid_longitude'].to_numpy()[0]

# Date window handed to the image-collection filter further down.
start_date = '2020-12-01'
end_date = '2020-12-31'

In [38]:
# --- Export destination ------------------------------------------------
# `prefix` is used both as the Drive folder and as the task description;
# `fname` is the prefix of the exported file name.
prefix = 'YOUR_FOLDER_NAME'
fname = 'YOUR_FILENAME'

# --- Image content -----------------------------------------------------
# Bands carried into the exported patch; QA60 is kept alongside the
# reflectance bands.
bands = [
    'B1',
    'B2',
    'B3',
    'B4',
    'B8',
    'QA60',
]
scale_factor = 10_000  # divisor applied to the selected Sentinel-2 bands

# --- Patch geometry ----------------------------------------------------
scale = 20  # sampling resolution, in meters
ksize = 112  # kernel radius in pixels: 2 * 112 + 1 = 225 pixels per side

# Point of interest, plus a circular region of interest around it whose
# radius (scale * ksize meters) matches the half-width of the patch.
point = ee.Geometry.Point([longitude, latitude])
buffered_point = point.buffer(scale * ksize)

# Function to estimate cloud coverage over the ROI
def cloudiness(image):
    """Attach a ``cloudiness`` property to ``image``.

    The property is the mean, over the module-level ``buffered_point``
    region sampled at the module-level ``scale``, of a 0/1 mask that is 1
    wherever the QA60 band has bit 10 (opaque clouds) or bit 11 (cirrus
    clouds) set.

    Args:
        image: Image that provides a ``QA60`` band.

    Returns:
        The same image with the ``cloudiness`` property set.
    """
    qa60 = image.select('QA60')

    # One 0/1 mask per cloud flag: bit 10 = opaque clouds, bit 11 = cirrus.
    opaque, cirrus = [qa60.bitwiseAnd(1 << bit).neq(0) for bit in (10, 11)]
    cloudy = opaque.Or(cirrus)

    # The mask keeps the band name 'QA60', so the mean is read back under
    # that key.
    roi_stats = cloudy.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=buffered_point,
        scale=scale,
    )
    return image.set('cloudiness', roi_stats.get('QA60'))

# Load the Sentinel-2 image collection, filter, estimate cloudiness, and select least cloudy
# Load the Sentinel-2 image collection, filter it to the date window and the
# region of interest, and tag every image with its cloudiness over the ROI.
#
# filterDate() treats its end argument as exclusive, so the end of the window
# is advanced by one day; otherwise images acquired on `end_date` itself
# would be silently left out.
# NOTE(review): 'COPERNICUS/S2' appears to be superseded by
# 'COPERNICUS/S2_HARMONIZED' in the Earth Engine catalog — confirm before
# switching the collection ID.
collection = (
    ee.ImageCollection('COPERNICUS/S2')
    .filterDate(start_date, ee.Date(end_date).advance(1, 'day'))
    .filterBounds(buffered_point)
    .map(cloudiness)
)

# Fail early with a readable message: with no matching image, first() yields
# nothing and the getInfo() calls below would fail with an opaque server-side
# error instead.
image_count = collection.size().getInfo()
if image_count == 0:
    raise ValueError(
        'No Sentinel-2 image found between {} and {} around ({}, {})'.format(
            start_date, end_date, longitude, latitude
        )
    )

# Select the least cloudy image (ascending sort on the 'cloudiness' property)
least_cloudy_image = collection.sort('cloudiness').first()

# Apply band selection and scaling to the chosen image.
# NOTE(review): `bands` includes QA60, so that band is divided by
# `scale_factor` as well; confirm downstream readers expect the scaled value
# rather than the raw QA60 bit field.
processed_image = least_cloudy_image.select(bands).divide(scale_factor)

# Retrieve the cloudiness value and acquisition date of the image as plain
# Python values so they can be attached to the exported feature.
cloudiness_value = ee.Number(least_cloudy_image.get('cloudiness')).getInfo()
date = ee.Date(least_cloudy_image.get('system:time_start')).format('YYYY-MM-dd').getInfo()

# Function to sample patch and include additional properties
def sample_patch(point, patches_array, scale, cloudiness_value, date, longitude, latitude):
    """Sample ``patches_array`` at ``point`` and annotate the first sample.

    Args:
        point: Feature whose geometry is used as the sampling region.
        patches_array: Image to sample (the caller passes a
            neighborhood-array image).
        scale: Sampling scale passed through to ``sample``.
        cloudiness_value: Value stored under the ``cloudiness`` property.
        date: Value stored under the ``date`` property.
        longitude: Value stored under the ``longitude`` property.
        latitude: Value stored under the ``latitude`` property.

    Returns:
        The first sampled feature, with the four properties above set.
    """
    # Sampling is done in EPSG:3857 and keeps samples that contain nulls.
    sampling_options = {
        'region': point.geometry(),
        'scale': scale,
        'projection': 'EPSG:3857',
        'factor': None,
        'numPixels': None,
        'dropNulls': False,
        'tileScale': 12,
    }
    extra_properties = {
        'cloudiness': cloudiness_value,
        'date': date,
        'longitude': longitude,
        'latitude': latitude,
    }
    first_sample = patches_array.sample(**sampling_options).first()
    return first_sample.set(extra_properties)

# Function to get array patches
def _as_name_list(names):
    """Return ``names`` as a list of strings (accepts a list or a comma-separated string)."""
    if isinstance(names, str):
        return [part.strip() for part in names.split(',') if part.strip()]
    return list(names)


def _resolve_export_selectors(samples, selectors, dropselectors):
    """Work out which properties to export, or ``None`` to export all of them."""
    if selectors is None and dropselectors is None:
        return None
    if selectors is None:
        # Nothing was listed explicitly: start from the properties that are
        # actually present on the first sampled feature.
        names = ee.Feature(samples.first()).propertyNames().getInfo()
    else:
        names = _as_name_list(selectors)
    if dropselectors is not None:
        excluded = set(_as_name_list(dropselectors))
        names = [name for name in names if name not in excluded]
    return names


# Function to get array patches
def get_array_patches(img, scale, ksize, points, prefix, fname, cloudiness_value, date, longitude, latitude, selectors=None, dropselectors=None):
    """Build a Drive export task that writes one array patch per point.

    Each band of ``img`` is turned into a neighborhood array using a square
    kernel of radius ``ksize`` pixels, the result is sampled at every feature
    of ``points`` via ``sample_patch``, and the samples are exported as a
    TFRecord table.

    Args:
        img: Image to cut patches from.
        scale: Sampling scale forwarded to ``sample_patch``.
        ksize: Kernel radius, in pixels.
        points: Feature collection of sampling locations.
        prefix: Used both as the task description and as the Drive folder.
        fname: File name prefix of the exported file.
        cloudiness_value: Stored on every sample by ``sample_patch``.
        date: Stored on every sample by ``sample_patch``.
        longitude: Stored on every sample by ``sample_patch``.
        latitude: Stored on every sample by ``sample_patch``.
        selectors: Optional properties to export (list of names or a
            comma-separated string). ``None`` exports every property.
        dropselectors: Optional properties to leave out of the export (same
            accepted forms as ``selectors``). When given without
            ``selectors``, the property names are first fetched from the
            first sample, which costs one extra ``getInfo()`` round trip.

    Returns:
        The export task, not yet started; the caller must call ``start()``.
    """
    kern = ee.Kernel.square(radius=ksize, units='pixels')
    patches_array = img.neighborhoodToArray(kern)
    samples = points.map(lambda pt: sample_patch(pt, patches_array, scale, cloudiness_value, date, longitude, latitude))

    export_options = {
        'collection': samples,
        'description': prefix,
        'folder': prefix,
        'fileNamePrefix': fname,
        'fileFormat': 'TFRecord',
    }
    # Only pass `selectors` when the caller asked for a restriction, so the
    # default call is identical to an export without the argument.
    export_selectors = _resolve_export_selectors(samples, selectors, dropselectors)
    if export_selectors is not None:
        export_options['selectors'] = export_selectors

    # Export to TFRecord file
    task = ee.batch.Export.table.toDrive(**export_options)
    return task

# Define the point of interest as a feature collection
# Wrap the point of interest in a one-feature collection; the coordinates are
# also stored as properties on that feature.
points = ee.FeatureCollection([
    ee.Feature(point, {'longitude': longitude, 'latitude': latitude}),
])

# Build the export task for the processed image. Arguments are passed by
# name so each one is easy to match to the function's parameters.
task = get_array_patches(
    img=processed_image,
    scale=scale,
    ksize=ksize,
    points=points,
    prefix=prefix,
    fname=fname,
    cloudiness_value=cloudiness_value,
    date=date,
    longitude=longitude,
    latitude=latitude,
)

# Submit the task; building it above does not start it.
task.start()