<a href="https://colab.research.google.com/github/lawrencejesse/Sentinel2_Extractor/blob/main/Sentinel2_RasterExtractor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Packaging and Download

### Subtask:
Zip the exported GeoTIFFs in the `/content/exports` directory and provide a download link for the zip file.

**Reasoning**:
Use the `zip` command to create a zip archive of the exported files and then provide a link to download the created zip file.

In [None]:
import os
from google.colab import files

def zip_and_download_exports():
    """Zip the contents of ``/content/exports`` and offer the archive for download.

    The archive is written to ``/content/ndvi_exports.zip`` and rebuilt from
    scratch on every call, so files removed from the export directory since
    the previous run do not linger inside it. If the export directory is
    missing or empty, a message is printed and nothing is downloaded.

    Returns:
        None
    """
    # Local import: the cell's import header sits outside this block.
    import shutil

    export_dir = '/content/exports'
    zip_filename = 'ndvi_exports.zip'
    zip_filepath = f'/content/{zip_filename}'

    if not (os.path.isdir(export_dir) and os.listdir(export_dir)):
        print("No files found in the export directory to zip.")
        return

    # Pure-Python replacement for the former `!zip -r` shell escape, which
    # only works inside IPython and interpolated the paths unquoted.
    # make_archive appends ".zip" itself, so strip it from the base name.
    # Entries are stored relative to the filesystem root (content/exports/...),
    # which is the layout `zip -r` produced for an absolute path.
    archive_base = os.path.splitext(zip_filepath)[0]
    shutil.make_archive(
        archive_base,
        'zip',
        root_dir=os.sep,
        base_dir=export_dir.lstrip(os.sep),
    )

    # Provide a download link for the zip file
    print(f"Your files are ready for download. Click the link below to download {zip_filename}")
    files.download(zip_filepath)

# You can call this function after the export tasks are complete and the files are in the /content/exports directory.
# Note: Earth Engine exports to Drive are asynchronous. You'll need to wait for them to complete before zipping.
# A more robust solution would involve monitoring the tasks, but for simplicity, you can manually run this cell after exports are finished.

## Data Export

### Subtask:
Implement the export logic, including handling individual image export and optional mean NDVI export.

**Reasoning**:
Define a function `export_ndvi` that takes the processed collection, AOI, start date, end date, and the mean NDVI toggle status as input. Inside the function, check if the mean NDVI toggle is on. If so, compute the mean NDVI for the collection and export it. Otherwise, iterate through the collection and export each image individually, clipping to the AOI and using descriptive filenames.

In [None]:
import os
import ee.batch

def _start_ndvi_drive_export(image, region, filename, folder='earth_engine_exports'):
    """Start one GeoTIFF export to Google Drive and return the started task.

    Args:
        image (ee.Image): Image to export.
        region (ee.Geometry): Export region.
        filename (str): Descriptive name ending in '.tif'; also used as the
            task description.
        folder (str): Google Drive folder that receives the file.
    """
    task = ee.batch.Export.image.toDrive(
        image=image,
        description=filename,
        folder=folder,  # You can change this folder name in your Google Drive
        fileNamePrefix=os.path.splitext(filename)[0],
        scale=10,
        region=region,
        fileFormat='GeoTIFF',
        crs='EPSG:4326'
    )
    task.start()
    return task


def export_ndvi(processed_collection, aoi, start_date, end_date, export_mean=False):
    """
    Exports the processed NDVI images as GeoTIFFs to Google Drive.

    Only the 'NDVI' band is exported. Export tasks are started and their ids
    printed; this function does not wait for them to finish. The local
    ``/content/exports`` directory is created if it does not exist, but the
    tasks themselves write to the Drive folder, not to that directory.

    Args:
        processed_collection (ee.ImageCollection): The processed Sentinel-2 image collection with NDVI.
        aoi (ee.FeatureCollection or ee.Geometry): The area of interest.
        start_date (str): The start date in 'YYYY-MM-DD' format.
        end_date (str): The end date in 'YYYY-MM-DD' format.
        export_mean (bool): Whether to export the mean NDVI for the date range.
    """
    export_dir = '/content/exports'
    os.makedirs(export_dir, exist_ok=True)

    # Wrapping in a FeatureCollection first lets the same call accept a
    # Geometry, a Feature or a FeatureCollection; calling aoi.geometry()
    # directly assumed a Feature/FeatureCollection.
    region = ee.FeatureCollection(aoi).geometry()

    # Export NDVI only: the source images also carry the integer Sentinel-2
    # bands, and mixing those with the floating-point NDVI band in a single
    # GeoTIFF is not what this function is documented to produce.
    ndvi_collection = processed_collection.select('NDVI')

    if export_mean:
        print("Exporting mean NDVI...")
        mean_ndvi_image = ndvi_collection.mean().clip(region)
        filename = f'NDVI_Mean_{start_date.replace("-", "")}_{end_date.replace("-", "")}.tif'
        task = _start_ndvi_drive_export(mean_ndvi_image, region, filename)
        print(f"Mean NDVI export task started: {task.id}")
        return

    print("Exporting individual NDVI images...")
    image_count = ndvi_collection.size().getInfo()
    if image_count == 0:
        print("No images found in the collection to export.")
        return

    image_list = ndvi_collection.toList(image_count)
    exports_per_date = {}

    for i in range(image_count):
        image = ee.Image(image_list.get(i))
        # Lower-case 'dd' is day-of-month; upper-case 'DD' is day-of-year in
        # the Joda-style patterns used by ee.Date.format.
        image_date = image.date().format('YYYYMMdd').getInfo()

        # Several images can share an acquisition date; suffix repeats so
        # their filenames stay distinct.
        occurrence = exports_per_date.get(image_date, 0) + 1
        exports_per_date[image_date] = occurrence
        suffix = '' if occurrence == 1 else f'_{occurrence}'

        filename = f'NDVI_{image_date}{suffix}.tif'  # AOINAME is not available, using date only
        task = _start_ndvi_drive_export(image.clip(region), region, filename)
        print(f"Export task for {filename} started: {task.id}")

## Sentinel-2 Data Processing

### Subtask:
Define a function to build the Sentinel-2 collection, apply a cloud mask using the SCL band, and compute the NDVI.

**Reasoning**:
Define a function `process_sentinel2` that takes the AOI, start date, and end date as input. Inside the function, filter the Sentinel-2 collection by date and bounds, apply the cloud mask, and compute the NDVI.

In [None]:
def process_sentinel2(aoi, start_date, end_date):
    """
    Load Sentinel-2 SR Harmonized imagery, mask unwanted SCL classes, add NDVI.

    Args:
        aoi (ee.FeatureCollection or ee.Geometry): The area of interest.
        start_date (str): The start date in 'YYYY-MM-DD' format.
        end_date (str): The end date in 'YYYY-MM-DD' format.

    Returns:
        ee.ImageCollection: Every image of the filtered collection, masked and
        carrying its original bands plus an extra 'NDVI' band.
    """
    print(f"Processing Sentinel-2 for AOI, dates: {start_date} to {end_date}")

    s2_raw = (
        ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
        .filterDate(start_date, end_date)
        .filterBounds(aoi)
    )
    print(f"Initial collection size: {s2_raw.size().getInfo()}")

    def _drop_flagged_pixels(image):
        """Mask pixels whose SCL class is 3, 8, 9, 10 or 11 (clouds, cloud shadows, snow)."""
        scl = image.select('SCL')
        keep = scl.neq(3)
        for scl_class in (8, 9, 10, 11):
            keep = keep.And(scl.neq(scl_class))
        return image.updateMask(keep)

    def _append_ndvi(image):
        """Add an 'NDVI' band computed from B8 and B4 after scaling by 0.0001."""
        reflectance = image.select(['B4', 'B8']).multiply(0.0001)
        ndvi_band = reflectance.normalizedDifference(['B8', 'B4']).rename('NDVI')
        return image.addBands(ndvi_band)

    # Masking runs first so NDVI is derived from the already-masked bands.
    s2_with_ndvi = s2_raw.map(_drop_flagged_pixels).map(_append_ndvi)

    print(f"Processed collection size (after masking and NDVI): {s2_with_ndvi.size().getInfo()}")

    return s2_with_ndvi

# Task
Build a Google Colab notebook in Python that allows users to upload a polygon file (KML, GeoJSON, or zipped Shapefile), parse it as an AOI in WGS84, preview the AOI on a map, select a date range using widgets, download cloud-masked Sentinel-2 NDVI GeoTIFFs for the AOI and date range (clipped to AOI if possible), save them with descriptive filenames to `/content/exports`, zip the files, and provide a download link. Include an optional toggle to export a single mean NDVI for the date range. The notebook should use `earthengine-api`, `geemap`, and `ipywidgets`, authenticate with Earth Engine, use Sentinel-2 SR Harmonized (COPERNICUS/S2_SR_HARMONIZED) with SCL cloud masking, compute NDVI after scaling reflectance by 0.0001, and output GeoTIFFs at 10m scale in EPSG:4326.

## Setup and authentication

### Subtask:
Install necessary libraries (`earthengine-api`, `geemap`, `ipywidgets`), authenticate with Earth Engine, and initialize it.


**Reasoning**:
Install the necessary libraries using pip.



In [None]:
%pip install -U -q earthengine-api geemap ipywidgets

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/463.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m463.2/463.2 kB[0m [31m14.9 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/631.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m631.3/631.3 kB[0m [31m34.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.8/139.8 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m79.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m68.3 MB/s[0m eta [36m0:00:00[0m
[?25h

**Reasoning**:
Authenticate and initialize Earth Engine.



In [None]:
import ee

# First attempt: works only when Earth Engine credentials are already stored.
try:
    ee.Initialize()
except Exception as init_error:
    print(f'Failed to initialize Google Earth Engine: {init_error}')
    print('Please authenticate with Google Earth Engine by running ee.Authenticate() in a separate cell.')
else:
    print('Google Earth Engine has been successfully initialized.')


Failed to initialize Google Earth Engine: Please authorize access to your Earth Engine account by running

earthengine authenticate

in your command line, or ee.Authenticate() in Python, and then retry.
Please authenticate with Google Earth Engine by running ee.Authenticate() in a separate cell.


**Reasoning**:
Authenticate Earth Engine as instructed by the error message.



In [None]:
# Start the Earth Engine sign-in flow; run ee.Initialize() again once it completes.
ee.Authenticate()

**Reasoning**:
Re-initialize Earth Engine after successful authentication.



In [None]:
# Second attempt, after ee.Authenticate() has been run.
try:
    ee.Initialize()
except Exception as init_error:
    print(f'Failed to initialize Google Earth Engine after authentication: {init_error}')
else:
    print('Google Earth Engine has been successfully initialized after authentication.')

Failed to initialize Google Earth Engine after authentication: ee.Initialize: no project found. Call with project= or see http://goo.gle/ee-auth.


**Reasoning**:
Re-initialize Earth Engine with a project ID as instructed by the error message.



In [None]:
# Third attempt: pass the Cloud project explicitly. Replace 'jessemapping'
# with your own Earth Engine-enabled project ID.
try:
    ee.Initialize(project='jessemapping')
except Exception as init_error:
    print(f'Failed to initialize Google Earth Engine with a project: {init_error}')
else:
    print('Google Earth Engine has been successfully initialized with a project.')

Google Earth Engine has been successfully initialized with a project.


*** Earth Engine *** Share your feedback by taking our Annual Developer Satisfaction Survey: https://google.qualtrics.com/jfe/form/SV_7TDKVSyKvBdmMqW?ref=4i2o6


## AOI upload and visualization

### Subtask:
Create functionality to upload a polygon file (KML, GeoJSON, or zipped Shapefile), parse it as an Earth Engine feature collection in WGS84, and display it on an interactive map using `geemap` with a satellite basemap, ensuring it's centered and visible.


**Reasoning**:
Import necessary libraries and create a file upload widget.



In [None]:
import ipywidgets as widgets
import geemap
import os

# Global variable to store the AOI name
aoi_name_global = None

# Define the handle_upload function here
def _first_uploaded_file(value):
    """Return ``(file_name, content)`` for the first uploaded file, or None.

    Handles both shapes of ``FileUpload.value``: a dict keyed by filename
    (ipywidgets 7.x) and a sequence of per-file dicts carrying 'name' and
    'content' (ipywidgets 8.x).
    """
    if not value:
        return None
    if isinstance(value, dict):
        file_name, payload = next(iter(value.items()))
        return file_name, payload['content']
    first = value[0]
    return first['name'], first['content']


def _find_shapefile_in_zip(zip_path, extract_dir):
    """Extract *zip_path* into *extract_dir* and return the path of its .shp file.

    Raises:
        ValueError: If the archive contains no .shp file.
    """
    import zipfile

    with zipfile.ZipFile(zip_path) as archive:
        archive.extractall(extract_dir)

    for root, _dirs, names in os.walk(extract_dir):
        # Archives created on macOS carry "__MACOSX/._*.shp" resource forks
        # that are not real shapefiles.
        if '__MACOSX' in root:
            continue
        for name in sorted(names):
            if name.lower().endswith('.shp') and not name.startswith('.'):
                return os.path.join(root, name)
    raise ValueError("The uploaded zip archive does not contain a .shp file.")


# Define the handle_upload function here
def handle_upload(change):
    """Observer for the upload widget: parse the uploaded polygon file into an AOI.

    Reads the first file held by ``upload_widget``, converts it with geemap
    (KML, GeoJSON or zipped Shapefile, matched case-insensitively on the
    extension) and stores the result in the globals ``aoi`` (its geometry
    transformed to EPSG:4326) and ``aoi_name_global`` (the cleaned file name
    without extension). On any failure both globals are left as None and a
    message is printed.

    Args:
        change: The change notification passed by ``observe``; unused, the
            widget's current value is read instead.
    """
    import tempfile

    global aoi  # Declare aoi as global
    global aoi_name_global  # Declare aoi_name_global as global

    print("handle_upload function triggered.")  # Debug print
    aoi = None  # Reset aoi
    aoi_name_global = None  # Reset aoi name

    uploaded = _first_uploaded_file(upload_widget.value)
    if uploaded is None:
        # The observer also fires when the widget value is cleared.
        print("No uploaded file found.")
        return
    file_name, file_content = uploaded
    print(f"Processing file: {file_name}")  # Debug print

    # Only the base name is used on disk so the upload cannot escape the
    # temporary directory.
    safe_name = os.path.basename(file_name) or 'upload'
    stem, extension = os.path.splitext(safe_name)
    extension = extension.lower()

    if extension not in ('.kml', '.geojson', '.zip'):
        print("Unsupported file type.")
        return

    try:
        # A temporary directory is removed even when conversion raises, and
        # keeps uploads out of the working directory.
        with tempfile.TemporaryDirectory() as tmp_dir:
            tmp_path = os.path.join(tmp_dir, safe_name)
            with open(tmp_path, 'wb') as f:
                f.write(file_content)

            if extension == '.kml':
                parsed = geemap.kml_to_ee(tmp_path)
            elif extension == '.geojson':
                parsed = geemap.geojson_to_ee(tmp_path)
            else:
                shp_path = _find_shapefile_in_zip(
                    tmp_path, os.path.join(tmp_dir, 'unzipped')
                )
                parsed = geemap.shp_to_ee(shp_path)

        if parsed is None:
            print("Failed to process the uploaded file: AOI is None.")  # Debug print
            return

        # Ensure the AOI is in WGS84 (EPSG:4326) with a non-zero error margin
        aoi = parsed.geometry().transform('EPSG:4326', 1)
        # Store the AOI name (cleaned filename without extension)
        aoi_name_global = stem.replace(" ", "_").replace("-", "_")

        print("AOI uploaded and processed successfully.")  # Debug print
        if aoi_name_global:
            print(f"AOI Name: {aoi_name_global}")
        print("Run the next cell to display the map.")

    except Exception as e:
        # UI callback boundary: report the problem instead of raising into
        # the widget machinery.
        print(f"An error occurred during file processing: {e}")  # Debug print
        aoi = None  # Ensure aoi is None if processing fails
        aoi_name_global = None  # Reset if processing fails


# Single-file upload control restricted to the supported polygon formats.
upload_widget = widgets.FileUpload(multiple=False, accept='.kml,.geojson,.zip')

# Parse the file as soon as the widget's value changes.
upload_widget.observe(handle_upload, names='value')

display(upload_widget)

FileUpload(value={}, accept='.kml,.geojson,.zip', description='Upload')

**Reasoning**:
Define the function to handle file uploads, parse the file as an Earth Engine feature collection, and display it on a map.



**Reasoning**:
Add a new cell to display the map with the uploaded AOI after the file has been processed by the `handle_upload` function.

In [None]:
# Preview the uploaded AOI on a satellite basemap, centred on the AOI.
if globals().get('aoi') is None:
    print("Please upload an AOI using the widget above first.")
else:
    m = geemap.Map(basemap='SATELLITE')
    aoi_layer = ee.FeatureCollection(aoi)
    m.addLayer(aoi_layer, {}, 'Uploaded AOI')
    m.centerObject(aoi)
    display(m)

Please upload an AOI using the widget above first.


## Interactive widgets for date and export

### Subtask:
Add `ipywidgets` for selecting a start and end date and a button to trigger the data export process. Include a toggle for exporting the mean NDVI.


In [None]:
from datetime import datetime
import ipywidgets as widgets
from IPython.display import display, clear_output
import ee
import ee.batch
import os

# Global variables used by functions in cell 8e6c47aa
# available_dates_dict = {}
# processed_collection_global = None

# The get_available_dates function and the on_click binding for get_dates_button
# have been moved to cell 8e6c47aa for better organization.

# This cell can now be used for other imports or definitions if needed,
# or it can be left with just necessary imports and global variable declarations.
# The core logic for date selection widgets and button interaction is now in cell 8e6c47aa.

# Note: The process_sentinel2 function is assumed to be defined in another cell (e.g., cell 6e2d21a4).
# Ensure that cell is run before running cell 8e6c47aa.

**Reasoning**:
Create and display the date picker widgets, the mean NDVI checkbox, and the export button as specified in the instructions.



In [None]:
# --- Colab / ipywidgets friendly UI for S2 date picking + NDVI/RGB exports ---

from datetime import datetime
import ipywidgets as widgets
from IPython.display import display
import ee, ee.batch

# (Colab) Make sure custom widget manager is enabled.
# Best-effort: any failure (presumably including the import failing when not
# running in Colab) is deliberately ignored so the rest of the cell still runs.
try:
    from google.colab import output as _colab_output
    _colab_output.enable_custom_widget_manager()
except Exception:
    pass

# -----------------------
# Globals used across UI
# -----------------------
# Filled by get_available_dates: 'YYYY-MM-DD' -> system:time_start in milliseconds.
available_dates_dict = {}
# Set by get_available_dates to the collection returned by process_sentinel2.
processed_collection_global = None

# If your notebook set these earlier, they will be used here.
# aoi: ee.Geometry/Feature/FeatureCollection (required)
# aoi_name_global: str, optional nickname for filenames

# -----------------------
# UI callbacks and logic
# -----------------------
def get_available_dates(_):
    """
    Button callback: list the acquisition dates available for the chosen range.

    Runs ``process_sentinel2`` for the global ``aoi`` and the dates picked in
    ``start_date_widget`` / ``end_date_widget``, stores the resulting
    collection in ``processed_collection_global`` and fills
    ``available_dates_dict`` with ``'YYYY-MM-DD' -> system:time_start``
    (milliseconds, UTC), sorted by date. When several images fall on the same
    UTC date the last one's timestamp is kept. If any date is found, the
    date-selection/export panel is rendered.

    Args:
        _: The clicked button (unused).
    """
    from datetime import datetime, timezone

    global available_dates_dict, processed_collection_global

    if 'aoi' not in globals() or aoi is None:
        print("Please upload/define an AOI first.")
        return

    start_value = start_date_widget.value
    end_value = end_date_widget.value
    # A cleared DatePicker holds None, which has no strftime.
    if start_value is None or end_value is None:
        print("Please choose both a start date and an end date.")
        return

    start_date_str = start_value.strftime('%Y-%m-%d')
    end_date_str   = end_value.strftime('%Y-%m-%d')
    # ISO strings compare chronologically and avoid comparing a datetime
    # (the initial widget value) with a date (a user-picked value).
    if start_date_str > end_date_str:
        print("The start date must not be after the end date.")
        return

    # process_sentinel2(aoi, start_date_str, end_date_str) must return an ImageCollection
    # with NDVI already added (band name 'NDVI') and standard S2 bands (B2..B12).
    processed_collection_global = process_sentinel2(aoi, start_date_str, end_date_str)

    # Pull only system:time_start instead of the full metadata of every image.
    try:
        timestamps_ms = processed_collection_global.aggregate_array(
            'system:time_start'
        ).getInfo()

        # map date_str -> time_start_millis
        by_date = {}
        for ts_ms in timestamps_ms:
            acquired = datetime.fromtimestamp(ts_ms / 1000, tz=timezone.utc)
            by_date[acquired.strftime('%Y-%m-%d')] = ts_ms

        # sort by date
        available_dates_dict = {d: by_date[d] for d in sorted(by_date)}

    except Exception as e:
        # UI callback boundary: report and fall through to the "nothing found" path.
        print(f"Error reading collection dates: {e}")
        available_dates_dict = {}

    if available_dates_dict:
        display_date_selection_widgets()
    else:
        print("No cloud-free images found for the selected range and AOI.")

def display_date_selection_widgets():
    """
    Build and show the "Step 2" panel: date list, Drive folder, RGB toggle, export button.

    The created widgets are published as globals (``date_selector``,
    ``output_folder_widget``, ``export_rgb_toggle``, ``export_button``) so the
    export callback can read them. Everything is wrapped in a single VBox so
    Colab doesn't bury the controls.
    """
    global date_selector, export_button, output_folder_widget, export_rgb_toggle

    date_options = list(available_dates_dict)
    visible_rows = min(10, max(1, len(date_options)))

    date_selector = widgets.SelectMultiple(
        options=date_options,
        description='Select Dates:',
        rows=visible_rows,
        layout=widgets.Layout(width='50%'),
    )
    output_folder_widget = widgets.Text(
        value='earth_engine_exports',
        description='Output Folder:',
        layout=widgets.Layout(width='50%'),
    )
    export_rgb_toggle = widgets.Checkbox(
        value=False,
        description='Also export RGB (B4,B3,B2) for each selected date',
        indent=False,
    )
    export_button = widgets.Button(
        description='Start Export',
        button_style='success',
        tooltip='Export NDVI (and RGB if toggled)',
        icon='download',
    )
    export_button.on_click(on_export_button_clicked)

    panel_children = [
        widgets.HTML("<b>Step 2: Pick dates and export</b>"),
        date_selector,
        output_folder_widget,
        export_rgb_toggle,
        export_button,
    ]
    display(widgets.VBox(panel_children))

def _start_drive_export(image, name, folder, region):
    """Start one GeoTIFF export to Google Drive.

    Args:
        image (ee.Image): Image to export.
        name (str): Descriptive file name ending in '.tif'.
        folder (str): Google Drive folder that receives the file.
        region: Export region (the AOI).

    Returns:
        The started task, or None if Earth Engine rejected the request (the
        error is printed so the remaining exports can still be attempted).
    """
    import re

    # Task descriptions only accept letters, digits and ".,:;_-" and are
    # limited to 100 characters; AOI names come from arbitrary file names.
    description = re.sub(r'[^A-Za-z0-9.,:;_-]', '_', name)[:100]
    prefix = name[:-len('.tif')] if name.endswith('.tif') else name

    try:
        task = ee.batch.Export.image.toDrive(
            image=image,
            description=description,
            folder=folder,
            fileNamePrefix=prefix,
            scale=10,
            region=region,
            fileFormat='GeoTIFF',
            crs='EPSG:4326'
        )
        task.start()
    except ee.EEException as err:
        print(f"Failed to start export {name}: {err}")
        return None
    return task


def on_export_button_clicked(_):
    """
    Button callback that starts the Drive exports. Exports either:
      - Mean NDVI across the processed collection (if 'Export Mean NDVI Only' is on), OR
      - Per-date NDVI (and optional RGB) for the selected dates.

    For per-date exports, all images acquired on a selected UTC date are
    mosaicked into one image before clipping to the AOI, so an AOI covered by
    several granules on the same day yields a single, complete GeoTIFF per
    date rather than one arbitrary granule.

    Args:
        _: The clicked button (unused).
    """
    global processed_collection_global, available_dates_dict

    if 'aoi' not in globals() or aoi is None:
        print("Please upload/define an AOI first.")
        return
    if processed_collection_global is None:
        print("Please click 'Get Available Dates' first.")
        return

    selected_dates = list(date_selector.value) if 'date_selector' in globals() else []
    export_mean = mean_ndvi_toggle.value
    export_rgb  = export_rgb_toggle.value if 'export_rgb_toggle' in globals() else False
    output_folder = output_folder_widget.value if 'output_folder_widget' in globals() else 'earth_engine_exports'
    # A blank text box falls back to the default folder name.
    output_folder = output_folder.strip() or 'earth_engine_exports'

    aoi_name = (aoi_name_global if 'aoi_name_global' in globals() and aoi_name_global
                else "AOI")

    # If no dates chosen and mean not requested, stop
    if not selected_dates and not export_mean:
        print("Please select at least one date or enable 'Export Mean NDVI Only'.")
        return

    # Mean NDVI branch
    if export_mean:
        if export_rgb:
            print("Note: RGB export is not applicable to Mean NDVI. Exporting Mean NDVI only.")
        print("Exporting Mean NDVI...")
        mean_img = processed_collection_global.select('NDVI').mean().clip(aoi)
        start_date_str = start_date_widget.value.strftime('%Y-%m-%d')
        end_date_str   = end_date_widget.value.strftime('%Y-%m-%d')
        fname = f'{aoi_name}_NDVI_Mean_{start_date_str}_{end_date_str}.tif'

        task = _start_drive_export(mean_img, fname, output_folder, aoi)
        if task is not None:
            print(f"Mean NDVI export started: {task.id}")
        return

    # Per-date exports branch
    print("Exporting selected per-date images...")
    valid_dates = [d for d in selected_dates if d in available_dates_dict]
    if not valid_dates:
        print("No images matched the selected dates.")
        return

    for date_str in valid_dates:
        # Select by UTC day rather than by one stored timestamp, so every
        # image acquired that day contributes to the mosaic.
        day_start = ee.Date(date_str)
        day_images = processed_collection_global.filterDate(
            day_start, day_start.advance(1, 'day')
        )
        img = day_images.mosaic().clip(aoi)

        # NDVI export
        ndvi_name = f'{aoi_name}_NDVI_{date_str}.tif'
        t_ndvi = _start_drive_export(img.select('NDVI'), ndvi_name, output_folder, aoi)
        if t_ndvi is not None:
            print(f"NDVI export started: {ndvi_name} -> task {t_ndvi.id}")

        # Optional RGB export (B4,B3,B2)
        if export_rgb:
            rgb_name = f'{aoi_name}_RGB_{date_str}.tif'
            t_rgb = _start_drive_export(
                img.select(['B4', 'B3', 'B2']), rgb_name, output_folder, aoi
            )
            if t_rgb is not None:
                print(f"RGB export started: {rgb_name} -> task {t_rgb.id}")

# -----------------------
# Initial controls panel
# -----------------------
# Step-1 controls: date range, mean-NDVI switch and the button that looks up
# the available acquisition dates.
mean_ndvi_toggle = widgets.Checkbox(
    description='Export Mean NDVI Only',
    value=False,
    indent=False,
)

start_date_widget = widgets.DatePicker(value=datetime(2023, 1, 1), description='Start Date:')
end_date_widget = widgets.DatePicker(value=datetime(2023, 12, 31), description='End Date:')
date_range_row = widgets.HBox([start_date_widget, end_date_widget])

get_dates_button = widgets.Button(
    description='Get Available Dates',
    icon='search',
    button_style='info',
    tooltip='Find available cloud-free dates',
)
get_dates_button.on_click(get_available_dates)

step_one_title = widgets.HTML("<b>Step 1: Choose dates and (optionally) Mean NDVI</b>")
initial_panel = widgets.VBox([
    step_one_title,
    date_range_row,
    mean_ndvi_toggle,
    get_dates_button,
])
display(initial_panel)


VBox(children=(HTML(value='<b>Step 1: Choose dates and (optionally) Mean NDVI</b>'), HBox(children=(DatePicker…

Please upload/define an AOI first.


So I think that after I do this, I should try to learn how to use GeoAI to detect trees so we can cut those out easily. Then I can download a pile of NDVI imagery for a ranch, cut out the trees, and plot the NDVI time series at fairly high temporal resolution.
Realistically, I should try to continue this notebook as far as possible: split out fields and plot NDVI values over time for each one, maybe using seaborn or plotly, etc.
