# Demo - Build CRF with Sentinel-2-L2A

This notebook demonstrates how to query the STAC API for Sentinel-2 data, process the data to remove cloud pixels, and create a cloud-free composite image using ArcGIS Pro and the ArcGIS Image Analyst extension.

## Prerequisites

- ArcGIS Pro
- ArcGIS Image Analyst extension
- Access to the STAC API

## Import Libraries

First, we import the necessary libraries:

In [1]:
import arcpy
from arcpy import AIO
from datetime import datetime, timedelta

## Define Parameters

We define the following parameters:

- **STAC API URL**: The URL for the STAC API for Sentinel-2 data.
- **Path to the Feature Class**: The path to the feature class representing the Area of Interest (AOI).
- **Output CRF Path**: The path where the output CRF will be saved.
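- **Path to the ACS File**: The path to the ArcGIS cloud storage connection (`.acs`) file used to access the Sentinel-2 data.
- **Start and End Dates**: The acquisition date range for the search, chosen with the two date-picker widgets that the cell below displays.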

In [2]:
# STAC API endpoint that serves the Sentinel-2 catalog
stac_api_url = "https://planetarycomputer.microsoft.com/api/stac/v1"

# Feature class holding the Area of Interest (AOI)
aoi_feature_class = r'F:\ArcGIS Pro Projects\South_Lebanon\South_Lebanon.gdb\lbn_admbnda_adm3'

# Destination of the output CRF
output_crf_path = r'F:\ArcGIS Pro Projects\South_Lebanon\Lebanon_Sentinel2.crf'

# ACS (.acs) file for the Sentinel-2 L2A data; an AIO object is built from it
acs_file_path = r'C:\AMPC_Resources\ACS_Files\esrims_pc_sentinel-2-l2a.acs'
a_sentinel_2 = AIO(acs_file_path)

# Wrap the AOI feature class in a feature layer named "aoi_layer"
aoi_layer = "aoi_layer"
arcpy.management.MakeFeatureLayer(aoi_feature_class, aoi_layer)

# Date pickers for the start and the end of the search window.
# NOTE(review): `widgets` is not imported in the visible cells — presumably
# ipywidgets; confirm it is imported before this cell runs.
start_date_picker = widgets.DatePicker(description='Start Date', disabled=False)
end_date_picker = widgets.DatePicker(description='End Date', disabled=False)
# Function to update the start and end dates
def update_dates(change):
    """Refresh the global ``start_date`` / ``end_date`` strings from the pickers.

    Meant to be used as the change callback of both date pickers. It runs as
    soon as either picker changes, so the other picker may still be empty
    (its ``value`` is ``None``); in that case nothing is updated and nothing
    is printed.

    Args:
        change: Change notification passed by the widget; not used.
    """
    global start_date, end_date
    start_value = start_date_picker.value
    end_value = end_date_picker.value
    # Half-filled state: calling strftime on None would raise AttributeError.
    if start_value is None or end_value is None:
        return
    start_date = start_value.strftime("%Y-%m-%d")
    end_date = end_value.strftime("%Y-%m-%d")
    print(f"Start Date: {start_date}, End Date: {end_date}")

# Call update_dates whenever the value of either picker changes
for date_picker in (start_date_picker, end_date_picker):
    date_picker.observe(update_dates, names='value')

# Show both pickers in the cell output
display(start_date_picker, end_date_picker)


In [None]:
# Get the extent of the feature layer
extent = arcpy.Describe(aoi_layer).extent
# Delete AOI layer
arcpy.Delete_management(aoi_layer)

# A STAC bbox is expressed in WGS84 longitude/latitude. Reproject the extent
# when the AOI is stored in a different, known coordinate system; otherwise
# the search would be run against the wrong location.
aoi_sr = extent.spatialReference
if aoi_sr is not None and aoi_sr.name != "Unknown" and aoi_sr.factoryCode != 4326:
    extent = extent.projectAs(arcpy.SpatialReference(4326))

bbox_coords = [extent.XMin, extent.YMin, extent.XMax, extent.YMax]

# Construct the STAC API query:
# - only the "sentinel-2-l2a" collection, restricted to the Sentinel-2A platform
# - start_date / end_date are the globals written by the date-picker callback
# - at most 100 items are requested
query = {
    "collections": ["sentinel-2-l2a"],
    "bbox": bbox_coords,
    "query": {"platform": {"in": ["Sentinel-2A"]}},
    "datetime": f"{start_date}/{end_date}",
    "limit": 100
}

# Maps RasterCollection field names (keys) to the STAC item properties (values)
# of the Sentinel-2 L2A collection
attribute_dict = {
    "Name":"id",
    "Cloud Cover":"eo:cloud_cover",
    "StdTime":"datetime",
    "Platform":"platform",
    "Spatial Reference":"proj:epsg",
    "Extent": "bbox",
}

# Create a RasterCollection object that contains the search results
rc = arcpy.ia.RasterCollection.fromSTACAPI(stac_api=stac_api_url,
                                           query=query,
                                           attribute_dict=attribute_dict)

In [None]:
def remove_cloud(item):
    """Mask unwanted pixels of one collection item using its SCL band.

    Args:
        item: Mapping describing one raster of the collection. Must provide
            the raster under ``'Raster'`` and its date under either
            ``"AcquisitionDate"`` or ``"StdTime"``.

    Returns:
        dict: ``{'raster': <masked 12-band raster>, "AcquisitionDate": <date>}``,
        where the date is passed through unchanged from the item.

    Raises:
        KeyError: If the item carries neither date field.
    """
    raster = item['Raster']
    # The date may be stored under either field name; prefer "AcquisitionDate".
    try:
        acquisition_date = item["AcquisitionDate"]
    except KeyError:
        acquisition_date = item["StdTime"]

    # Band 13 is used as the SCL band
    # (assumes the band order of the configured Sentinel-2 raster — TODO confirm).
    sclband = arcpy.ia.ExtractBand(raster, [13])
    # We don't need all bands from here on, so reduce the raster to bands 1-12.
    reduced_ras = arcpy.ia.ExtractBand(raster, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12])
    # Create a mask raster by remapping the SCL band so that all NoData, cloud,
    # cloud shadow, cirrus and undefined pixels are taken out.
    # Those values in the SCL band are 0 to 3 and 7 to 10.
    # As the mask definition always ranges from low [included] to high
    # [excluded], the NoData ranges are specified as [0-4] and [7-11];
    # the remaining values (4-6 and 11-14) are mapped to 1.
    cloud_mask = arcpy.ia.Remap(raster=sclband,
                                input_ranges=[4, 7, 11, 15],
                                output_values=[1, 1],
                                no_data_ranges=[0, 4, 7, 11],
                                allow_unmatched=False)
    # Clip the reduced raster (bands 1-12) using the remapped mask.
    cloud_free_raster = arcpy.ia.Clip(reduced_ras, aoi=cloud_mask)
    # Return the masked raster so it becomes part of the mapped collection.
    return {'raster': cloud_free_raster, "AcquisitionDate": acquisition_date}

In [5]:
# Run remove_cloud on every item of the RasterCollection through rc.map and
# keep the result as the cloud-masked collection
rc_cloud_free = rc.map(remove_cloud)

{'type': 'FeatureCollection',
 'features': [],
 'links': [{'rel': 'root',
   'type': 'application/json',
   'href': 'https://planetarycomputer.microsoft.com/api/stac/v1/'},
  {'rel': 'self',
   'type': 'application/json',
   'href': 'https://planetarycomputer.microsoft.com/api/stac/v1/search'}]}

In [None]:
# Combine the overlapping cloud-masked images into a single composite by taking
# the median, ignoring NoData, over the union of the image extents
cloud_free_composite_median = rc_cloud_free.median(
    ignore_nodata=True,
    extent_type='UnionOf',
)

In [None]:
# Save the cloud-free median composite to the path given by output_crf_path
cloud_free_composite_median.save(output_crf_path)

In [2]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import shape

def main(location='Lebanon', output_dir='.'):
    """Download the building footprints of one geography as GeoJSON files.

    Reads the dataset index CSV, keeps the rows whose ``Location`` equals
    *location*, and writes the data behind each row's ``Url`` to
    ``<QuadKey>.geojson`` inside *output_dir*.

    Args:
        location: Name of the geography to retrieve, spelled as in the
            ``Location`` column of the index. Update to meet your needs.
        output_dir: Directory that receives the GeoJSON files; it is created
            when missing. Defaults to the current working directory.
    """
    import os  # local import: the imports of this cell do not include os

    dataset_links = pd.read_csv("https://minedbuildings.blob.core.windows.net/global-buildings/dataset-links.csv")
    location_links = dataset_links[dataset_links.Location == location]
    if location_links.empty:
        # Make a misspelled or unavailable location visible instead of
        # finishing without writing anything.
        print(f"No dataset links found for location: {location}")
        return

    os.makedirs(output_dir, exist_ok=True)
    for _, row in location_links.iterrows():
        # Each linked file is line-delimited JSON with one feature per line
        df = pd.read_json(row.Url, lines=True)
        # Turn the GeoJSON-like geometry mappings into shapely geometries
        df['geometry'] = df['geometry'].apply(shape)
        gdf = gpd.GeoDataFrame(df, crs=4326)
        gdf.to_file(os.path.join(output_dir, f"{row.QuadKey}.geojson"), driver="GeoJSON")


if __name__ == "__main__":
    main()

In [None]:
import arcpy
import os

# Define the directory containing the geojson files
geojson_directory = r'F:\ArcGIS Pro Projects\South_Lebanon\Buildings\GeoJson'

# Define the output feature class path
output_gdb = r'F:\ArcGIS Pro Projects\South_Lebanon\South_Lebanon.gdb'
output_fc_name = 'Buildings_Lebanon'
output_feature_class = os.path.join(output_gdb, output_fc_name)

# Create a list to store the paths of the geojson files
geojson_files = []

# Walk the directory (subfolders included) to locate all geojson files
for root, dirs, files in os.walk(geojson_directory):
    for file in files:
        if file.endswith('.geojson'):
            geojson_files.append(os.path.join(root, file))

# Check if there are any geojson files found
if not geojson_files:
    print("No geojson files found in the specified directory.")
else:
    # Create an empty WGS84 (EPSG:4326) polygon feature class to merge the
    # geojson files into
    arcpy.management.CreateFeatureclass(output_gdb, output_fc_name, 'POLYGON',
                                        spatial_reference=arcpy.SpatialReference(4326))

    # Each file is first converted to a temporary feature class, then appended
    temp_fc = 'in_memory/temp_fc'
    for geojson_file in geojson_files:
        try:
            # Request polygons explicitly so the converted features match the
            # geometry type of the target feature class
            arcpy.conversion.JSONToFeatures(geojson_file, temp_fc, 'POLYGON')
            # NO_TEST: append without requiring the schemas to match
            arcpy.management.Append(temp_fc, output_feature_class, 'NO_TEST')
        finally:
            # Always remove the temporary data, also when a step above failed,
            # so a leftover cannot block the next conversion
            if arcpy.Exists(temp_fc):
                arcpy.management.Delete(temp_fc)

    print(f"All geojson files have been merged into the feature class: {output_feature_class}")