In [1]:
import ee

# Authenticate against Google Earth Engine with a service account.
# `authentication_file` is the path to the service account's private-key JSON;
# it is resolved relative to the notebook's working directory.
service_account = 'water-quality-app@waterqualityapp-383404.iam.gserviceaccount.com'
authentication_file = 'WaterQualityApp_PrivateKey.json'
credentials = ee.ServiceAccountCredentials(service_account, authentication_file)

# The credentials must be handed to Initialize explicitly. A bare ee.Initialize()
# ignores the object built above and falls back to whatever persistent user
# credentials happen to be stored on the machine (or fails if there are none).
ee.Initialize(credentials)

In [2]:
# !pip install geemap
import json
import os
import shapely
import pandas as pd
import geopandas as gpd
import numpy as np
from datetime import date, timedelta
import pyproj
from shapely.geometry import Polygon, MultiPolygon
from datetime import datetime, date

from CloudFree_SatelliteImagery import cld_free_sl2

try:
    import cPickle as pickle
except ImportError:  # Python 3.x
    import pickle

# Path of the pickled water-quality sampling dataset.
in_f_dir = '../Data/WQ_Data_Nov2021.p'

# Deserialize the dataset.
# NOTE(review): pickle.load executes arbitrary code embedded in the file; only
# open pickles that come from a trusted source.
with open(in_f_dir, 'rb') as pickle_file:
    dat_dic = pickle.load(pickle_file)

# Pull out the two tables used below: the sampling records and the
# de-duplicated site metadata.
stack_frame = dat_dic['StackFrame']
metadata = dat_dic['metadataWQ'].drop_duplicates()

# Keep samples from 2019 onwards only, because COPERNICUS/S2_SR is only
# available from Dec 2018. Then keep just the sites that still have samples.
is_after_2018 = stack_frame['Year'] > 2018
filtered_stack_frame = stack_frame[is_after_2018]
has_recent_samples = metadata['nzsegment'].isin(filtered_stack_frame['nzsegment'])
filtered_metadata = metadata[has_recent_samples]

# Cloud-masking settings handed unchanged to cld_free_sl2.
CONFIGURATION = {
    'CLOUD_FILTER': 80,
    'CLD_PRB_THRESH': 50,
    'NIR_DRK_THRESH': 0.15,
    'CLD_PRJ_DIST': 1,
    'BUFFER': 50,
}

# Coordinate transformer from EPSG:2193 to EPSG:4326 (WGS84).
# always_xy=True makes the transformer take and return (x, y) ordered values.
source_crs = "EPSG:2193"
target_crs = "EPSG:4326"
transformer = pyproj.Transformer.from_crs(source_crs, target_crs, always_xy=True)

# ---------------------------------------------------------------------------
# Collect remote-sensing (RS) index statistics for every sampling site and
# every sampling date, checkpointing the results to pickle files as we go.
# ---------------------------------------------------------------------------

# Sites whose index label in `filtered_metadata` is <= this value are announced
# but not processed. The value 33 reproduces the original hard-coded resume
# point; set it to -1 to process every site.
RESUME_AFTER_INDEX = 33

# Length (in days) of the window that ends on each sampling date and from which
# the cloud-free composite is built.
COMPOSITE_WINDOW_DAYS = 14

# Output locations for the per-site/per-date and the per-site checkpoints.
SITE_DATE_DIR = '../Data/Indices'
SITE_DIR = '../Data/Indices/Sites'

# Band names of the index image, in the order they are concatenated.
INDEX_NAMES = ['MNDWI', 'NDVI', 'NIRv', 'NSMI', 'BSI', 'EVI', 'SAVI', 'NDMI', 'NBR', 'CI', 'LAI', 'FAPAR']

# Combined reducer producing <band>_min, <band>_median, <band>_max and
# <band>_stdDev in one pass. Built once because it does not depend on the site
# or the date.
STATS_REDUCER = (
    ee.Reducer.min()
    .combine(reducer2=ee.Reducer.median(), sharedInputs=True)
    .combine(reducer2=ee.Reducer.max(), sharedInputs=True)
    .combine(reducer2=ee.Reducer.stdDev(), sharedInputs=True)
)


def _ring_to_wgs84(ring):
    """Reproject the vertices of a shapely ring with `transformer`.

    Only the first two ordinates of each vertex are used, so a geometry that
    carries a Z value does not break the unpacking. Returns a list of [x, y]
    pairs.
    """
    return [list(transformer.transform(coord[0], coord[1])) for coord in ring.coords]


def _catchment_to_ee(geometry):
    """Convert a shapely Polygon or MultiPolygon into an ee.Geometry.

    Only exterior rings are transferred; interior rings (holes) are ignored,
    exactly as in the original implementation. The parts of a MultiPolygon are
    dissolved into a single geometry.
    """
    if isinstance(geometry, MultiPolygon):
        parts = [ee.Geometry.Polygon(_ring_to_wgs84(part.exterior)) for part in geometry.geoms]
        return ee.Geometry.MultiPolygon(parts).dissolve()
    return ee.Geometry.Polygon(_ring_to_wgs84(geometry.exterior))


def _build_index_image(image, region, start_date):
    """Return a 12-band ee.Image of RS indices derived from `image`.

    The result is stamped with `start_date` as 'system:time_start' and clipped
    to `region`. Band names match INDEX_NAMES.
    """
    blue = image.select('B2')
    green = image.select('B3')
    red = image.select('B4')
    nir = image.select('B8')
    swir1 = image.select('B11')

    mndwi = image.normalizedDifference(['B3', 'B11']).rename("MNDWI")  # Modified Normalized Difference Water Index
    ndvi = image.normalizedDifference(['B8', 'B4']).rename("NDVI")  # Normalized Difference Vegetation Index
    nirv = ndvi.multiply(nir).rename("NIRv")  # Another vegetation index
    nsmi = image.normalizedDifference(['B8', 'B11']).rename("NSMI")  # Normalized Soil Index
    # NOTE(review): this is (ND(B3, B11) - B8) / (B11 + B8), kept exactly as
    # in the original. It does not look like the commonly published Bare Soil
    # Index formula - confirm before relying on the values.
    bsi = (
        image.normalizedDifference(['B3', 'B11'])
        .subtract(nir)
        .divide(swir1.add(nir))
        .rename("BSI")
    )

    evi = image.expression(
        '2.5 * ((NIR - RED) / (NIR + 6 * RED - 7.5 * BLUE + 1))',
        {'NIR': nir, 'RED': red, 'BLUE': blue},
    ).rename("EVI")  # Enhanced Vegetation Index
    savi = image.expression(
        '(1 + 0.5) * (NIR - RED) / (NIR + RED + 0.5)',
        {'NIR': nir, 'RED': red},
    ).rename("SAVI")  # Soil Adjusted Vegetation Index
    # NOTE(review): NDMI uses the same band pair as NSMI above, so the two
    # bands are identical.
    ndmi = image.normalizedDifference(['B8', 'B11']).rename("NDMI")  # Normalized Difference Moisture Index
    nbr = image.normalizedDifference(['B8', 'B12']).rename("NBR")  # Normalized Burn Ratio
    ci = red.divide(green).subtract(1).rename("CI")  # Chlorophyll Index
    lai = image.expression(
        '3.618 * exp(-0.488 * (RED - GREEN))',
        {'RED': red, 'GREEN': green},
    ).rename("LAI")  # Leaf Area Index
    fapar = image.expression(
        '(NIR - RED) / (0.88 * NIR + 0.12 * RED)',
        {'NIR': nir, 'RED': red},
    ).rename("FAPAR")  # Fraction of Absorbed Photosynthetically Active Radiation

    stacked = ee.Image.cat(mndwi, ndvi, nirv, nsmi, bsi, evi, savi, ndmi, nbr, ci, lai, fapar)
    stacked = stacked.set('system:time_start', ee.Date(start_date).millis())
    # Clip the image to the extent of the current sampling site catchment.
    return stacked.clip(region)


def _summarise_indices(index_image, region):
    """Reduce `index_image` over `region` and return {index: {'median', 'stdv'}}.

    All bands are reduced in a single reduceRegion call and fetched with a
    single getInfo() round trip, instead of one reduction and two round trips
    per index. A statistic that Earth Engine could not compute is stored as
    None.
    """
    stats = index_image.reduceRegion(
        reducer=STATS_REDUCER,
        geometry=region,
        scale=10,
        maxPixels=1e19,
    ).getInfo()
    return {
        name: {
            'median': stats.get(f'{name}_median'),
            'stdv': stats.get(f'{name}_stdDev'),
        }
        for name in INDEX_NAMES
    }


# Make sure the checkpoint folders exist before the first write.
os.makedirs(SITE_DATE_DIR, exist_ok=True)
os.makedirs(SITE_DIR, exist_ok=True)

# Dictionary that will contain all the RS indices for all polygons and all
# sampling periods: {str(seg_id): {END_DATE: {index: {'median', 'stdv'}}}}.
# A plain dict is required here: assigning a per-site dict as a "column" of a
# (Geo)DataFrame would misalign or fail once sites have different dates.
rs_indices = {}

for i, site in filtered_metadata.iterrows():
    seg_id = site['nzsegment']
    print(i, "We are collecting data for sampling site", seg_id)

    # `i` is the DataFrame index label, not a running counter.
    if i <= RESUME_AFTER_INDEX:
        continue

    # Sampling dates recorded for the current sampling site.
    site_rows = filtered_stack_frame['nzsegment'] == seg_id
    sampling_dates = filtered_stack_frame.loc[site_rows, 'myDate'].unique()

    # Convert the catchment polygon of the site to an Earth Engine geometry.
    try:
        ee_polygon = _catchment_to_ee(site['geometry'])
    except Exception as e:
        print(f"Invalid geometry at index {i}: {e}")
        continue

    date_dict = {}
    for sampling_date in sampling_dates:
        try:
            # pd.Timestamp accepts datetime.date, datetime.datetime,
            # numpy.datetime64 and ISO strings alike, so the formatting below
            # works whatever dtype the 'myDate' column has.
            window_end = pd.Timestamp(sampling_date)
            START_DATE = (window_end - timedelta(days=COMPOSITE_WINDOW_DAYS)).strftime("%Y-%m-%d")
            END_DATE = window_end.strftime("%Y-%m-%d")
            print('.....Date in progress:', START_DATE, END_DATE)

            # Create a cloud free sentinel-2 image for a specific region and time
            cld_free_image = cld_free_sl2(START_DATE, END_DATE, CONFIGURATION, ee_polygon)

            calc_indices = _build_index_image(cld_free_image, ee_polygon, START_DATE)
            statistics_dict = _summarise_indices(calc_indices, ee_polygon)

            # Checkpoint the statistics of this site/date combination.
            site_date_indices = f'../Data/Indices/RS_Indices_{seg_id}_{END_DATE}.p'
            with open(site_date_indices, 'wb') as f:
                pickle.dump(statistics_dict, f)

            date_dict[END_DATE] = statistics_dict

        except Exception as e:
            # Best effort: a failing date must not abort the whole site.
            print(f"Something went wrong: {e}")
            continue

    # Add the indices of all dates to the results of the current sampling site.
    rs_indices[f'{seg_id}'] = date_dict

    try:
        # NOTE(review): as in the original, the file named after the current
        # site receives everything accumulated so far in `rs_indices`, not
        # only this site's `date_dict` - confirm this is intended.
        site_indices = f'../Data/Indices/Sites/RS_Indices_{seg_id}.p'
        with open(site_indices, 'wb') as f:
            pickle.dump(rs_indices, f)

    except Exception as e:
        print(f"Something went wrong when trying to save the results for {seg_id}: {e}")

# # Add the RS Dictionary to the Sampling Data
# dat_dic['RsIdices'] = rs_indices

# # Define the output file path
# output_file = '../Data/WQ_Data_Nov2021_RS.p'

# # Save the dictionary as a pickle file
# with open(output_file, 'wb') as f:
#     pickle.dump(dat_dic, f)

The required modules are installed
0 We are collecting data for sampling site 2038450.0
1 We are collecting data for sampling site 2036529.0
2 We are collecting data for sampling site 2035811.0
3 We are collecting data for sampling site 2032466.0
4 We are collecting data for sampling site 2032082.0
5 We are collecting data for sampling site 2031444.0
6 We are collecting data for sampling site 2045595.0
7 We are collecting data for sampling site 2038228.0
9 We are collecting data for sampling site 2038644.0
10 We are collecting data for sampling site 2040298.0
11 We are collecting data for sampling site 2040105.0
12 We are collecting data for sampling site 2040035.0
14 We are collecting data for sampling site 2039146.0
15 We are collecting data for sampling site 2039478.0
16 We are collecting data for sampling site 2041640.0
17 We are collecting data for sampling site 2040911.0
18 We are collecting data for sampling site 2035301.0
19 We are collecting data for sampling site 2033563.0
20

KeyboardInterrupt: 