<a href="https://colab.research.google.com/github/andyarnell/sepal_mgci/blob/master/Calculation_colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### Clone SDG 15.4.2 github repo into colab instance

In [5]:
# Change the current working directory to "/content".
%cd "/content"

# Clone the GitHub repository "sepal_mgci" into the current directory.
!git clone https://github.com/andyarnell/sepal_mgci.git

/content
fatal: destination path 'sepal_mgci' already exists and is not an empty directory.


#### Installing missing packages

In [9]:
# Load the autoreload extension to automatically reload modules.
%load_ext autoreload

# Set autoreload to reload all modules before executing code.
%autoreload 2

# Function to install a package if it's not already installed
def install_if_not_exists(package_name):
    try:
        __import__(package_name)
        print(f"{package_name} is already installed.")
    except ImportError:
        !pip install -q {package_name}
        print(f"{package_name} has been installed.")

# List of packages to install if not already installed
packages_to_install = ['geemap', 'unidecode', 'google-api-python-client',
                      'google-auth-httplib2', 'google-auth-oauthlib','sepal_ui']

# Install necessary packages
for package in packages_to_install:
    install_if_not_exists(package)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
geemap is already installed.
unidecode is already installed.
google-api-python-client has been installed.
google-auth-httplib2 has been installed.
google-auth-oauthlib has been installed.
sepal_ui is already installed.


#### Importing packages
##### Includes modules and functions from the sepal_mgci repository

In [10]:
import ee # google earth engine
import os # operating system library

# Authenticate and initialize Google Earth Engine
gee_project_name = "ee-andyarnellgee"  # Need a valid Google project name

ee.Authenticate()

ee.Initialize(project=gee_project_name)

from datetime import datetime # for timestamtping error log
import pandas as pd # pandas library for tabular data manipulation
# import geemap # useful for conv
import re # for manipulating strings
# import pygaul # not using currently

from unidecode import unidecode # for converting non ascii compliant symbols in country names
from google.colab import auth # for accessing google drive
from google.colab import drive
from googleapiclient.discovery import build # for getting authorisated access to google drive
from openpyxl.utils import get_column_letter # formatting excel report file
from openpyxl.styles import Alignment # formatting excel report file

# Change current directory to sepal_mgci (i.e. the local copy of the github repository)
%cd "/content/sepal_mgci"


# Import specific parameters needed
from component.parameter.module_parameter import DEM_DEFAULT, LC_MAP_MATRIX

# Import scripts and modules (mostly functions for mgci calculation and formatting)
from component.scripts.gee import reduce_regions # for running summary statistics in GEE
from component.scripts.scripts import get_a_years, map_matrix_to_dict, parse_result # parameter prep and reformatting
from component.scripts.scripts import read_from_csv
from component.scripts import sub_a, sub_b, mountain_area as mntn

print("Imports complete")

/content/sepal_mgci
Imports complete


#### Get access to Google Drive for script to retrieve output csvs
Includes agreeing to access request pop up window

In [13]:
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


##### Functions

In [14]:
def folder_exists(folder_name, parent_folder_id=None):
    """
    Check if a folder exists in Google Drive.

    Args:
    - folder_name (str): Name of the folder to check.
    - parent_folder_id (str): ID of the parent folder where to search for the folder.
                              Default is None, meaning the search will be performed in the root.

    Returns:
    - bool: True if the folder exists, False otherwise.
    """
    # Authenticate user
    auth.authenticate_user()

    # Build the Drive v3 service
    drive_service = build('drive', 'v3')

    # Prepare query to check if folder exists
    query = f"name='{folder_name}' and mimeType='application/vnd.google-apps.folder' and trashed=false"
    if parent_folder_id:
        query += f" and '{parent_folder_id}' in parents"

    try:
        # Execute the search query
        folders = drive_service.files().list(q=query, fields='files(id)', includeItemsFromAllDrives=True, supportsAllDrives=True).execute().get('files', [])
        return bool(folders)
    except Exception as e:
        print(f"An error occurred: {e}")
        return False


def create_folder(folder_name, parent_folder_id=None):
    """
    Create a folder in Google Drive.

    Args:
    - folder_name (str): Name of the folder to be created.
    - parent_folder_id (str): ID of the parent folder where the new folder will be created.
                              Default is None, meaning the folder will be created in the root.

    Returns:
    - str: ID of the newly created folder.
    """
    # Authenticate user
    auth.authenticate_user()

    # Build the Drive v3 service
    drive_service = build('drive', 'v3')

    # Prepare folder metadata
    folder_metadata = {
        'name': folder_name,
        'mimeType': 'application/vnd.google-apps.folder'
    }
    if parent_folder_id:
        folder_metadata['parents'] = [parent_folder_id]

    # Create the folder
    folder = drive_service.files().create(body=folder_metadata, fields='id').execute()

    # Return the ID of the newly created folder
    return folder.get('id')


def create_folder_if_not_exists(folder_name, parent_folder_id=None):
    """
    Create a folder in Google Drive if it doesn't already exist.

    Args:
    - folder_name (str): Name of the folder to be created.
    - parent_folder_id (str): ID of the parent folder where the new folder will be created.
                              Default is None, meaning the folder will be created in the root.

    Returns:
    - str: ID of the newly created folder or the existing folder if it already exists.
    """
    if folder_exists(folder_name, parent_folder_id):
        print(f"Folder '{folder_name}' already exists.")
        return None
    else:
        return create_folder(folder_name, parent_folder_id)


def sanitize_description(description):
    allowed_characters_pattern = r"[^a-zA-Z0-9.,:;_ \-]"  # Define a regex pattern for characters not in the allowed set
    sanitized_description = re.sub(allowed_characters_pattern, "", description)  # Remove characters not in the allowed set
    return sanitized_description



####Parameters:
Set output folder paths etc

In [15]:
drive_home ="/content/drive/MyDrive/" # Google Drive location. Should be the same for most people.

final_report_name = "sdg_15_4_2_A_default_global.xlsx" # final excel file name

final_report_folder = "sdg_15_4_2_A_combined_report" # final excel output folder

stats_csv_folder = "sdg_15_4_2_A_csvs" # Folder name for stats tables exported from GEE for each AOI

excel_reports_folder = "sdg_15_4_2_A_reports" # Folder name for formatted excel tables for each AOI

admin_asset_property_name = "ADM0_NAME" # property/column name in asset: used in report for "geo_area_name" column

error_log_file_path = drive_home + excel_reports_folder + "/"+"1_error_log" +".csv"

create_folder_if_not_exists(stats_csv_folder) # Create the folder in Google Drive if it doesn't exist
create_folder_if_not_exists(excel_reports_folder) # Create the folder in Google Drive if it doesn't exist
create_folder_if_not_exists(final_report_folder) # Create the folder in Google Drive if it doesn't exist

debug = False # more verbose messaging - set to true for debugging code

export = False # set to true to run GEE stats processing tasks (if false allows checks on code)

Folder 'sdg_15_4_2_A_csvs' already exists.
Folder 'sdg_15_4_2_A_reports' already exists.
Folder 'sdg_15_4_2_A_combined_report' already exists.


In [16]:
# admin_boundaries = ee.FeatureCollection("FAO/GAUL/2015/level0") # NB 16 extra rows: Canada (9), United States of America (4), Australia (2) and West Bank (1).

admin_boundaries = ee.FeatureCollection("FAO/GAUL_SIMPLIFIED_500m/2015/level0") # NB 4 extra rows: Canada (3) and West Bank (1)

# admin_boundaries = admin_boundaries.filter(ee.Filter.gt("Shape_Area",10)).limit(10)

list_of_countries = admin_boundaries.aggregate_array(admin_asset_property_name).getInfo()

list_of_countries = list(set(list_of_countries)) # remove dupicates
print ("Length of admin boundaries to process", len(list_of_countries))
print ("Length of distinct admin boundaries to process", (len(set(list_of_countries))))


Length of admin boundaries to process 276
Length of distinct admin boundaries to process 276


## SUB INDICATOR A

In [17]:
# Read the default land cover remapping table and convert it to a dictionary
default_map_matrix = map_matrix_to_dict(LC_MAP_MATRIX)

In [18]:
# For SUB_A indicator, we need to set the following structure
a_years = {
    1: {"asset": "users/amitghosh/sdg_module/esa/cci_landcover/2000", "year": 2000},
    2: {"year": 2003, "asset": "users/amitghosh/sdg_module/esa/cci_landcover/2003"},
    3: {"year": 2007, "asset": "users/amitghosh/sdg_module/esa/cci_landcover/2007"},
    4: {"year": 2010, "asset": "users/amitghosh/sdg_module/esa/cci_landcover/2010"},
}

# Just extract the years from the a_years dictionary
single_years = [y["year"] for  y in a_years.values()]

In [19]:
# you can monitor your GEE tasks here : https://code.earthengine.google.com/tasks (and you can bulk cancel all if needed!)

counter=0 # starting place of counter used to keep track of number of tasks that are being run

for aoi_name in list_of_countries:

    aoi = admin_boundaries.filter(ee.Filter.eq(admin_asset_property_name,aoi_name))#.first()

    # gets areas of landcover in each mountain belt in each country
    # uses reduce_regions function imported from the cloned sepal_mgci git hub repository (see Imports section)
    # pixels counted at native resolution (scale) of input land cover (or DEM if RSA implementation)
    process = ee.FeatureCollection([
        ee.Feature(
            None,
            reduce_regions(
                aoi,
                remap_matrix=default_map_matrix,
                rsa=False,
                # dem=param.DEM_DEFAULT,
                dem=DEM_DEFAULT, #default digital elevation model (DEM). Relevant for the real surface area (RSA) implementation.
                lc_years= year,
                transition_matrix=False
            )
        ).set("process_id", year[0]["year"])
        for year in get_a_years(a_years) # creates GEE images and runs stats on each. Images to run are in the 'a_years" dictionary (above)
    ])

    #make name acceptable for running tasks (i.e., removes special characters)
    task_name = str(sanitize_description(unidecode(aoi_name)))


    task = ee.batch.Export.table.toDrive(
        **{  #asterisks unpack dictionary into keyword arguments format
            "collection": process,
            "description": task_name,
            "fileFormat": "CSV",
            "folder":stats_csv_folder,
            "selectors": [
                "process_id",
                "sub_a",
            ],
        }
    )

    counter+=1

    print (f"\r process {counter}/{len(list_of_countries)} {aoi_name} ", end="") #print in place (remove \r and end="" for verbose version)

    if export:
      task.start()



 process 276/276 Guyana 

# Read, process, and create report tables

#####Manually check your earth engine task status, once the tasks are complete, run the next cell.

The cell will produce formatted excel files for each country.
Missing files or other errors will be listed in the Error_log.csv

In [None]:

import os
import pandas as pd
from openpyxl.utils import get_column_letter
from openpyxl.styles import Alignment
from datetime import datetime

# Initialize the counter
counter = 0

# Loop over each AOI name in the list of countries
for aoi_name in list_of_countries:

    # Increment the counter for each iteration
    counter += 1

    # Clean the AOI name
    aoi_name_clean = str(sanitize_description(unidecode(aoi_name)))

    try:
        # Construct the file path for the stats CSV file
        stats_csv_file = aoi_name_clean + ".csv"
        stats_csv_file_path = os.path.join(drive_home, stats_csv_folder, stats_csv_file)

        message = (f"Process {counter}, {stats_csv_file}")

        # Read the results from the CSV file and parse it to a dictionary
        dict_results = read_from_csv(stats_csv_file_path)

        details = {
            "geo_area_name": aoi_name,
            "ref_area": " ",
            "source_detail": " ",
        }

        # Generate reports for the sub_a and mtn indicators
        sub_a_reports = []
        mtn_reports = []

        for year in single_years:
            if debug: print(f"Reporting {year} for sub_a")
            parsed_df = parse_result(dict_results[year]["sub_a"], single=True)
            sub_a_reports.append(sub_a.get_reports(parsed_df, year, **details))

            if debug: print(f"Reporting {year} for mtn")
            mtn_reports.append(mntn.get_report(parsed_df, year, **details))

        # Concatenate the mtn reports
        mtn_reports_df = pd.concat(mtn_reports)

        # Concatenate the sub a reports
        er_mtn_grnvi_df = pd.concat([report[0] for report in sub_a_reports])
        er_mtn_grncov_df = pd.concat([report[1] for report in sub_a_reports])

        # Define the output report file path
        report_file_path = os.path.join(drive_home, excel_reports_folder, aoi_name_clean + ".xlsx")
        if debug: print("Report file path:", report_file_path)

        # Create the Excel file with the reports
        with pd.ExcelWriter(report_file_path) as writer:
            mtn_reports_df.to_excel(writer, sheet_name="Table1_ER_MTN_TOTL", index=False)
            er_mtn_grncov_df.to_excel(writer, sheet_name="Table2_ER_MTN_GRNCOV", index=False)
            er_mtn_grnvi_df.to_excel(writer, sheet_name="Table3_ER_MTN_GRNCVI", index=False)

            # Adjust column widths and alignment for each sheet
            for sheetname in writer.sheets:
                worksheet = writer.sheets[sheetname]
                for col in worksheet.columns:
                    max_length = 0
                    column = col[0]
                    for cell in col:
                        try:
                            if len(str(cell.value)) > max_length:
                                max_length = len(cell.value)
                        except:
                            pass
                    adjusted_width = max(max_length, len(str(column.value))) + 4
                    worksheet.column_dimensions[get_column_letter(column.column)].width = adjusted_width

                    # Align "obs_value" column to the right
                    if "OBS" in column.value:
                        for cell in col:
                            cell.alignment = Alignment(horizontal="right")

    except Exception as e:
        # If an error occurs, catch the exception and handle it
        message = (f"process {counter}, {stats_csv_file}, Error: {e}")

        # Get the current time
        current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        # Write the error message and file name to the error log file
        error_info = pd.DataFrame([[stats_csv_file, str(e), current_time]], columns=['File Name', 'Error Message', 'Time'])

        if not os.path.exists(error_log_file_path):
            error_info.to_csv(error_log_file_path, mode='w', index=False)
        else:
            error_info.to_csv(error_log_file_path, mode='a', header=False, index=False)

    print (f"{message}")

#### Combine excel files into a single report

In [None]:
import pandas as pd

def append_excel_files(file_paths, num_sheets, output_file_path):
    # Initialize a dictionary to store combined DataFrames from different files
    combined_dfs = {}

    # Initialize a counter to track the progress of file processing
    counter = 0

    # Iterate over each file path in the list
    for file_path in file_paths:
        # Load the Excel file
        xls = pd.ExcelFile(file_path)  # Reads file and stores as an ExcelFile object (using the Pandas library)

        # Increment the counter for each iteration
        counter += 1

        # Read each sheet from the Excel file into a DataFrame
        # Only read up to num_sheets specified
        dfs = {sheet_name: xls.parse(sheet_name) for sheet_name in xls.sheet_names[:num_sheets]}

        # Append the DataFrames to the combined_dfs dictionary
        for sheet_name, df in dfs.items():
            if sheet_name in combined_dfs:
                # If the sheet already exists in combined_dfs, concatenate the current DataFrame with the existing one
                combined_dfs[sheet_name] = pd.concat([combined_dfs[sheet_name], df], ignore_index=True)
            else:
                # If the sheet does not exist in combined_dfs, add the DataFrame directly
                combined_dfs[sheet_name] = df

        # Print the progress of processing, overwriting the previous progress
        print(f"\rProcessing {counter}/{len(file_paths)}: {file_path}", end="")

    # Write the combined DataFrames to the specified output file path
    with pd.ExcelWriter(output_file_path) as writer:
        for sheet_name, df in combined_dfs.items():
            # Write each DataFrame to a separate sheet in the output Excel file
            df.to_excel(writer, sheet_name=sheet_name, index=False)


In [None]:
directory_path = drive_home + excel_reports_folder

# # List files in the directory
files = [file for file in os.listdir(directory_path) if file.endswith('.xlsx')]

# Create a list of full file paths
full_file_paths = [os.path.join(directory_path, file) for file in os.listdir(directory_path) if file.endswith('.xlsx')]
print(f"Number of excel files in folder {len(full_file_paths)}")

reports_combined_file_path = drive_home + final_report_name


In [None]:
append_excel_files(file_paths=full_file_paths,num_sheets=3,output_file_path=reports_combined_file_path)

print (f"\n Complete! Output file for SDG 15.4.2 Component A here: {reports_combined_file_path}")