<a href="https://colab.research.google.com/github/andyarnell/sepal_mgci/blob/master/SDG_15_4_2_Sub_A_Default_values.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **SDG 15.4.2 Sub-indicator A: Calculate Global Default Values**

* This script allows batch processing for this indicator for all countries.

* Output is a single combined Excel file saved to your Google Drive.

* Runs on the cloud using [Google Colab](https://research.google.com/colaboratory/faq.html)

* Requires: [Google Earth Engine](https://earthengine.google.com/) (GEE) account and project and access to Google Drive


### 1) Install required packages

In [1]:
# Load IPython's autoreload extension so that edited modules are re-imported automatically.
%load_ext autoreload

# Mode 2: reload all modules before executing code.
%autoreload 2

# Function to install a package if it's not already installed
def install_if_not_exists(package_name):
    """Install ``package_name`` with pip unless it is already available.

    A package counts as available when either an installed distribution with
    that name exists (covers pip names such as ``google-api-python-client``,
    which are not valid module names and therefore can never be imported
    under that name), or a module with that name can be imported (covers
    names such as ``ee``, which is a module name rather than a pip name).

    Args:
        package_name: Name to check and, if missing, to pass to ``pip install``.

    Returns:
        None. A status line is printed for every outcome.

    NOTE(review): the Earth Engine client is imported as ``ee`` but published
    on PyPI as ``earthengine-api``; passing ``'ee'`` only does the right thing
    when the module is already importable (as it is on Colab).
    """
    import importlib
    import importlib.metadata
    import subprocess
    import sys

    try:
        # Distribution lookup first: it understands pip names and has no import side effects.
        importlib.metadata.distribution(package_name)
        already_installed = True
    except importlib.metadata.PackageNotFoundError:
        try:
            # Fall back to treating the name as an importable module.
            importlib.import_module(package_name)
            already_installed = True
        except ImportError:
            already_installed = False

    if already_installed:
        print(f"{package_name} is already installed.")
        return

    # Run pip through the kernel's own interpreter so the package lands in the
    # environment this notebook is actually executing in.
    completed = subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q", package_name]
    )

    # Only claim success when pip reported success.
    if completed.returncode == 0:
        print(f"{package_name} has been installed.")
    else:
        print(f"{package_name} could not be installed (pip exit code {completed.returncode}).")

# Packages this notebook relies on; each one is passed to install_if_not_exists below.
packages_to_install = [
    'ipyvuetify',
    'ee',
    'unidecode',
    'google-api-python-client',
    'google-auth-httplib2',
    'google-auth-oauthlib',
    'geemap',
]  # admin_boundaries

# Check every package in turn and install the ones that are missing.
for package in packages_to_install:
    install_if_not_exists(package)

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.1/6.1 MB[0m [31m34.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.7/2.7 MB[0m [31m54.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m38.1 MB/s[0m eta [36m0:00:00[0m
[?25hipyvuetify has been installed.
ee is already installed.
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m235.5/235.5 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hunidecode has been installed.
google-api-python-client has been installed.
google-auth-httplib2 has been installed.
google-auth-oauthlib has been installed.
geemap is already installed.


### 2) Access GitHub repository
Clones the SDG 15.4.2 repository (`sepal_mgci`) into Colab.
The repository provides the functions, lookup tables, etc. used in the rest of this notebook.

In [2]:
# Change the current working directory to "/content" for cloning the repo into.
%cd "/content"

# Clone the GitHub repository "sepal_mgci" into the current directory.
# NB: a 'fatal' error on reruns typically just means the folder already exists.
!git clone https://github.com/sepal-contrib/sepal_mgci

# Change working directory to the cloned sepal_mgci GitHub repository
%cd "/content/sepal_mgci"

/content
Cloning into 'sepal_mgci'...
remote: Enumerating objects: 3071, done.[K
remote: Counting objects: 100% (1009/1009), done.[K
remote: Compressing objects: 100% (426/426), done.[K
remote: Total 3071 (delta 663), reused 807 (delta 582), pack-reused 2062[K
Receiving objects: 100% (3071/3071), 5.02 MiB | 17.37 MiB/s, done.
Resolving deltas: 100% (1939/1939), done.
/content/sepal_mgci


### 3) Setup Google Earth Engine
Launches access request pop up window

In [None]:
import ee  # Google Earth Engine

# Registered Google Cloud project used for Earth Engine
# (if unsure of the name see the picture here: https://developers.google.com/earth-engine/cloud/assets)
gee_project_name = "ee-andyarnellgee"  # "insert cloud project here"

# Request access, then initialise Earth Engine with the project named above.
ee.Authenticate()
ee.Initialize(project=gee_project_name)

### 4) Setup Google Drive
Launches access request pop up window

In [None]:
# Google Colab helpers and the Google API client, for accessing Google Drive.
# NOTE(review): `auth` and `build` are not referenced in this cell.
from google.colab import auth, drive
from googleapiclient.discovery import build

# Mount Google Drive at /content/drive
drive.mount('/content/drive')

### 5) Import remaining packages
- imports required packages and functions listed in 'colab_imports.py' script (found here: sepal_mgci/component/scripts)



In [None]:
from component.scripts.colab_imports import *

### 6) Set parameters


Input parameters

In [None]:
# Google Earth Engine project
# NOTE(review): this name is also assigned in the Earth Engine setup cell (section 3), where it is
# passed to ee.Initialize; the assignment here happens after that call and is not used by any
# later cell shown in this notebook.
gee_project_name = "ee-andyarnellgee" # "insert cloud project here"  # a registered cloud project (if unsure of name see pic here: https://developers.google.com/earth-engine/cloud/assets)


# Admin boundaries asset
admin_asset_id = "FAO/GAUL/2015/level0" # ID of the administrative units feature collection

admin_asset_property_name = "ADM0_NAME" # property/column name used to select admin boundaries (e.g. ISO3 code or country name)


# Land cover assets

# For Sub-indicator A (sub_a), we need to set the following structure:
# a numbered entry per reporting year, each giving the land cover "asset" and its "year".
a_years = {
    1: {"asset": "users/amitghosh/sdg_module/esa/cci_landcover/2000", "year": 2000}, # baseline
    2: {"year": 2003, "asset": "users/amitghosh/sdg_module/esa/cci_landcover/2003"}, # subsequent reporting years...
    3: {"year": 2007, "asset": "users/amitghosh/sdg_module/esa/cci_landcover/2007"},
    4: {"year": 2010, "asset": "users/amitghosh/sdg_module/esa/cci_landcover/2010"},
}



Output parameters

---



In [None]:
final_report_folder = "sdg_15_4_2_A_combined_report" # folder name in Google Drive for the final output (created if it doesn't exist)

final_report_name = "sdg_15_4_2_A_default_global.xlsx" # file name for the final Excel output

# Whether to start the GEE export tasks.
# Documented default is True; set to False when debugging or to avoid accidentally re-exporting tasks.
# NOTE: while this is False, the export tasks in section 8 are created but never started,
# so no stats CSV files are produced for section 9 to read.
export = False

# prints more messages
debug = False # default: False. Set to True if debugging code

Temporary output parameters


In [None]:
# Google Drive folder holding the stats tables exported from GEE (one per admin boundary/AOI)
stats_csv_folder = "sdg_15_4_2_A_csvs"

# Google Drive folder holding the formatted Excel tables (one per admin boundary/AOI)
excel_reports_folder = "sdg_15_4_2_A_reports"

# Google Drive location. Don't change unless you know this is incorrect
drive_home = "/content/drive/MyDrive/"

# CSV file, inside the Excel reports folder, where errors are stored
error_log_file_path = "".join(
    (drive_home, excel_reports_folder, "/", "1_error_log", ".csv")
)


### 7) Setup inputs for processing

Create list of boundaries to process

In [None]:
# Admin boundary feature collection
admin_boundaries = ee.FeatureCollection(admin_asset_id)

# Value of the selected property for every feature (the same name can appear more than once)
all_boundary_names = admin_boundaries.aggregate_array(admin_asset_property_name).getInfo()

print ("Length of admin boundaries to process", len(all_boundary_names))

# Remove duplicates and sort. A bare set() of strings has an order that changes from one
# kernel session to the next, which would make the processing order (and the counters
# printed by later cells) differ between runs.
list_of_countries = sorted(set(all_boundary_names))

print ("Length of distinct admin boundaries to process", len(list_of_countries))


Read the default land cover remapping table and convert it to a dictionary

In [None]:
# Convert the default land cover remapping table (LC_MAP_MATRIX) into a dictionary;
# it is passed as `remap_matrix` when the area statistics are calculated.
default_map_matrix = map_matrix_to_dict(LC_MAP_MATRIX)

Select years of land cover to process

In [None]:
# Reporting years, read from the "year" field of each entry in a_years (as defined in parameters)
single_years = [entry["year"] for entry in a_years.values()]

### 8) Calculate area statistics by country
* Runs for each country and each mountain biobelt
* Gets area of land cover reclassified into the 10 SEEA classes
* Repeat for each year specified


In [None]:
# GEE tasks can be monitored here: https://code.earthengine.google.com/tasks
create_folder_if_not_exists(stats_csv_folder)  # Google Drive folder that stores the outputs

counter = 0  # keeps track of how many admin boundaries have been handled

for aoi_name in list_of_countries:

    # Boundary feature(s) whose name property matches this AOI
    aoi = admin_boundaries.filter(ee.Filter.eq(admin_asset_property_name, aoi_name))

    # One feature per reporting year, holding the land cover areas in each mountain belt.
    # reduce_regions comes from the cloned sepal_mgci repository (see Imports section);
    # pixels are counted at the native resolution (scale) of the input land cover
    # (or of the DEM for the RSA implementation).
    yearly_features = [
        ee.Feature(
            None,
            reduce_regions(
                aoi,
                remap_matrix=default_map_matrix,
                rsa=False,
                dem=DEM_DEFAULT,  # default digital elevation model; relevant for the real surface area (RSA) implementation
                lc_years=year,
                transition_matrix=False,
            ),
        ).set("process_id", year[0]["year"])
        for year in get_a_years(a_years)  # images to run are listed in the a_years dictionary
    ]
    process = ee.FeatureCollection(yearly_features)

    # Task names must not contain special characters
    task_name = str(sanitize_description(unidecode(aoi_name)))

    # Table export to Google Drive, keeping only the two columns read back later
    task = ee.batch.Export.table.toDrive(
        collection=process,
        description=task_name,
        fileFormat="CSV",
        folder=stats_csv_folder,
        selectors=["process_id", "sub_a"],
    )

    counter += 1

    # Printed in place (remove \r and end="" for a verbose version)
    print (f"\r process {counter}/{len(list_of_countries)} {aoi_name} ", end="")

    # Tasks are only started when exporting is switched on in the parameters
    if export:
        task.start()



### 9) Read and translate results into report tables

##### NOTE: You will need to wait until the results files for each country have been created in your Google Drive (from the previous step).
- see here to monitor the tasks https://code.earthengine.google.com/tasks
- once tasks are complete, you can run the cell below

This cell formats individual Excel reports for each country.
See `1_error_log.csv` (saved in the Excel reports folder) for missing files/errors.

In [None]:
# Running count of admin boundaries handled
counter = 0

# Google Drive folder that stores the per-country Excel reports
create_folder_if_not_exists(excel_reports_folder)

for aoi_name in list_of_countries:
    counter += 1

    # Cleaned name, produced the same way as the export task names
    aoi_name_clean = str(sanitize_description(unidecode(aoi_name)))

    # Stats CSV expected for this AOI
    stats_csv_file = aoi_name_clean + ".csv"
    stats_csv_file_path = os.path.join(drive_home, stats_csv_folder, stats_csv_file)

    message = f"Process {counter}, {stats_csv_file}"

    try:
        # Parse the exported CSV into a dictionary of results
        dict_results = read_from_csv(stats_csv_file_path)

        details = dict(geo_area_name=aoi_name, ref_area=" ", source_detail=" ")

        # Reports for the sub_a and mtn indicators, one per reporting year
        sub_a_reports = [
            sub_a.get_reports(
                parse_result(dict_results[year]["sub_a"], single=True), year, **details
            )
            for year in single_years
        ]
        mtn_reports = [
            mntn.get_report(
                parse_result(dict_results[year]["sub_a"], single=True), year, **details
            )
            for year in single_years
        ]

        # Stack the yearly tables; each sub_a report contributes two tables
        mtn_reports_df = pd.concat(mtn_reports)
        er_mtn_grnvi_df = pd.concat([report[0] for report in sub_a_reports])
        er_mtn_grncov_df = pd.concat([report[1] for report in sub_a_reports])

        # Output workbook for this AOI
        report_file_path = os.path.join(
            drive_home, excel_reports_folder, aoi_name_clean + ".xlsx"
        )

        with pd.ExcelWriter(report_file_path) as writer:
            mtn_reports_df.to_excel(writer, sheet_name="Table1_ER_MTN_TOTL", index=False)
            er_mtn_grncov_df.to_excel(writer, sheet_name="Table2_ER_MTN_GRNCOV", index=False)
            er_mtn_grnvi_df.to_excel(writer, sheet_name="Table3_ER_MTN_GRNCVI", index=False)

            # Widen each column to fit its longest value and tidy the alignment
            for worksheet in writer.sheets.values():
                for col in worksheet.columns:
                    header_cell = col[0]
                    longest = max(len(str(cell.value)) for cell in col)
                    width = max(longest, len(str(header_cell.value))) + 4
                    worksheet.column_dimensions[get_column_letter(header_cell.column)].width = width

                    # Columns whose header contains "OBS" are right-aligned
                    if "OBS" in header_cell.value:
                        for cell in col:
                            cell.alignment = Alignment(horizontal="right")

    except Exception as e:
        # Report the failure for this AOI and carry on with the next one
        message = f"process {counter}, {stats_csv_file}, Error: {e}"

        current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

        error_info = pd.DataFrame(
            [[stats_csv_file, str(e), current_time]],
            columns=['File Name', 'Error Message', 'Time'],
        )

        # First entry creates the log with a header row; later entries are appended without one
        log_exists = os.path.exists(error_log_file_path)
        error_info.to_csv(
            error_log_file_path,
            mode='a' if log_exists else 'w',
            header=not log_exists,
            index=False,
        )

    print(message)


### 10) Combine excel report files into one

Make a list of files to combine

In [None]:
# Directory path where Excel reports are stored
directory_path = os.path.join(drive_home, excel_reports_folder)

# Files in the directory with the '.xlsx' extension, sorted by name.
# os.listdir returns entries in arbitrary order, so sorting makes sure the reports are
# handed to the combine step in the same order on every run.
files = sorted(file for file in os.listdir(directory_path) if file.endswith('.xlsx'))

# Create a list of full file paths
full_file_paths = [os.path.join(directory_path, file) for file in files]

# Print the number of Excel files found in the folder
print(f"Number of Excel files in folder: {len(full_file_paths)}")

# Folder to store outputs in Google Drive
create_folder_if_not_exists(final_report_folder)

# File path for the combined final report
reports_combined_file_path = os.path.join(drive_home, final_report_folder, final_report_name)


##### Run function to combine into a single report

In [None]:
# Combine the listed Excel reports (num_sheets=3) into the single final report file
append_excel_files(
    file_paths=full_file_paths,
    num_sheets=3,
    output_file_path=reports_combined_file_path,
)

print (f"\n Complete! Output file for SDG 15.4.2 Sub-indicator A here: {reports_combined_file_path}")