In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to the project code are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path
import ee
# Initialise the Earth Engine client before any ee.* object is built below.
# NOTE(review): assumes credentials are already configured for this kernel — confirm.
ee.Initialize()

In [3]:
import os
from datetime import datetime
from pathlib import Path

import pandas as pd
from openpyxl.styles import Alignment
from openpyxl.utils import get_column_letter

from component.scripts.colab_combining_files import (
    append_excel_files,
    sanitize_description,
)
from component.scripts.gee import reduce_regions
from component.scripts.scripts import (
    get_b_years,
    get_sub_b_data_reports,
    map_matrix_to_dict,
    parse_result,
    read_from_csv,
)

<IPython.core.display.Javascript object>

Input parameters

In [13]:
# Earth Engine asset ID of the elevation model passed as `dem` to reduce_regions
DEM_DEFAULT = "CGIAR/SRTM90_V4"

# Define the translation matrix between ESA and MGCI LC classes
# NOTE(review): the file name refers to CORINE rather than ESA — confirm which
# source legend this matrix actually translates.

LC_MAP_MATRIX = Path("content/corine_lc_map_matrix2.csv")
TRANSITION_MATRIX_FILE = Path("content/default_transition_matrix.csv")

# Check they both exist (paths are relative to the kernel's working directory),
# so a missing input fails here rather than inside the processing cells
assert LC_MAP_MATRIX.exists(), "LC map matrix file not found"
assert TRANSITION_MATRIX_FILE.exists(), "Transition matrix file not found"

In [22]:
# Earth Engine asset holding the admin boundaries to process
admin_asset_id = "projects/ee-xavidelamo/assets/M49Countries"

# Feature property used to name, filter and de-duplicate the boundaries
admin_asset_property_name = "M49Name"


# Land cover assets
# "baseline" holds a base/report pair of {"asset", "year"} entries; any other
# key (here 2) holds a single {"asset", "year"} entry for a reporting year.
# NOTE(review): the exact schema is defined by get_b_years and
# get_sub_b_data_reports, which are not visible here — confirm against them.
sub_b_year = {
    "baseline": {
        "base": {
            "asset": "COPERNICUS/CORINE/V20/100m/2000",
            "year": 2000,
        },
        "report": {
            # NOTE(review): the 2012 asset is labelled as year 2015 — confirm
            # this mismatch is intentional.
            "asset": "COPERNICUS/CORINE/V20/100m/2012",
            "year": 2015,
        },
    },
    # And the reporting year
    2: {"asset": "COPERNICUS/CORINE/V20/100m/2018", "year": 2018},
}

In [15]:
# Set the base directory
base_dir = Path("content/sdg1542/sub_b")
base_dir.mkdir(parents=True, exist_ok=True)

In [16]:
# Output folders, all located under base_dir
csv_path = base_dir / "raw_stats"  # CSV statistics read by the report cell
raw_reports = base_dir / "raw_reports"  # one Excel workbook per CSV
final_report = base_dir / "final_report"  # folder of the combined workbook

# Output files
final_report_file_path = final_report / "final_report.xlsx"
error_log_file_path = base_dir / "error_log.csv"

# Make sure every output folder exists before anything is written into it
for output_dir in (csv_path, raw_reports, final_report):
    output_dir.mkdir(parents=True, exist_ok=True)

In [17]:
export = True  # when True, the export loop starts each Earth Engine task it builds
debug = False  # NOTE(review): not read by any cell visible in this notebook — confirm it is still needed

Temporary output parameters


In [18]:
# Admin boundary feature collection
admin_boundaries = ee.FeatureCollection(admin_asset_id)

# Names of all features, fetched client-side in a single getInfo() call
all_boundary_names = admin_boundaries.aggregate_array(
    admin_asset_property_name
).getInfo()

print("Length of admin boundaries to process", len(all_boundary_names))

# Remove duplicates; sort so the processing order is the same on every run
# (a bare set has no stable iteration order across kernel sessions).
list_of_countries = sorted(set(all_boundary_names))

print("Length of distinct admin boundaries to process", len(list_of_countries))


Length of admin boundaries to process 245
Length of distinct admin boundaries to process 245


Read the default land cover remapping table and convert it to a dictionary

In [19]:
# Load the land cover remapping table; the result is passed as `remap_matrix`
# to reduce_regions in the export loop.
default_map_matrix = map_matrix_to_dict(LC_MAP_MATRIX)

Set the default transition matrix file path.

In [20]:
# Expose the configured transition matrix path under the name used by the
# processing cells, and print it so the run records which file was used.
default_transition_matrix_path = TRANSITION_MATRIX_FILE
print(default_transition_matrix_path)

content/default_transition_matrix.csv


Select years of land cover to process

In [23]:
# extracts the years from the b_years dictionary
from component.scripts.scripts import get_b_years


years = get_b_years(sub_b_year)
years

[({'asset': 'COPERNICUS/CORINE/V20/100m/2000', 'year': 2000},
  {'asset': 'COPERNICUS/CORINE/V20/100m/2012', 'year': 2015},
  {'asset': 'COPERNICUS/CORINE/V20/100m/2018', 'year': 2018})]

### 8) Calculate area statistics by country
* Runs for each country and each mountain biobelt
* Gets the area of land cover reclassified into the 10 SEEA classes
* Repeats for each year specified


In [25]:
# Build (and, when `export` is True, start) one Earth Engine table export per
# admin boundary. Each export holds one feature per entry of `years`, tagged
# with a "process_id" made of that entry's years joined by "_".
total_aois = len(list_of_countries)
progress_width = 0  # widest progress line printed so far

for counter, aoi_name in enumerate(list_of_countries, start=1):
    # Kept as a FeatureCollection (no .first()) so reduce_regions receives
    # every feature that carries this name.
    aoi = admin_boundaries.filter(
        ee.Filter.eq(admin_asset_property_name, aoi_name)
    )

    process = ee.FeatureCollection(
        [
            ee.Feature(
                None,
                reduce_regions(
                    aoi,
                    remap_matrix=default_map_matrix,
                    rsa=False,
                    dem=DEM_DEFAULT,
                    lc_years=lc_years,
                    transition_matrix=default_transition_matrix_path,
                ),
            ).set("process_id", "_".join(str(lc["year"]) for lc in lc_years))
            for lc_years in years
        ]
    )

    task_name = sanitize_description(f"sub_b_{aoi_name}")

    # NOTE(review): Earth Engine documents the Drive `folder` argument as a
    # folder name, not a path — confirm the exported CSVs end up where the
    # report cell (csv_path) reads them.
    task = ee.batch.Export.table.toDrive(
        collection=process,
        description=task_name,
        fileFormat="CSV",
        folder="sdg1542/sub_b/raw_stats",
    )

    # Print in place. Padding to the widest line so far blanks the tail of a
    # longer previous name, which otherwise stays visible after the "\r".
    progress = f" process {counter}/{total_aois} {aoi_name} "
    progress_width = max(progress_width, len(progress))
    print(f"\r{progress:<{progress_width}}", end="")

    if export:
        task.start()


 process 245/245 Brunei Darussalam Malvinas) Saba nds Islands  eland 

In [None]:
def _autofit_and_align(worksheet):
    """Widen each column to fit its longest value and right-align OBS columns.

    Args:
        worksheet: openpyxl worksheet that has already been populated.

    Every cell value is measured through ``str()`` so numeric cells count
    towards the width; empty cells count as zero length. A column is
    right-aligned when its header (first row) contains "OBS".
    """
    for col in worksheet.columns:
        header = col[0]
        texts = ["" if cell.value is None else str(cell.value) for cell in col]

        # +4 leaves a little padding around the longest value
        worksheet.column_dimensions[get_column_letter(header.column)].width = (
            max(len(text) for text in texts) + 4
        )

        # Align "obs_value" column to the right
        if "OBS" in texts[0]:
            for cell in col:
                cell.alignment = Alignment(horizontal="right")


# Map each expected CSV stem back to the original admin boundary name.
# Assumes each CSV keeps the export task description
# (sanitize_description(f"sub_b_{name}")) as its file name — TODO confirm.
stem_prefix = "sub_b_"
aoi_name_by_stem = {
    str(sanitize_description(f"{stem_prefix}{name}")): name
    for name in list_of_countries
}

# Turn every exported statistics CSV into one Excel report. A failure on one
# file is logged to the error log and does not stop the remaining files.
counter = 0
for counter, stats_csv_file_path in enumerate(
    sorted(csv_path.glob("[!.]*.csv")), start=1
):
    stem = stats_csv_file_path.stem

    # Resolve the area name from the file being processed (not from a variable
    # left over by another cell). Unknown stems fall back to the stem itself,
    # minus the export prefix.
    fallback_name = stem[len(stem_prefix):] if stem.startswith(stem_prefix) else stem
    aoi_name = aoi_name_by_stem.get(stem, fallback_name)

    # Clean the AOI name so it is safe to use as a file name
    aoi_name_clean = str(sanitize_description(aoi_name))

    message = f"Process {counter}, {stats_csv_file_path}"

    try:
        # Read the results from the CSV file and parse it to a dictionary
        results = read_from_csv(stats_csv_file_path)

        sub_b_reports = get_sub_b_data_reports(
            results=results,
            sub_b_year=sub_b_year,
            transition_matrix=default_transition_matrix_path,
            geo_area_name=aoi_name,
            ref_area=" ",
            source_detail=" ",
        )

        # Each report is indexed as a pair: [0] -> DGRDP table, [1] -> DGRDA table
        er_mtn_dgrp_df = pd.concat([report[0] for report in sub_b_reports])
        er_mtn_dgda_df = pd.concat([report[1] for report in sub_b_reports])

        # Define the output report file path
        report_file_path = raw_reports / f"{aoi_name_clean}.xlsx"

        # This will create the excel file with the reports
        with pd.ExcelWriter(report_file_path) as writer:
            er_mtn_dgda_df.to_excel(
                writer, sheet_name="Table4_ER_MTN_DGRDA", index=False
            )
            er_mtn_dgrp_df.to_excel(
                writer, sheet_name="Table5_ER_MTN_DGRDP", index=False
            )

            for worksheet in writer.sheets.values():
                _autofit_and_align(worksheet)

    except Exception as e:
        # If an error occurs, catch the exception and handle it
        message = f"process {counter}, {stats_csv_file_path.stem}, Error: {e}"

        # Get the current time
        current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

        # Write the error message and file name to the error log file
        error_info = pd.DataFrame(
            [[stats_csv_file_path.stem, str(e), current_time]],
            columns=["File Name", "Error Message", "Time"],
        )

        # Append to an existing log; otherwise create it and write the header
        log_exists = error_log_file_path.exists()
        error_info.to_csv(
            error_log_file_path,
            mode="a" if log_exists else "w",
            header=not log_exists,
            index=False,
        )

    print(message)


In [None]:
# Collect the per-area Excel reports (the pattern skips dot-files); sorted so
# the combined workbook is assembled in the same order on every run.
raw_reports_files = sorted(raw_reports.glob("[!.]*.xlsx"))

# Print the number of Excel files found in the folder
print(f"Number of Excel files in folder: {len(raw_reports_files)}")

# NOTE(review): the report cell writes two sheets per workbook, while
# num_sheets=3 is requested here — confirm what append_excel_files expects.
append_excel_files(
    file_paths=raw_reports_files,
    num_sheets=3,
    output_file_path=str(final_report_file_path),
)