# Calculating Forest Loss at the Ethnologue Polygon Level

In [1]:
import os
import ee
import geemap 
import pandas as pd

# One-time credential setup: uncomment ee.Authenticate() on a machine that has
# not yet been authorized for Earth Engine, then initialize the client session.
#ee.Authenticate()
ee.Initialize()

In [2]:
# IMPORTING DATA

# Load the language polygons (a feature collection uploaded to the Earth Engine assets folder)
ethnologue = ee.FeatureCollection('projects/ee-juamiji/assets/langa_no_overlap_biggest_clean')

# Import the Hansen global forest change image (source of the 'treecover2000' and 'loss' bands)
flossHansen = ee.Image("UMD/hansen/global_forest_change_2024_v1_12")

In [3]:
# PREPARING THE DATA

# Binary forest mask: 1 where year-2000 tree cover is at least 30 (gte), 0 otherwise
treecover00=flossHansen.select('treecover2000').gte(30)
# Raw 'loss' band; note that it is NOT restricted to the tree-cover mask above
treeloss=flossHansen.select('loss')

In [4]:
# DEFINING A FUNCTION TO CALCULATE AREA

def calculate_area(feature):
    """Attach forest-cover and forest-loss area totals to one polygon.

    Args:
        feature: the ee.Feature whose geometry bounds both sums.

    Returns:
        The input feature with two added properties:
        'forestcover00' (sum of treecover00 x pixel area) and
        'forestloss' (sum of treeloss x pixel area), both summed over the
        feature's geometry.
    """
    pixel_area = ee.Image.pixelArea()
    region = feature.geometry()

    def summed_area(band_img, band_name):
        # Weight every pixel by its area, then add the values up inside the polygon.
        totals = band_img.multiply(pixel_area).reduceRegion(
            reducer=ee.Reducer.sum(),
            geometry=region,
            scale=30,
            maxPixels=1e13,
            bestEffort=True,
        )
        # reduceRegion returns a dictionary keyed by band name.
        return totals.get(band_name)

    return feature.set({
        'forestcover00': summed_area(treecover00, 'treecover2000'),
        'forestloss': summed_area(treeloss, 'loss'),
    })

# Apply the function to each polygon; every output feature keeps its original
# properties and gains 'forestcover00' and 'forestloss'
ethnologue_forest = ethnologue.map(calculate_area)


In [5]:
# Remove geometry for CSV export (each feature's geometry is set to None,
# leaving only its properties)
ethnologue_no_geom = ethnologue_forest.map(lambda f: f.setGeometry(None))

In [6]:
# Get the first feature
first_feature = ethnologue_no_geom.first()

# Get the property names (i.e., column names)
property_keys = first_feature.propertyNames()

# Evaluate and print (getInfo() fetches the server-side list to the client)
print(property_keys.getInfo())

['forestloss', 'forestcover00', 'NAM_LABEL', 'ID_ISO_A2', 'ID_FIPS', 'LMP_CLASS', 'NAM_ANSI', 'G', 'CNT', 'LMP_POP1', 'LMP_C1', 'NAME2', 'C1', 'FAMILY', 'ID_lang', 'POP', 'Shape_Leng', 'CODE', 'NAME_PROP', 'FAMILYPROP', 'Shape_Area', 'FID_langa', 'ID', 'ID_ISO_A3', 'system:index']


In [35]:
# Keep only desired columns: the two computed areas plus the polygon ID
ethnologue_filtered = ethnologue_no_geom.select(['forestloss', 'forestcover00', 'ID'])

In [36]:
# Keep only polygons whose year-2000 forest-cover area is greater than 0
ethnologue_filtered = ethnologue_filtered.filter(ee.Filter.gt('forestcover00', 0))

In [37]:
# Count the features that pass the filter.
# NOTE: the recorded run of this cell failed with
# "EEException: Too many concurrent aggregations."
total_count = ethnologue_filtered.size().getInfo()
print(f"Total features: {total_count}")

EEException: Too many concurrent aggregations.

In [26]:
# Get total number of features
total_count = ethnologue_filtered.size().getInfo()
print(f"Total features: {total_count}")

# Build a small sample (first 3000 rows); this cell only constructs the
# collection, it does not start an export
subset = ee.FeatureCollection(ethnologue_filtered.toList(3000))


Total features: 7087


In [None]:
# First part: the first 3000 features (list indices 0-2999)
first_part = ee.FeatureCollection(
    ethnologue_filtered.toList(3000)  # count=3000, no offset
)

# Export first part to Google Drive as CSV
task1 = ee.batch.Export.table.toDrive(
    collection=first_part,
    description='ethnologue_part1',
    folder='GEE_exports',
    fileNamePrefix='ethnologue_part1',
    fileFormat='CSV'
)
task1.start()

In [34]:
# Second part: the next 3000 features (list indices 3000-5999).
# toList(count, offset) takes the number of elements to fetch first and the
# number of elements to skip second, so both arguments are 3000 here. The
# previous call, toList(3000, 1000), returned indices 1000-3999 and therefore
# overlapped the first part.
second_part = ee.FeatureCollection(
    ethnologue_filtered.toList(3000, 3000)  # count=3000, offset=3000
)

# Export second part to Google Drive as CSV
task2 = ee.batch.Export.table.toDrive(
    collection=second_part,
    description='ethnologue_part2',
    folder='GEE_exports',
    fileNamePrefix='ethnologue_part2',
    fileFormat='CSV'
)
task2.start()

In [32]:
# Third part: the remaining features (list indices 6000-7086).
# toList(count, offset): count comes first, offset second. The previous call,
# toList(6000, 1087), had them swapped and returned indices 1087 to the end.
third_part = ee.FeatureCollection(
    ethnologue_filtered.toList(1087, 6000)  # count=1087 (7087 total - 6000), offset=6000
)

# Export third part to Google Drive as CSV
task3 = ee.batch.Export.table.toDrive(
    collection=third_part,
    description='ethnologue_part3',
    folder='GEE_exports',
    fileNamePrefix='ethnologue_part3',
    fileFormat='CSV'
)
task3.start()

In [38]:
# Export every filtered feature (forestcover00 > 0) in a single task.
# The collection is passed to the export as-is: converting it with
# toList(size()) produced an ee.List rather than a FeatureCollection (the other
# export cells wrap their lists in ee.FeatureCollection) and added a redundant
# server-side aggregation.
subset = ethnologue_filtered

# Export the results to a CSV file
task = ee.batch.Export.table.toDrive(
    collection=subset,
    description='subsetgt0',
    folder='GEE_exports',
    fileNamePrefix='subsetgt0',
    fileFormat='CSV'
)

task.start()

In [19]:
# Export to shapefile: uses ethnologue_forest (geometry and all properties kept),
# written to the 'EarthEngineExports' Drive folder
task = ee.batch.Export.table.toDrive(
    collection=ethnologue_forest,
    description='ethnologue_forest_shapefile',
    folder='EarthEngineExports',
    fileFormat='SHP'
)

# start() only submits the task; the message below does not mean it has finished
task.start()
print("Exporting shapefile to Google Drive...")

Exporting shapefile to Google Drive...


In [9]:
# Checking the results: pull the first 10 features to the client and print
# each feature's property dictionary
# NOTE(review): the recorded output lists only 'ID' and 'forestcover00', so it
# appears to come from an earlier version of the pipeline - re-run to refresh
sample = ethnologue_forest.limit(10)
features = sample.getInfo()['features']
for f in features:
    print(f['properties'])

{'ID': 'AUJ-LBY', 'forestcover00': 0}
{'ID': 'SWN-LBY', 'forestcover00': 0}
{'ID': 'SIZ-EGY', 'forestcover00': 0}
{'ID': 'CNU-DZA', 'forestcover00': 104502.408108341}
{'ID': 'TZM-MAR', 'forestcover00': 2419983763.55267}
{'ID': 'SHI-MAR', 'forestcover00': 353911948.01575214}
{'ID': 'TZM-DZA', 'forestcover00': 0}
{'ID': 'SHI-DZA', 'forestcover00': 0}
{'ID': 'KAB-DZA', 'forestcover00': 3501564289.6009374}
{'ID': 'JBN-LBY', 'forestcover00': 0}


In [11]:
# Keep only features where forestcover00 is greater than 0
# (filters ethnologue_forest, i.e. the collection that still has geometry and all properties)
ethnologue_forest_no0 = ethnologue_forest.filter(ee.Filter.gt('forestcover00', 0))

In [12]:
# EXPORTING THE RESULTS

# Export the polygons with non-zero year-2000 forest cover to the
# 'EarthEngineExports' Drive folder
task = ee.batch.Export.table.toDrive(
    collection=ethnologue_forest_no0,
    description='ethnologue_forest_export',
    folder="EarthEngineExports",
    fileFormat='CSV'  # or 'GeoJSON'
)

# start() only submits the task; the message below does not mean it has finished
task.start()
# Plain string: the message has no placeholders, so no f-string is needed
print("Exporting to Google Drive...")

Exporting to Google Drive...


In [14]:
# Local download directory (the user's ~/Downloads, with ~ expanded)
# NOTE(review): out_dir is not referenced anywhere else in the visible notebook
out_dir = os.path.expanduser("~/Downloads")

### NOTE: see the progress of exports at [GEE Task Manager](https://code.earthengine.google.com/tasks)