<a href="https://colab.research.google.com/github/ruany-doehnert/Deforestation_Amazon/blob/main/data_source.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Install and authenticate Earth Engine in Colab
# !pip install earthengine-api --quiet
!pip install rasterio

Collecting rasterio
  Downloading rasterio-1.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)
Collecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl.metadata (4.0 kB)
Collecting cligj>=0.5 (from rasterio)
  Downloading cligj-0.7.2-py3-none-any.whl.metadata (5.0 kB)
Collecting click-plugins (from rasterio)
  Downloading click_plugins-1.1.1.2-py2.py3-none-any.whl.metadata (6.5 kB)
Downloading rasterio-1.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (22.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.2/22.2 MB[0m [31m49.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Downloading affine-2.4.0-py3-none-any.whl (15 kB)
Downloading click_plugins-1.1.1.2-py2.py3-none-any.whl (11 kB)
Installing collected packages: cligj, click-plugins, affine, rasterio
Successfully installed affine-2.4.0 click-plugins-1.1.1.2 cligj-0.7.2 rasterio-1.4.3


In [7]:
import rasterio
import numpy as np
import pandas as pd
import ee
import folium

In [4]:
# Connect to Google Drive: mount it into the Colab filesystem so that files on
# Drive can be read through ordinary paths.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [25]:
# Authenticate this session and initialize Earth Engine against the Cloud project.
ee.Authenticate()
ee.Initialize(project='amazon-deforestation-462101')

# Region of interest as a bounding box (west, south, east, north) in degrees.
# The box spans 27 degrees of longitude by 16 degrees of latitude.
region = ee.Geometry.BBox(-71, -17, -44, -1)

# Hansen Global Forest Change: tree cover for the year 2000
gfc = ee.Image('UMD/hansen/global_forest_change_2022_v1_10')
treecover = gfc.select('treecover2000')


def apply_sr_scale_factors(image):
    """Convert the optical surface-reflectance bands from stored integers to reflectance.

    Landsat Collection 2 Level-2 stores the `SR_B*` bands as scaled integers
    (reflectance = DN * 0.0000275 - 0.2). Because of the additive offset, a
    normalized difference computed on the raw integers is NOT equal to the one
    computed on reflectance, so the bands must be rescaled before deriving
    NDVI / NBR.

    Args:
        image: an ee.Image from a Landsat Collection 2 Level-2 collection.

    Returns:
        The same image with its `SR_B*` bands replaced by reflectance values.
    """
    optical = image.select('SR_B.').multiply(0.0000275).add(-0.2)
    # overwrite=True replaces the original SR bands in place (names unchanged)
    return image.addBands(optical, None, True)


# Landsat 8 median composite for 2021.
# filterDate's end date is exclusive, so '2022-01-01' is required to keep
# scenes acquired on 2021-12-31.
landsat = (
    ee.ImageCollection('LANDSAT/LC08/C02/T1_L2')
    .filterDate('2021-01-01', '2022-01-01')
    .filterBounds(region)
    .map(apply_sr_scale_factors)
    .median()
)

# Calculate NDVI (NIR vs. red) and NBR (NIR vs. SWIR2) from reflectance.
# normalizedDifference masks pixels where either input band is negative.
ndvi = landsat.normalizedDifference(['SR_B5', 'SR_B4']).rename('NDVI')
nbr = landsat.normalizedDifference(['SR_B5', 'SR_B7']).rename('NBR')

# Elevation
elevation = ee.Image('USGS/SRTMGL1_003').rename('elevation')

# Stack all predictors into a single multi-band image
features = treecover.rename('treecover') \
    .addBands([ndvi, nbr, elevation])

# Sample points in the region; the fixed seed keeps the draw reproducible
samples = features.sample(
    region=region,
    scale=30,
    numPixels=2000,
    seed=42,
    geometries=True
)


def label_fn(f):
    """Attach a binary `label` property to a sampled feature.

    Args:
        f: an ee.Feature carrying a numeric `treecover` property.

    Returns:
        The feature with `label` set to 1 when treecover > 30, otherwise 0.
    """
    return f.set('label', ee.Number(f.get('treecover')).gt(30).int())


# Create label: forest if treecover > 30
# NOTE(review): `label` is a deterministic function of the `treecover` property,
# which is exported alongside it, and treecover describes the year 2000 while the
# spectral indices describe 2021 - confirm `treecover` is not used as a model input.
samples = samples.map(label_fn)

In [24]:
# Bounding box of the study area in degrees. These are the same values passed to
# ee.Geometry.BBox(...) when `region` is defined; keep the two in sync.
lon_min, lat_min, lon_max, lat_max = -71, -17, -44, -1

# Center of the box, used as the initial map location
center_lat = (lat_min + lat_max) / 2
center_lon = (lon_min + lon_max) / 2

# Corners as [lat, lon] pairs: south-west first, then north-east
bbox_bounds = [[lat_min, lon_min], [lat_max, lon_max]]

# Create a folium map centered on the region
m = folium.Map(location=[center_lat, center_lon])

# Add the bounding box as a rectangle
folium.Rectangle(
    bounds=bbox_bounds,
    color='green',
    fill=True,
    fill_opacity=0.2
).add_to(m)

# Zoom so the whole rectangle is visible. A fixed zoom level of 8 shows only a
# few degrees, which places the viewport entirely inside this 27 x 16 degree box
# and hides its outline.
m.fit_bounds(bbox_bounds)

# Display the map
m

In [26]:
# Export the labelled samples to Google Drive as a CSV table.
# Only the columns listed here are written to the file.
export_columns = ['treecover', 'NDVI', 'NBR', 'elevation', 'label']

export_options = dict(
    collection=samples,
    description='export_training_dataset_with_features',
    folder='earthengine_export',
    fileNamePrefix='training_data_amazon_features',
    fileFormat='CSV',
    selectors=export_columns,
)

# Build the batch task, then submit it; start() returns without waiting for the
# export to finish.
task = ee.batch.Export.table.toDrive(**export_options)
task.start()
print("🚀 Export started! Check your Google Drive shortly.")

🚀 Export started! Check your Google Drive shortly.
