# (1) - Conseguir data
Este es el código que se empleó para conseguir los datos con los que se trabajó.

In [1]:
import rasterio

file_path = '/workspaces/codespaces-jupyter/data/Hansen_GFC-2023-v1.11_lossyear_10N_080W.tif'

# Read the georeferencing metadata while the file is open; the context manager
# closes the handle as soon as the three attributes have been captured.
with rasterio.open(file_path) as dataset:
    crs, transform, bounds = dataset.crs, dataset.transform, dataset.bounds

print("Coordinate Reference System (CRS):", crs)
print("Affine Transform:", transform)
print("Bounds:", bounds)

# The raster is reported as georeferenced only when it carries a CRS and its
# affine transform is something other than the identity matrix.
print(
    "The TIFF file is georeferenced."
    if crs and not transform.is_identity
    else "The TIFF file is NOT georeferenced."
)


Coordinate Reference System (CRS): EPSG:4326
Affine Transform: | 0.00, 0.00,-80.00|
| 0.00,-0.00, 10.00|
| 0.00, 0.00, 1.00|
Bounds: BoundingBox(left=-80.0, bottom=0.0, right=-70.0, top=10.0)
The TIFF file is georeferenced.


In [2]:
import rasterio

# Path of the raster tile to open (a Hansen GFC "lossyear" tile, judging by the file name).
file_path = '/workspaces/codespaces-jupyter/data/Hansen_GFC-2023-v1.11_lossyear_10N_080W.tif'

# Open the raster without a context manager: the handle is kept in `dataset`
# and stays open until dataset.close() is called explicitly.
dataset = rasterio.open(file_path)


In [12]:
import ee
import time
import logging

# Initialise the Earth Engine client library before any other ee.* call.
ee.Initialize()

# Logging setup (switch the level to logging.DEBUG for more detail).
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Source 'lossyear' asset.
lossyear = ee.Image('projects/ee-thomaspradaes/assets/colombia')

# Self-masked single-band images: pixels whose value is 23 (sampled below as
# "deforestation in 2023") and pixels whose value is 0 (sampled as "no loss").
# Both bands are renamed to 'constant' so they can serve as the class band.
loss2023 = lossyear.eq(23).selfMask().rename('constant')
noLoss = lossyear.eq(0).selfMask().rename('constant')

# Number of points requested per class.
num_samples = 500

# Sanity check of the band names of each image.
for heading, band_image in (
    ('Band names of lossyear:', lossyear),
    ('Band names of loss2023:', loss2023),
    ('Band names of noLoss:', noLoss),
):
    print(heading, band_image.bandNames().getInfo())


def _sample_labeled_points(class_mask, label, seed):
    """Sample points from `class_mask` and tag each one with `label`.

    Requests `num_samples` points at 30 m scale over the footprint of
    `lossyear`, keeps the point geometries, and sets the 'label' property
    of every returned feature to `label`.
    """
    sampled = class_mask.stratifiedSample(
        numPoints=num_samples,
        classBand='constant',
        region=lossyear.geometry(),
        scale=30,
        seed=seed,
        geometries=True,
    )
    return sampled.map(lambda point: point.set('label', label))


# Positive class: label 1.
logging.info('Sampling points where deforestation occurred in 2023.')
loss_points = _sample_labeled_points(loss2023, 1, 42)

# Negative class: label 0.
logging.info('Sampling points where no deforestation occurred.')
no_loss_points = _sample_labeled_points(noLoss, 0, 84)

# Merge both classes into a single collection (positives first).
all_points = loss_points.merge(no_loss_points)
logging.info(f'Total number of points sampled: {all_points.size().getInfo()}')

# Function that builds the image patch around a sampled point
def extract_image_patch(feature):
    """Attach a 2023 Landsat 8 RGB median composite around a sampled point.

    Args:
        feature: ee.Feature whose geometry is a point (one element of the
            sampled collection, as handed over by ``FeatureCollection.map``).

    Returns:
        The input feature with two extra properties:
        'square' - axis-aligned planar square in EPSG:3857 centred on the
            point, with a half side of 5000 projection units;
        'image'  - median of the scaled SR_B4/SR_B3/SR_B2 bands of all
            LANDSAT/LC08/C02/T1_L2 scenes that intersect the square, were
            acquired during 2023 and have CLOUD_COVER < 20, clipped to the
            square.
    """
    half_size = 5000

    # Work in EPSG:3857 so the square can be built by offsetting the point's
    # projected x/y coordinates (second argument: max reprojection error).
    point_proj = feature.geometry().transform('EPSG:3857', 1)
    coords = point_proj.coordinates()
    x = ee.Number(coords.get(0))
    y = ee.Number(coords.get(1))
    square = ee.Geometry.Rectangle([
        x.subtract(half_size),
        y.subtract(half_size),
        x.add(half_size),
        y.add(half_size)
    ], 'EPSG:3857', False)  # False -> planar (non-geodesic) edges

    def _scaled_rgb(image):
        # Keep the red/green/blue surface-reflectance bands and apply the
        # Collection 2 Level-2 scale factor and offset.
        return image \
            .select(['SR_B4', 'SR_B3', 'SR_B2']) \
            .multiply(0.0000275).add(-0.2) \
            .set('system:time_start', image.get('system:time_start'))

    # filterDate treats the end date as exclusive, so the range must end on
    # 2024-01-01 for scenes acquired on 2023-12-31 to be included.
    # NOTE(review): if no scene passes the filters the median composite
    # presumably has no bands and the later export would fail - confirm.
    landsat8 = ee.ImageCollection('LANDSAT/LC08/C02/T1_L2') \
        .filterBounds(square) \
        .filterDate('2023-01-01', '2024-01-01') \
        .filter(ee.Filter.lt('CLOUD_COVER', 20)) \
        .map(_scaled_rgb) \
        .median() \
        .clip(square)
    return feature.set({'image': landsat8, 'square': square})

# Apply the patch-extraction function to every sampled point
logging.info('Extracting image patches around sampled points.')
labeled_points = all_points.map(extract_image_patch)

# Task management: maximum number of READY/RUNNING tasks tolerated before the export loop pauses
MAX_ACTIVE_TASKS = 10

def get_num_active_tasks():
    """Return how many entries of the Earth Engine task list are READY or RUNNING."""
    active_states = ('READY', 'RUNNING')
    return sum(
        1
        for entry in ee.data.getTaskList()
        if entry['state'] in active_states
    )

# Export every image patch to Google Drive
points_list = labeled_points.toList(labeled_points.size())
# Use the real size of the collection instead of assuming num_samples * 2:
# a class may yield fewer points than requested, and indexing past the end
# of the list would make the per-feature requests below fail.
total_samples = labeled_points.size().getInfo()

logging.info('Starting export of images to Google Drive.')
for i in range(total_samples):
    feature = ee.Feature(points_list.get(i))
    # The label is fetched client-side because it is part of the file name.
    label = feature.get('label').getInfo()
    image = ee.Image(feature.get('image'))
    square = ee.Geometry(feature.get('square'))
    description = 'Deforestation_Image_{}'.format(i)
    fileNamePrefix = 'deforestation_{}_{}'.format(int(label), i)
    task = ee.batch.Export.image.toDrive(
        image=image,
        description=description,
        folder='EarthEngineImages',  # Optional: target folder in Google Drive
        fileNamePrefix=fileNamePrefix,
        scale=30,
        region=square,
        maxPixels=1e9
    )
    task.start()
    logging.info(f'Started export task {task.id} for image {i} with label {label}.')

    # Pause while the number of READY/RUNNING tasks is at the limit
    while get_num_active_tasks() >= MAX_ACTIVE_TASKS:
        logging.info('Maximum active tasks reached. Waiting before starting new tasks...')
        time.sleep(30)

logging.info('All export tasks have been submitted.')


Band names of lossyear: ['b1']


2024-09-26 05:28:22,453 - INFO - Sampling points where deforestation occurred in 2023.
2024-09-26 05:28:22,454 - INFO - Sampling points where no deforestation occurred.


Band names of loss2023: ['constant']
Band names of noLoss: ['constant']


2024-09-26 05:28:47,697 - INFO - Total number of points sampled: 1000
2024-09-26 05:28:47,698 - INFO - Extracting image patches around sampled points.
2024-09-26 05:28:47,704 - INFO - Starting export of images to Google Drive.
2024-09-26 05:28:50,822 - INFO - Started export task 6PIGASIBG56DZ2ZY42675QCD for image 0 with label 1.
2024-09-26 05:28:53,989 - INFO - Started export task CZBZ7OUAKD5EV3VDZ4LJKBYC for image 1 with label 1.
2024-09-26 05:28:56,581 - INFO - Started export task YUIAGA7ZWJSDIJKIJTSAPRLF for image 2 with label 1.
2024-09-26 05:28:59,666 - INFO - Started export task IAHPRSB62CUEBBT7WHPT77TE for image 3 with label 1.
2024-09-26 05:29:03,900 - INFO - Started export task SAMQPCPLBJHFNMDFRMWOMHLG for image 4 with label 1.
2024-09-26 05:29:07,438 - INFO - Started export task XHTHDL3EEL2YQM5KGDEEHAQI for image 5 with label 1.
2024-09-26 05:29:10,954 - INFO - Started export task TKMR4KQPJDCPW7BB5UR4RLRH for image 6 with label 1.
2024-09-26 05:29:14,514 - INFO - Started expo

KeyboardInterrupt: 