In [1]:
import os
import shutil
import time
from datetime import datetime, timedelta

import ee
import pandas as pd
import requests

In [2]:
# ==========================================
# 1. INITIALIZATION & CONFIGURATION
# ==========================================

# Initialize Earth Engine
# Google Cloud project handed to ee.Initialize on every code path.
EE_PROJECT = 'vae-wgan'

try:
    ee.Initialize(project=EE_PROJECT)
except Exception:
    # Initialization failed (typically because no usable credentials are
    # cached). Run the interactive auth flow once, then retry against the SAME
    # project so both paths end up with an identically configured session.
    print("Authenticating Earth Engine...")
    ee.Authenticate()
    ee.Initialize(project=EE_PROJECT)

In [15]:
# ---- Data source and output layout ----
# Fire-detection CSV to process (URL or local path) -- CHANGE THIS to your own file.
CSV_FILE = "https://firms.modaps.eosdis.nasa.gov/data/country/modis/2024/modis_2024_Brazil.csv"
OUTPUT_DIR = 'modis_dataset_brazil'
# One sub-folder per class of image written by the downloader.
FIRE_DIR, NORMAL_DIR = (
    os.path.join(OUTPUT_DIR, subdir)
    for subdir in ('fire_anomalies', 'normal_reference')
)

# ---- Image geometry ----
IMG_SIZE = 64   # Side length, in pixels, of each exported tile (VAE input size)
SCALE = 500     # Ground size of one pixel in meters (MODIS resolution)
# Half the tile's side length in meters; used as the buffer radius around a point.
ROI_RADIUS = (IMG_SIZE * SCALE) / 2

# ---- Imagery source ----
# MODIS Terra surface reflectance collection
MODIS_COLLECTION = 'MODIS/061/MOD09GA'

# Make sure both output folders exist before anything is downloaded.
for _dataset_dir in (FIRE_DIR, NORMAL_DIR):
    os.makedirs(_dataset_dir, exist_ok=True)

In [16]:
# ==========================================
# 2. HELPER FUNCTIONS
# ==========================================

def get_cloud_percentage(image, region):
    """
    Estimate how much of `region` is cloud-covered in `image`.

    The estimate is derived from the 'state_1km' QA band: the two lowest bits
    encode the cloud state (0 = clear, 1 = cloudy, 2 = mixed, 3 = not set).
    Pixels flagged cloudy or mixed count as cloud; the regional mean of that
    0/1 mask, times 100, is the result.

    Returns the percentage (0-100). Falls back to 100 when the reducer yields
    no value, and to 100.0 when any step raises, so an unreadable image is
    treated as fully cloudy rather than aborting the caller.
    """
    try:
        # Keep only bits 0-1 (mask 0b11) of the QA band.
        cloud_bits = image.select('state_1km').bitwiseAnd(3)

        # 1 where the pixel is cloudy (1) or mixed (2), 0 elsewhere.
        cloudy_mask = cloud_bits.eq(1).Or(cloud_bits.eq(2))

        # Mean of a 0/1 mask == fraction of flagged pixels in the region.
        region_mean = cloudy_mask.reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=region,
            scale=SCALE,
            maxPixels=1e9,
        )

        # getInfo() pulls the server-side number down to the client.
        fraction = region_mean.get('state_1km').getInfo()
        if fraction is None:
            return 100
        return fraction * 100
    except Exception as err:
        print(f"Error checking clouds: {err}")
        return 100.0  # Assume cloudy if error

In [17]:
def get_clear_image(lat, lon, target_date_str, search_window_days=14,
                    max_cloud_pct=10.0, max_candidates=20):
    """
    Search for a mostly cloud-free image near a location and date.

    Args:
        lat, lon: Coordinates of the point of interest (passed to
            ee.Geometry.Point as [lon, lat]).
        target_date_str: Centre of the search window, formatted 'YYYY-MM-DD'.
        search_window_days: Days to search on each side of the target date.
        max_cloud_pct: An image is accepted when get_cloud_percentage()
            reports strictly less than this value.
        max_candidates: Upper bound on how many images are inspected; each
            inspection costs at least one server round trip, so this caps the
            time spent per location.

    Returns:
        (ee.Image, 'YYYY-MM-DD' date string of that image) for the first
        acceptable candidate, or (None, None) when there are no candidates or
        none is clear enough.

    Raises:
        ValueError: if target_date_str is not in 'YYYY-MM-DD' format.

    NOTE(review): no explicit sort is applied, so candidates are examined in
    the collection's native order and the FIRST clear one wins -- not
    necessarily the one closest to the target date. Confirm that is intended.
    """
    # Parse first so a malformed date fails fast, before any server call.
    target_dt = datetime.strptime(target_date_str, '%Y-%m-%d')
    window = timedelta(days=search_window_days)
    start_search = (target_dt - window).strftime('%Y-%m-%d')
    end_search = (target_dt + window).strftime('%Y-%m-%d')

    point = ee.Geometry.Point([lon, lat])
    region = point.buffer(ROI_RADIUS).bounds()

    collection = (ee.ImageCollection(MODIS_COLLECTION)
                  .filterDate(start_search, end_search)
                  .filterBounds(point))

    # Single round trip to learn how many images matched. The number of
    # candidates to inspect follows directly from it, so the materialised
    # list's size does not need to be fetched again.
    available = collection.size().getInfo()
    candidate_count = min(available, max_candidates)
    if candidate_count <= 0:
        return None, None

    # Client-side iteration is required because the cloud check resolves each
    # image's statistics with getInfo().
    candidates = collection.toList(candidate_count)

    for i in range(candidate_count):
        img = ee.Image(candidates.get(i))

        if get_cloud_percentage(img, region) < max_cloud_pct:
            date_found = ee.Date(img.get('system:time_start')).format('YYYY-MM-dd').getInfo()
            return img, date_found

    return None, None

In [18]:
def download_image(ee_image, region, output_path):
    """
    Render `ee_image` over `region` as a PNG thumbnail and save it.

    The thumbnail is IMG_SIZE x IMG_SIZE pixels and maps bands 7 / 2 / 1
    (SWIR, NIR, Red) to the RGB channels, stretched between -100 and 8000.

    Args:
        ee_image: Image object exposing getThumbURL().
        region: Geometry that bounds the thumbnail.
        output_path: Destination file. If it already exists nothing is
            downloaded.

    Returns:
        bool: True when `output_path` is present on return (already there, or
        downloaded just now); False when the URL request, HTTP status, or
        file write failed. Failures are printed, never raised.
    """
    if os.path.exists(output_path):
        print(f"Skipping (Exists): {os.path.basename(output_path)}")
        return True

    # Visualization: Band 7 (SWIR-Heat), Band 2 (NIR-Veg), Band 1 (Red)
    vis_params = {
        'min': -100.0,
        'max': 8000.0,
        'bands': ['sur_refl_b07', 'sur_refl_b02', 'sur_refl_b01'],
    }

    # Write to a sibling temp file and rename on success. An interrupted run
    # (the caller invites Ctrl+C) can then never leave a truncated PNG at
    # `output_path` that the exists-check above would later skip as complete.
    tmp_path = output_path + '.part'

    try:
        url = ee_image.getThumbURL({
            'region': region,
            'dimensions': f'{IMG_SIZE}x{IMG_SIZE}',
            'format': 'png',
            **vis_params
        })

        response = requests.get(url, timeout=15)
        if response.status_code != 200:
            print(f"Failed to download (Status {response.status_code})")
            return False

        with open(tmp_path, 'wb') as f:
            f.write(response.content)
        os.replace(tmp_path, output_path)
        return True

    except Exception as e:
        print(f"Download Error: {e}")
        return False

In [19]:
# ==========================================
# 3. MAIN EXECUTION
# ==========================================

def main(max_records=None):
    """
    Build the fire / normal image dataset from the detections in CSV_FILE.

    For every (shuffled) detection row this downloads:
      1. the image acquired on the detection date  -> FIRE_DIR
      2. a low-cloud image from about one year (365 days) earlier, found with
         get_clear_image()                          -> NORMAL_DIR

    Args:
        max_records: Optional cap on how many shuffled rows to process.
            None (the default) processes every row.

    Per-row errors are printed and the loop continues; Ctrl+C stops the loop
    and still prints the totals. The totals count files that are actually
    present on disk after each download attempt.
    """
    # Load Data
    df = pd.read_csv(CSV_FILE)
    print(f"Loaded CSV with {len(df)} records.")

    # FILTER: Confidence > 80
    if 'confidence' in df.columns:
        df = df[df['confidence'] > 80]
        print(f"Filtered (Confidence > 80): {len(df)} records remaining.")
    else:
        print("Warning: 'confidence' column not found. Processing all records.")

    # Shuffle to ensure variety if we stop early
    df = df.sample(frac=1, random_state=42).reset_index(drop=True)
    if max_records is not None:
        df = df.head(max_records)

    print("Starting download process... (Press Ctrl+C to stop)")

    # Counters
    success_fire = 0
    success_normal = 0

    for index, row in df.iterrows():
        try:
            lat = row['latitude']
            lon = row['longitude']
            fire_date = row['acq_date']  # Expecting YYYY-MM-DD

            # Parse once; reused for the one-day window and the prior-year date.
            fire_dt = datetime.strptime(fire_date, '%Y-%m-%d')
            next_day = (fire_dt + timedelta(days=1)).strftime('%Y-%m-%d')

            # --- 1. DOWNLOAD FIRE ANOMALY ---
            # Define exact region
            point = ee.Geometry.Point([lon, lat])
            region = point.buffer(ROI_RADIUS).bounds()

            # Images acquired on the detection day at this location
            fire_collection = (ee.ImageCollection(MODIS_COLLECTION)
                               .filterDate(fire_date, next_day)
                               .filterBounds(point))

            # `.first()` returns a client-side proxy object that is truthy
            # even when the collection is empty, so emptiness has to be
            # resolved on the server via size().
            if fire_collection.size().getInfo() > 0:
                fire_out = os.path.join(FIRE_DIR, f"fire_{index}_{fire_date}.png")
                download_image(fire_collection.first(), region, fire_out)
                # Count only files that really exist; download_image reports
                # failures by printing, not by raising.
                if os.path.exists(fire_out):
                    success_fire += 1
                else:
                    print(f"[{index}] Fire image download failed for date {fire_date}")
            else:
                print(f"[{index}] No Fire image found for date {fire_date}")

            # --- 2. DOWNLOAD NORMAL REFERENCE (Smart Cloud Search) ---
            # Calculate 1 year prior
            ideal_date = (fire_dt - timedelta(days=365)).strftime('%Y-%m-%d')

            # Find a clear image
            normal_img, normal_date_found = get_clear_image(lat, lon, ideal_date)

            if normal_img is not None:
                normal_out = os.path.join(NORMAL_DIR, f"normal_{index}_{normal_date_found}.png")
                download_image(normal_img, region, normal_out)
                if os.path.exists(normal_out):
                    success_normal += 1
                    print(f"[{index}] Success | Fire: {fire_date} | Normal: {normal_date_found}")
                else:
                    print(f"[{index}] Normal image download failed for date {normal_date_found}")
            else:
                print(f"[{index}] Skipped Normal: Too cloudy around {ideal_date}")

        except KeyboardInterrupt:
            print("\nStopping download...")
            break
        except Exception as e:
            print(f"[{index}] Unexpected Error: {e}")
            continue

    print("\nDownload Finished.")
    print(f"Total Fire Images: {success_fire}")
    print(f"Total Normal Images: {success_normal}")

In [None]:
# Entry point: start the dataset download. main() iterates over every filtered
# CSV row and prints one status line per record, so expect long-running output;
# the loop can be stopped with Ctrl+C and will still print the totals.
if __name__ == "__main__":
    main()

Loaded CSV with 527894 records.
Filtered (Confidence > 80): 191698 records remaining.
Starting download process... (Press Ctrl+C to stop)
[0] Success | Fire: 2024-07-17 | Normal: 2023-07-04
[1] Success | Fire: 2024-02-24 | Normal: 2023-02-23
[2] Success | Fire: 2024-07-31 | Normal: 2023-07-18
[3] Success | Fire: 2024-08-21 | Normal: 2023-08-09
[4] Success | Fire: 2024-09-01 | Normal: 2023-08-21
[5] Success | Fire: 2024-06-14 | Normal: 2023-06-01
[6] Success | Fire: 2024-08-20 | Normal: 2023-08-07
[7] Success | Fire: 2024-09-13 | Normal: 2023-08-31
[8] Success | Fire: 2024-07-16 | Normal: 2023-07-03
[9] Success | Fire: 2024-09-07 | Normal: 2023-09-02
[10] Skipped Normal: Too cloudy around 2023-12-01
[11] Success | Fire: 2024-09-22 | Normal: 2023-09-10
[12] Success | Fire: 2024-09-14 | Normal: 2023-09-09
[13] Success | Fire: 2024-09-23 | Normal: 2023-09-10
[14] Success | Fire: 2024-08-29 | Normal: 2023-08-16
[15] Success | Fire: 2024-09-09 | Normal: 2023-09-09
[16] Success | Fire: 2024-0



[740] Success | Fire: 2024-08-16 | Normal: 2023-08-14
[741] Success | Fire: 2024-09-06 | Normal: 2023-08-25
[742] Success | Fire: 2024-08-24 | Normal: 2023-08-15
[743] Success | Fire: 2024-09-18 | Normal: 2023-09-07
[744] Success | Fire: 2024-08-28 | Normal: 2023-08-16
[745] Success | Fire: 2024-08-07 | Normal: 2023-07-25
[746] Success | Fire: 2024-11-09 | Normal: 2023-11-01
[747] Success | Fire: 2024-11-13 | Normal: 2023-10-31
[748] Success | Fire: 2024-07-24 | Normal: 2023-07-11
[749] Success | Fire: 2024-08-30 | Normal: 2023-08-17
[750] Success | Fire: 2024-09-11 | Normal: 2023-08-31
[751] Success | Fire: 2024-11-07 | Normal: 2023-10-27
[752] Success | Fire: 2024-09-11 | Normal: 2023-09-03
[753] Success | Fire: 2024-08-13 | Normal: 2023-07-31
[754] Success | Fire: 2024-07-30 | Normal: 2023-07-17
[755] Success | Fire: 2024-09-08 | Normal: 2023-09-04
[756] Success | Fire: 2024-09-14 | Normal: 2023-09-02
[757] Success | Fire: 2024-08-10 | Normal: 2023-07-28
[758] Success | Fire: 2024-0



[2805] Success | Fire: 2024-07-30 | Normal: 2023-07-17
[2806] Success | Fire: 2024-06-20 | Normal: 2023-06-08
[2807] Success | Fire: 2024-11-20 | Normal: 2023-11-09
[2808] Success | Fire: 2024-08-25 | Normal: 2023-08-12
[2809] Success | Fire: 2024-08-31 | Normal: 2023-08-18
[2810] Success | Fire: 2024-09-19 | Normal: 2023-09-10
[2811] Success | Fire: 2024-06-26 | Normal: 2023-06-19
[2812] Success | Fire: 2024-08-04 | Normal: 2023-07-23
[2813] Success | Fire: 2024-10-11 | Normal: 2023-10-01
[2814] Success | Fire: 2024-09-02 | Normal: 2023-08-20
[2815] Success | Fire: 2024-08-22 | Normal: 2023-08-09
[2816] Success | Fire: 2024-11-28 | Normal: 2023-11-15
[2817] Success | Fire: 2024-08-16 | Normal: 2023-08-03
[2818] Success | Fire: 2024-09-04 | Normal: 2023-08-22
[2819] Success | Fire: 2024-08-03 | Normal: 2023-07-21
[2820] Success | Fire: 2024-08-22 | Normal: 2023-08-09
[2821] Success | Fire: 2024-08-18 | Normal: 2023-08-06
[2822] Success | Fire: 2024-04-27 | Normal: 2023-04-16
[2823] Suc



[4869] Success | Fire: 2024-09-07 | Normal: 2023-08-28
[4870] Success | Fire: 2024-10-02 | Normal: 2023-09-19
[4871] Success | Fire: 2024-11-06 | Normal: 2023-10-27
[4872] Success | Fire: 2024-09-11 | Normal: 2023-09-02
[4873] Success | Fire: 2024-08-25 | Normal: 2023-08-12
[4874] Success | Fire: 2024-08-30 | Normal: 2023-08-18
[4875] Success | Fire: 2024-07-28 | Normal: 2023-07-19
[4876] Success | Fire: 2024-09-09 | Normal: 2023-08-28
[4877] Success | Fire: 2024-08-05 | Normal: 2023-07-23
[4878] Success | Fire: 2024-08-07 | Normal: 2023-07-26
[4879] Success | Fire: 2024-11-19 | Normal: 2023-11-13
[4880] Success | Fire: 2024-08-27 | Normal: 2023-08-16
[4881] Success | Fire: 2024-09-25 | Normal: 2023-09-22
[4882] Success | Fire: 2024-10-02 | Normal: 2023-09-20
[4883] Success | Fire: 2024-10-21 | Normal: 2023-10-08
[4884] Success | Fire: 2024-09-03 | Normal: 2023-09-03
[4885] Success | Fire: 2024-09-10 | Normal: 2023-08-28
[4886] Success | Fire: 2024-08-19 | Normal: 2023-08-06
[4887] Suc

In [None]:
# Bundle the dataset folder into "<OUTPUT_DIR>.zip" in the working directory.
# Driven by the configured OUTPUT_DIR (no hard-coded absolute path) and done in
# pure Python so it does not depend on an external `zip` binary. Archive
# members are stored relative to the working directory, i.e. "<OUTPUT_DIR>/...".
shutil.make_archive(OUTPUT_DIR, 'zip', root_dir='.', base_dir=OUTPUT_DIR)

In [54]:
import shutil
# shutil.rmtree("/content/modis_dataset")