In [1]:
import pandas as pd

In [None]:
import os
# Project root: two directory levels above the notebook's working directory.
# NOTE: `dir` shadows the builtin of the same name; the name is kept because
# the data-loading cell below reads it.
_parent_of_cwd = os.path.dirname(os.getcwd())
dir = os.path.dirname(_parent_of_cwd)
print(dir)

In [3]:
# Load the preprocessed eruption table from <dir>/Eruption_data/.
eruption_data_dir = os.path.join(dir, "Eruption_data")
path_eruptions_preprocessed = os.path.join(eruption_data_dir, "eruptions_preprocessed.csv")
df_eruptions = pd.read_csv(filepath_or_buffer=path_eruptions_preprocessed)
# Uncomment to preview the first rows:
# df_eruptions.head()

In [4]:
# Number of distinct values in 'Eruption Number' (a missing value, if any, counts once).
df_eruptions['Eruption Number'].nunique(dropna=False)

531

In [5]:
import numpy as np

import ee
# Initialise the Earth Engine client against the given Cloud project.
# NOTE(review): the project id is hard-coded — confirm it is the intended
# project for anyone else running this notebook.
ee.Initialize(project='bustling-syntax-392010')
# Marker printed once initialisation has returned without raising.
print('done')

# Turn a stringified "[lat1, lon1, lat2, lon2]" bbox into an Earth Engine geometry.
def parse_bbox_string(bbox_str):
    """Parse a bbox string and build an ``ee.Geometry.BBox`` from it.

    The string is stripped of surrounding brackets/spaces, split on commas and
    read as four floats in the order ``lat1, lon1, lat2, lon2``. The values are
    passed to ``ee.Geometry.BBox`` longitude-first: ``(lon1, lat1, lon2, lat2)``.

    Returns:
        The ``ee.Geometry.BBox`` on success, or ``np.nan`` when anything goes
        wrong (non-string input, non-numeric parts, not exactly four values,
        or an error raised while building the geometry), so that invalid rows
        can be removed afterwards with ``dropna``.
    """
    try:
        values = [float(part) for part in bbox_str.strip('[] ').split(',')]
        # Unpacking raises ValueError unless there are exactly four values.
        first_lat, first_lon, second_lat, second_lon = values
        return ee.Geometry.BBox(first_lon, first_lat, second_lon, second_lat)
    except Exception:
        # Deliberately broad: any failure marks the row as invalid.
        return np.nan

# Stringify 'bbox' (missing values become 'nan'), convert every entry with
# parse_bbox_string, then drop the rows whose bbox could not be converted.
df_eruptions = (
    df_eruptions
    .assign(bbox=lambda frame: frame['bbox'].astype(str))
    .assign(ee_bbox=lambda frame: frame['bbox'].apply(parse_bbox_string))
    .dropna(subset=['ee_bbox'])
)

done


In [6]:
# Volcano Numbers to keep, written as strings because the column is compared
# after being cast to str.
targets = [
    "300250", "243080", "260010", "273030", "263250", "341090",
    "211060", "211040", "332020", "223030", "332010",
]

# Keep the selected volcanoes, then only eruptions with Start Year >= 2016.
df_eruptions = (
    df_eruptions
    .loc[lambda frame: frame['Volcano Number'].astype(str).isin(targets)]
    .loc[lambda frame: frame['Start Year'] >= 2016]
)

In [7]:
# Distinct volcano names remaining in df_eruptions, in order of first appearance.
unique_volcano_names = df_eruptions['Volcano Name'].unique()
print(unique_volcano_names)

['Bezymianny' 'Kilauea' 'Home Reef' 'Mayon' 'Barren Island' 'Etna'
 'Mauna Loa' 'Merapi']


In [8]:
# Summary of the eruption start dates: describe(), distinct count, min and max.
start_dates = df_eruptions['Start_Date']
print(start_dates.describe())
print("\nUnique Start Dates:", start_dates.nunique())
print("\nDate Range:", start_dates.min(), "to", start_dates.max())

count             27
unique            23
top       2022-11-27
freq               3
Name: Start_Date, dtype: object

Unique Start Dates: 23

Date Range: 2016-12-05 to 2024-12-24


# Image config

# Get data

In [None]:
# Visualization parameters for the false-colour composite (used to highlight lava flows).
# They are unpacked into ee.Image.visualize(**false_color_vis) by the download functions.
false_color_vis = dict(
    bands=['B12', 'B11', 'B8A'],  # bands assigned to the red, green and blue channels
    min=0,                        # lower end of the display stretch
    max=4000,                     # upper end of the display stretch
    gamma=0.8,                    # gamma correction factor
)

In [None]:
# Root folder that receives every downloaded PNG; created if it does not exist yet.
output_dir = './sentinel_pngs/'
os.makedirs(output_dir, exist_ok=True)

print(f"Saving images to: {os.path.abspath(output_dir)}")

# YesActivity // NoActivity

In [None]:
import os
import requests
import pandas as pd
import ee
from datetime import timedelta


def download_false_color_series(row, duration_days: int, cloudy_pixel_percentage: int, output_dir: str,
                                false_color_vis: dict, request_timeout: float = 60.0):
    """Download false-colour Sentinel-2 thumbnails around one eruption's start date.

    Queries ``COPERNICUS/S2_HARMONIZED`` for images that intersect
    ``row['ee_bbox']`` between ``Start_Date - duration_days`` and
    ``Start_Date + duration_days`` and whose ``CLOUDY_PIXEL_PERCENTAGE`` is
    below ``cloudy_pixel_percentage``. Each image is clipped to the bbox,
    rendered with ``false_color_vis`` and saved as a PNG under::

        <output_dir>/<Volcano_Name>_<Eruption Number>/NoActivity/   image date <  Start_Date
        <output_dir>/<Volcano_Name>_<Eruption Number>/YesActivity/  image date >= Start_Date

    The function is best-effort: every failure is reported with ``print`` and
    swallowed, so one bad eruption or image never aborts a batch loop.

    Args:
        row: Mapping-like record (e.g. a DataFrame row) providing
            'Volcano Name', 'Eruption Number', 'Start_Date' and 'ee_bbox'.
        duration_days: Half-width of the search window, in days.
        cloudy_pixel_percentage: Exclusive upper bound on CLOUDY_PIXEL_PERCENTAGE.
        output_dir: Base directory for the per-eruption folders.
        false_color_vis: Keyword arguments forwarded to ``ee.Image.visualize``.
        request_timeout: Seconds to wait for each thumbnail HTTP request before
            giving up on that image.

    Returns:
        None.
    """
    eruption_id = row['Volcano Name'].replace(' ', '_') + "_" + str(row['Eruption Number'])

    try:
        # Parsing and query construction live inside the try block so that a
        # malformed row is reported like any other failure instead of raising.
        start_date = pd.to_datetime(row['Start_Date'])
        if pd.isna(start_date):
            print(f"Skipping {eruption_id}: missing Start_Date.")
            return

        bbox = row['ee_bbox']  # Must be ee.Geometry.BBox or Polygon

        half_window = timedelta(days=duration_days)
        start_window = (start_date - half_window).strftime('%Y-%m-%d')
        end_window = (start_date + half_window).strftime('%Y-%m-%d')

        # filter(ee.Filter.lt(...)) replaces the deprecated filterMetadata(..., 'less_than', ...).
        s2 = (
            ee.ImageCollection("COPERNICUS/S2_HARMONIZED")
            .filterDate(start_window, end_window)
            .filterBounds(bbox)
            .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', cloudy_pixel_percentage))
        )

        count = s2.size().getInfo()

        if count == 0:
            print(f"No images found for {eruption_id} in the specified range.")
            return

        s2_list = s2.toList(count)

        # base directory for eruption
        eruption_dir = os.path.join(output_dir, eruption_id)
        no_activity_dir = os.path.join(eruption_dir, "NoActivity")
        yes_activity_dir = os.path.join(eruption_dir, "YesActivity")
        os.makedirs(no_activity_dir, exist_ok=True)
        os.makedirs(yes_activity_dir, exist_ok=True)

        for i in range(count):
            try:
                img = ee.Image(s2_list.get(i))
                date_str = img.date().format('YYYY-MM-dd').getInfo()
                img_date = pd.to_datetime(date_str)

                # Images dated before the eruption start are the "no activity" class.
                subfolder = no_activity_dir if img_date < start_date else yes_activity_dir

                # Select and visualize bands
                raw_img = img.clip(bbox).select(['B12', 'B11', 'B8A'])
                vis_img = raw_img.visualize(**false_color_vis)

                url = vis_img.getThumbURL({
                    'region': bbox.getInfo(),
                    'dimensions': 512,
                    'format': 'png'
                })

                # NOTE: the file name holds only the id and the date, so two images
                # with the same date are written to the same path and the later one wins.
                file_path = os.path.join(subfolder, f'{eruption_id}_{date_str}.png')

                # A timeout keeps one stalled request from hanging the whole batch;
                # the resulting exception is reported by the handler below.
                response = requests.get(url, timeout=request_timeout)
                if response.status_code == 200:
                    with open(file_path, 'wb') as f:
                        f.write(response.content)
                    print(f"Downloaded: {file_path}")
                else:
                    print(f"Failed ({response.status_code}) for {eruption_id} on {date_str}")

            except Exception as e:
                print(f"Error processing image {i} for {eruption_id}: {e}")

    except Exception as e:
        print(f"Failed to process {eruption_id}: {e}")


In [None]:
# Download a +/- 365-day image series for every row of df_eruptions.
for _, eruption_row in df_eruptions.iterrows():
    download_false_color_series(
        eruption_row,
        duration_days=365,
        cloudy_pixel_percentage=20,
        output_dir=output_dir,
        false_color_vis=false_color_vis,
    )

# Specific bbox and date

In [None]:
import os
import requests
import pandas as pd
import ee
from datetime import datetime, timedelta

# Same Earth Engine initialisation call as in the setup cell near the top.
# NOTE(review): presumably repeated so this section can be run on its own — confirm it is still needed.
ee.Initialize(project='bustling-syntax-392010')

def download_false_color_series_bbox(bbox_coords: list, volcano_name: str, volcano_number: int, center_date_str: str,
                                     duration_days: int, cloudy_pixel_percentage: int, output_dir: str,
                                     false_color_vis: dict, request_timeout: float = 60.0):
    """Download false-colour Sentinel-2 thumbnails for an explicit bbox and centre date.

    Queries ``COPERNICUS/S2_HARMONIZED`` for images that intersect the bbox
    between ``center_date - duration_days`` and ``center_date + duration_days``
    and whose ``CLOUDY_PIXEL_PERCENTAGE`` is below ``cloudy_pixel_percentage``.
    Each image is clipped to the bbox, rendered with ``false_color_vis`` and
    saved as a PNG under::

        <output_dir>/<volcano_name>_<volcano_number>/NoActivity/   image date <  centre date
        <output_dir>/<volcano_name>_<volcano_number>/YesActivity/  image date >= centre date

    The function is best-effort: every failure is reported with ``print`` and
    swallowed rather than raised.

    Args:
        bbox_coords: Four numbers unpacked into ``ee.Geometry.BBox``, i.e. in
            the order ``[west, south, east, north]`` (longitude first).
        volcano_name: Name used in messages and (spaces replaced by ``_``) in paths.
        volcano_number: Identifier appended to the folder and file names.
        center_date_str: Date string parsed with ``pd.to_datetime``.
        duration_days: Half-width of the search window, in days.
        cloudy_pixel_percentage: Exclusive upper bound on CLOUDY_PIXEL_PERCENTAGE.
        output_dir: Base directory for the per-volcano folders.
        false_color_vis: Keyword arguments forwarded to ``ee.Image.visualize``.
        request_timeout: Seconds to wait for each thumbnail HTTP request before
            giving up on that image.

    Returns:
        None.
    """
    try:
        # Parsing and query construction live inside the try block so that bad
        # inputs are reported like any other failure instead of raising.
        center_date = pd.to_datetime(center_date_str)
        if pd.isna(center_date):
            print(f"Skipping {volcano_name}: missing center date.")
            return

        bbox = ee.Geometry.BBox(*bbox_coords)

        half_window = timedelta(days=duration_days)
        start_window = (center_date - half_window).strftime('%Y-%m-%d')
        end_window = (center_date + half_window).strftime('%Y-%m-%d')

        # Get Sentinel-2 data.
        # filter(ee.Filter.lt(...)) replaces the deprecated filterMetadata(..., 'less_than', ...).
        s2 = (
            ee.ImageCollection("COPERNICUS/S2_HARMONIZED")
            .filterDate(start_window, end_window)
            .filterBounds(bbox)
            .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', cloudy_pixel_percentage))
        )

        count = s2.size().getInfo()

        if count == 0:
            print(f"No images found for {volcano_name} in the specified range.")
            return

        s2_list = s2.toList(count)

        # directories
        volcano_id = f"{volcano_name.replace(' ', '_')}_{volcano_number}"
        volcano_dir = os.path.join(output_dir, volcano_id)
        before_dir = os.path.join(volcano_dir, "NoActivity")
        after_dir = os.path.join(volcano_dir, "YesActivity")
        os.makedirs(before_dir, exist_ok=True)
        os.makedirs(after_dir, exist_ok=True)

        for i in range(count):
            try:
                img = ee.Image(s2_list.get(i))
                date_str = img.date().format('YYYY-MM-dd').getInfo()
                img_date = pd.to_datetime(date_str)

                # Images dated before the centre date are the "no activity" class.
                subfolder = before_dir if img_date < center_date else after_dir

                raw_img = img.clip(bbox).select(['B12', 'B11', 'B8A'])
                vis_img = raw_img.visualize(**false_color_vis)

                url = vis_img.getThumbURL({
                    'region': bbox.getInfo(),
                    'dimensions': 512,
                    'format': 'png'
                })

                # NOTE: the file name holds only the id and the date, so two images
                # with the same date are written to the same path and the later one wins.
                file_path = os.path.join(subfolder, f'{volcano_id}_{date_str}.png')

                # A timeout keeps one stalled request from hanging the whole run;
                # the resulting exception is reported by the handler below.
                response = requests.get(url, timeout=request_timeout)
                if response.status_code == 200:
                    with open(file_path, 'wb') as f:
                        f.write(response.content)
                    print(f"Downloaded: {file_path}")
                else:
                    print(f"Failed ({response.status_code}) on {date_str}")

            except Exception as e:
                print(f"Error processing image {i}: {e}")

    except Exception as e:
        print(f"Failed to process {volcano_name}: {e}")



In [None]:
import os

# Output folder and visualization settings for the single-bbox download below.
output_dir = './sentinel_pngs/'
os.makedirs(output_dir, exist_ok=True)
print(f"Saving images to: {os.path.abspath(output_dir)}")

# Unpacked into ee.Image.visualize(**false_color_vis) by the download functions.
false_color_vis = dict(
    bands=['B12', 'B11', 'B8A'],  # bands assigned to the red, green and blue channels
    min=0,                        # lower end of the display stretch
    max=4000,                     # upper end of the display stretch
    gamma=0.8,                    # gamma correction factor
)

In [None]:
# Region and centre date for a one-off download.
# 'bbox' is unpacked straight into ee.Geometry.BBox by
# download_false_color_series_bbox, so it must be ordered
# [west, south, east, north] (longitude first).
# Exactly one (bbox_info, center_date_str) pair should be active; the
# alternatives are kept commented out.

# bbox_info = {
#     'Volcano Name': 'Popocatepetl',
#     'Volcano Number': 341090,
#     'bbox': [-98.642, 19.003, -98.602, 19.043]
# }
# center_date_str = "2025-01-20"

bbox_info = {
    'Volcano Name': 'Stromboli',
    # Stromboli's GVP volcano number; 223030, used here before, is Nyiragongo's.
    # The number becomes part of the output folder and file names.
    'Volcano Number': 211040,
    'bbox': [15.188594111321532, 38.7723757225835, 15.247343508352856, 38.81131467938869]
}
center_date_str = "2019-07-12"

# The Nyiragongo bbox was previously listed latitude-first; it is reordered
# here to [west, south, east, north].
# bbox_info = {
#     'Volcano Name': 'Nyiragongo',
#     'Volcano Number': 223030,
#     'bbox': [29.24300017519119, -1.5284340969712804, 29.255567353688676, -1.515805117200142]
# }
# center_date_str = "2021-05-01"

# bbox_info = {
#     'Volcano Name': 'Home Reef',
#     'Volcano Number': 243080,
#     'bbox': [-174.79, -19.007, -174.76000000000002, -18.977]
# }
# center_date_str = "2024-06-20"

download_false_color_series_bbox(
    bbox_coords=bbox_info['bbox'],
    volcano_name=bbox_info['Volcano Name'],
    volcano_number=bbox_info['Volcano Number'],
    center_date_str=center_date_str,
    duration_days=365,
    cloudy_pixel_percentage=20,
    output_dir=output_dir,
    false_color_vis=false_color_vis
)