In [27]:
import pandas as pd

In [28]:
import os
dir = os.path.dirname(os.path.dirname(os.getcwd()))
print(dir)

c:\Users\thsat\sat_imagery_ml\sat-imagery-ml-1


In [29]:
path_eruptions_preprocessed = os.path.join(dir, "Eruption_data", "eruptions_preprocessed.csv")
df_eruptions = pd.read_csv(path_eruptions_preprocessed)
# df_eruptions.head()

In [30]:
import numpy as np

import ee
ee.Initialize(project='bustling-syntax-392010')
print('done')

# parse and reorder bbox column to correct coordinate format for Earth Engine
def parse_bbox_string(bbox_str):
    try:
        coords = list(map(float, bbox_str.strip('[] ').split(',')))
        if len(coords) != 4:
            raise ValueError
        lat1, lon1, lat2, lon2 = coords
        return ee.Geometry.BBox(lon1, lat1, lon2, lat2)
    except Exception:
        return np.nan  # return NaN for invalid rows

df_eruptions['bbox'] = df_eruptions['bbox'].astype(str)

df_eruptions['ee_bbox'] = df_eruptions['bbox'].apply(parse_bbox_string)
df_eruptions = df_eruptions.dropna(subset=['ee_bbox'])

done


In [31]:
targets = ["300250", "243080","260010", "273030", "263250", "341090", "211060", "211040", "332020", "223030", "332010"]
df_eruptions = df_eruptions[df_eruptions['Volcano Number'].astype(str).isin(targets)]

df_eruptions = df_eruptions[df_eruptions['Start Year'] >= 2016]

# df_eruptions

In [32]:
print(df_eruptions['Start_Date'].describe())
print("\nUnique Start Dates:", df_eruptions['Start_Date'].nunique())
print("\nDate Range:", df_eruptions['Start_Date'].min(), "to", df_eruptions['Start_Date'].max())

count             27
unique            23
top       2022-11-27
freq               3
Name: Start_Date, dtype: object

Unique Start Dates: 23

Date Range: 2016-12-05 to 2024-12-24


In [33]:
df_22593_Karymsky = df_eruptions[df_eruptions['Eruption Number'] == 22581]
df_22593_Karymsky

Unnamed: 0,Volcano Number,Eruption Number,Volcano Name,Start Year Modifier,Start Year,Start Year Uncertainty,Start Month,Start Day Modifier,Start Day,Start Day Uncertainty,...,End Month,End Day Modifier,End Day,End Day Uncertainty,Latitude,Longitude,Start_Date,End_Date,bbox,ee_bbox


In [34]:
df_22593_Raung = df_eruptions[df_eruptions['Eruption Number'] == 22593]
df_22593_Raung

Unnamed: 0,Volcano Number,Eruption Number,Volcano Name,Start Year Modifier,Start Year,Start Year Uncertainty,Start Month,Start Day Modifier,Start Day,Start Day Uncertainty,...,End Month,End Day Modifier,End Day,End Day Uncertainty,Latitude,Longitude,Start_Date,End_Date,bbox,ee_bbox


In [35]:
df_22608_bulusan = df_eruptions[df_eruptions['Eruption Number'] == 22608]
df_22608_bulusan

Unnamed: 0,Volcano Number,Eruption Number,Volcano Name,Start Year Modifier,Start Year,Start Year Uncertainty,Start Month,Start Day Modifier,Start Day,Start Day Uncertainty,...,End Month,End Day Modifier,End Day,End Day Uncertainty,Latitude,Longitude,Start_Date,End_Date,bbox,ee_bbox


# Image config

# Get data

In [36]:
# False-color viz parameters (for the lava flow) with increased contrast
false_color_vis = {
    'bands': ['B12', 'B11', 'B8A'],
    'min': 0,
    'max': 4000,  # increase max value for higher contrast
    'gamma': 0.8  # Lower gamma to enhance saturation
}

In [37]:
# Output base directory
output_dir = './sentinel_pngs/'
os.makedirs(output_dir, exist_ok=True)

print("Saving images to:", os.path.abspath(output_dir))

Saving images to: c:\Users\thsat\sat_imagery_ml\sat-imagery-ml-1\1_DatasetCharacteristics\EruptionImages\sentinel_pngs


# YesActivity // NoActivity

In [38]:
import os
import requests
import pandas as pd
import ee
from datetime import timedelta


def download_false_color_series(row, duration_days: int, cloudy_pixel_percentage: int, output_dir: str, false_color_vis: dict):
    eruption_id = row['Volcano Name'].replace(' ', '_') + "_" + str(row['Eruption Number'])
    start_date = pd.to_datetime(row['Start_Date'])
    bbox = row['ee_bbox']  # Must be ee.Geometry.BBox or Polygon

    start_window = (start_date - timedelta(days=duration_days)).strftime('%Y-%m-%d')
    end_window = (start_date + timedelta(days=duration_days)).strftime('%Y-%m-%d')

    s2 = ee.ImageCollection("COPERNICUS/S2_HARMONIZED") \
        .filterDate(start_window, end_window) \
        .filterBounds(bbox) \
        .filterMetadata('CLOUDY_PIXEL_PERCENTAGE', 'less_than', cloudy_pixel_percentage)

    try:
        s2_list = s2.toList(s2.size())
        count = s2.size().getInfo()

        if count == 0:
            print(f"No images found for {eruption_id} in the specified range.")
            return

        # Create base directory for eruption
        eruption_dir = os.path.join(output_dir, eruption_id)
        no_activity_dir = os.path.join(eruption_dir, "NoActivity")
        yes_activity_dir = os.path.join(eruption_dir, "YesActivity")
        os.makedirs(no_activity_dir, exist_ok=True)
        os.makedirs(yes_activity_dir, exist_ok=True)

        for i in range(0, count):
            try:
                img = ee.Image(s2_list.get(i))
                date_str = img.date().format('YYYY-MM-dd').getInfo()
                img_date = pd.to_datetime(date_str)

                # Decide subfolder based on image date
                subfolder = no_activity_dir if img_date < start_date else yes_activity_dir

                # Select and visualize bands
                raw_img = img.clip(bbox).select(['B12', 'B11', 'B8A'])
                vis_img = raw_img.visualize(**false_color_vis)

                url = vis_img.getThumbURL({
                    'region': bbox.getInfo(),
                    'dimensions': 512,
                    'format': 'png'
                })

                file_path = os.path.join(subfolder, f'{eruption_id}_{date_str}.png')

                response = requests.get(url)
                if response.status_code == 200:
                    with open(file_path, 'wb') as f:
                        f.write(response.content)
                    print(f"Downloaded: {file_path}")
                else:
                    print(f"Failed ({response.status_code}) for {eruption_id} on {date_str}")

            except Exception as e:
                print(f"Error processing image {i} for {eruption_id}: {e}")

    except Exception as e:
        print(f"Failed to process {eruption_id}: {e}")


In [39]:
# Example loop over a DataFrame containing eruption data
for _, row in df_eruptions.iterrows():
    download_false_color_series(row, duration_days=365, cloudy_pixel_percentage=20, output_dir=output_dir, false_color_vis=false_color_vis)

Downloaded: ./sentinel_pngs/Bezymianny_22605\NoActivity\Bezymianny_22605_2023-12-25.png
Downloaded: ./sentinel_pngs/Bezymianny_22605\NoActivity\Bezymianny_22605_2023-12-25.png
Downloaded: ./sentinel_pngs/Bezymianny_22605\NoActivity\Bezymianny_22605_2023-12-25.png
Downloaded: ./sentinel_pngs/Bezymianny_22605\NoActivity\Bezymianny_22605_2023-12-25.png
Downloaded: ./sentinel_pngs/Bezymianny_22605\NoActivity\Bezymianny_22605_2024-01-04.png
Downloaded: ./sentinel_pngs/Bezymianny_22605\NoActivity\Bezymianny_22605_2024-01-04.png
Downloaded: ./sentinel_pngs/Bezymianny_22605\NoActivity\Bezymianny_22605_2024-01-04.png
Downloaded: ./sentinel_pngs/Bezymianny_22605\NoActivity\Bezymianny_22605_2024-01-04.png
Downloaded: ./sentinel_pngs/Bezymianny_22605\NoActivity\Bezymianny_22605_2024-01-07.png
Downloaded: ./sentinel_pngs/Bezymianny_22605\NoActivity\Bezymianny_22605_2024-01-07.png
Downloaded: ./sentinel_pngs/Bezymianny_22605\NoActivity\Bezymianny_22605_2024-01-09.png
Downloaded: ./sentinel_pngs/Bezy

# archive

In [None]:
# # without yesactivity noactivity subfolders

# import requests
# from datetime import timedelta


# def download_false_color_series(row, duration_days: int, cloudy_pixel_percentage: int):
#     eruption_id = row['Volcano Name'].replace(' ', '_') + "_" + str(row['Eruption Number'])
#     start_date = pd.to_datetime(row['Start_Date'])
#     bbox = row['ee_bbox']  # Must be ee.Geometry.BBox or Polygon

#     start_window = (start_date - timedelta(days=duration_days)).strftime('%Y-%m-%d')
#     end_window = (start_date + timedelta(days=duration_days)).strftime('%Y-%m-%d')

#     s2 = ee.ImageCollection("COPERNICUS/S2_HARMONIZED") \
#         .filterDate(start_window, end_window) \
#         .filterBounds(bbox) \
#         .filterMetadata('CLOUDY_PIXEL_PERCENTAGE', 'less_than', cloudy_pixel_percentage)

#     try:
#         s2_list = s2.toList(s2.size())
#         count = s2.size().getInfo()

#         if count == 0:
#             print(f"No images found for {eruption_id} in the specified range.")
#             return

#         eruption_dir = os.path.join(output_dir, eruption_id)
#         os.makedirs(eruption_dir, exist_ok=True)


#         for i in range(0, count):
#             try:
#                 img = ee.Image(s2_list.get(i))
#                 date_str = img.date().format('YYYY-MM-dd').getInfo()

#                 # select raw bands before visualizing
#                 raw_img = img.clip(bbox).select(['B12', 'B11', 'B8A'])
#                 #print("Raw bands:", raw_img.bandNames().getInfo())

#                 vis_img = raw_img.visualize(**false_color_vis)
#                 #print("selected bands:", vis_img.bandNames().getInfo())

#                 url = vis_img.getThumbURL({
#                     'region': bbox.getInfo(),
#                     'dimensions': 512,
#                     'format': 'png'
#                 })

#                 file_path = os.path.join(eruption_dir, f'{eruption_id}_{date_str}.png')

#                 response = requests.get(url)
#                 if response.status_code == 200:
#                     with open(file_path, 'wb') as f:
#                         f.write(response.content)
#                     print(f"Downloaded: {file_path}")
#                 else:
#                     print(f"Failed ({response.status_code}) for {eruption_id} on {date_str}")

#             except Exception as e:
#                 print(f"Error processing image {i} for {eruption_id}: {e}")

#     except Exception as e:
#         print(f"Failed to process {eruption_id}: {e}")

In [None]:
# # Example loop over a DataFrame containing eruption data
# for _, row in df_eruptions.iterrows():
#     download_false_color_series(row, duration_days=365, cloudy_pixel_percentage=20)