In [37]:
import pandas as pd

In [38]:
import os
# The input workbook sits in an "Eruption_data" folder next to the notebook's
# working directory, i.e. one level above the current working directory.
base_dir = os.path.split(os.getcwd())[0]
path_eruptions = os.path.join(
    base_dir,
    "Eruption_data",
    "GVP_Eruption_Search_Result.xlsx",
)

In [39]:
# Read the "Eruption List" sheet; header=1 takes the column names from the
# sheet's second row (row index 1).
df_eruptions = pd.read_excel(
    path_eruptions,
    engine='openpyxl',
    sheet_name='Eruption List',
    header=1,
)

In [40]:
# Preview the first five rows of the freshly loaded sheet.
df_eruptions.head(n=5)

Unnamed: 0,Volcano Number,Volcano Name,Eruption Number,Eruption Category,Area of Activity,VEI,VEI Modifier,Start Year Modifier,Start Year,Start Year Uncertainty,...,Evidence Method (dating),End Year Modifier,End Year,End Year Uncertainty,End Month,End Day Modifier,End Day,End Day Uncertainty,Latitude,Longitude
0,300130,Karymsky,22609,Confirmed Eruption,,,,,2025,,...,Observations: Reported,>,2025.0,,5.0,,2.0,,54.049,159.443
1,273010,Bulusan,22608,Confirmed Eruption,,,,,2025,,...,Observations: Reported,>,2025.0,,5.0,,2.0,,12.769,124.056
2,334050,Northern EPR at 9.8°N,22613,Confirmed Eruption,"Tica hydrothermal vent, 9°50’N (9.83°N)",0.0,,,2025,,...,Observations: Reported,,2025.0,,4.0,,29.0,,9.83,-104.3
3,300260,Klyuchevskoy,22610,Confirmed Eruption,,,,,2025,,...,Observations: Reported,>,2025.0,,5.0,,2.0,,56.056,160.642
4,371020,Reykjanes,22612,Confirmed Eruption,,,,,2025,,...,Observations: Reported,,2025.0,,4.0,,1.0,,63.817,-22.717


In [41]:
# Show the dtype pandas inferred for each column.
inferred_dtypes = df_eruptions.dtypes
print(inferred_dtypes)

Volcano Number                int64
Volcano Name                 object
Eruption Number               int64
Eruption Category            object
Area of Activity             object
VEI                         float64
VEI Modifier                 object
Start Year Modifier          object
Start Year                    int64
Start Year Uncertainty      float64
Start Month                 float64
Start Day Modifier           object
Start Day                   float64
Start Day Uncertainty       float64
Evidence Method (dating)     object
End Year Modifier            object
End Year                    float64
End Year Uncertainty        float64
End Month                   float64
End Day Modifier             object
End Day                     float64
End Day Uncertainty         float64
Latitude                    float64
Longitude                   float64
dtype: object


In [42]:
# Columns read as generic `object` are stored with pandas' string dtype instead.
text_cols = df_eruptions.select_dtypes(include='object').columns

# The date-part columns are converted to nullable Int64 so they hold whole
# numbers while still allowing missing values.
int_cols = ['Start Year', 'Start Month', 'Start Day', 'End Year', 'End Month', 'End Day']

df_eruptions = df_eruptions.astype({
    **dict.fromkeys(text_cols, 'string'),
    **dict.fromkeys(int_cols, 'Int64'),
})

print(df_eruptions.dtypes)

Volcano Number                       int64
Volcano Name                string[python]
Eruption Number                      int64
Eruption Category           string[python]
Area of Activity            string[python]
VEI                                float64
VEI Modifier                string[python]
Start Year Modifier         string[python]
Start Year                           Int64
Start Year Uncertainty             float64
Start Month                          Int64
Start Day Modifier          string[python]
Start Day                            Int64
Start Day Uncertainty              float64
Evidence Method (dating)    string[python]
End Year Modifier           string[python]
End Year                             Int64
End Year Uncertainty               float64
End Month                            Int64
End Day Modifier            string[python]
End Day                              Int64
End Day Uncertainty                float64
Latitude                           float64
Longitude  

In [43]:
# Preview again after the dtype conversion above.
df_eruptions.head(n=5)

Unnamed: 0,Volcano Number,Volcano Name,Eruption Number,Eruption Category,Area of Activity,VEI,VEI Modifier,Start Year Modifier,Start Year,Start Year Uncertainty,...,Evidence Method (dating),End Year Modifier,End Year,End Year Uncertainty,End Month,End Day Modifier,End Day,End Day Uncertainty,Latitude,Longitude
0,300130,Karymsky,22609,Confirmed Eruption,,,,,2025,,...,Observations: Reported,>,2025,,5,,2,,54.049,159.443
1,273010,Bulusan,22608,Confirmed Eruption,,,,,2025,,...,Observations: Reported,>,2025,,5,,2,,12.769,124.056
2,334050,Northern EPR at 9.8°N,22613,Confirmed Eruption,"Tica hydrothermal vent, 9°50’N (9.83°N)",0.0,,,2025,,...,Observations: Reported,,2025,,4,,29,,9.83,-104.3
3,300260,Klyuchevskoy,22610,Confirmed Eruption,,,,,2025,,...,Observations: Reported,>,2025,,5,,2,,56.056,160.642
4,371020,Reykjanes,22612,Confirmed Eruption,,,,,2025,,...,Observations: Reported,,2025,,4,,1,,63.817,-22.717


In [27]:
# Keep only identifiers, the start/end date parts (with their modifiers and
# uncertainties) and the coordinates; every other column is dropped.
cols = [
    # identifiers
    'Volcano Number',
    'Eruption Number',
    'Volcano Name',
    # start date
    'Start Year Modifier',
    'Start Year',
    'Start Year Uncertainty',
    'Start Month',
    'Start Day Modifier',
    'Start Day',
    'Start Day Uncertainty',
    # end date
    'End Year Modifier',
    'End Year',
    'End Year Uncertainty',
    'End Month',
    'End Day Modifier',
    'End Day',
    'End Day Uncertainty',
    # location
    'Latitude',
    'Longitude',
]
df_eruptions = df_eruptions[cols]

In [28]:
# Restrict the table to eruptions whose start year is 2010 or later.
started_2010_or_later = df_eruptions['Start Year'].ge(2010)
df_eruptions = df_eruptions[started_2010_or_later]

In [29]:
# Count missing values in each of the six date-part columns
# (Start/End x Year/Month/Day) and report them next to the row count.
date_part_cols = [
    f"{edge} {part}"
    for edge in ("Start", "End")
    for part in ("Year", "Month", "Day")
]
nan_count = df_eruptions[date_part_cols].isna().sum()

print("rows df_eruptions:", len(df_eruptions))
print("\nnan count:", nan_count)

rows df_eruptions: 531

nan count: Start Year     0
Start Month    0
Start Day      0
End Year       0
End Month      0
End Day        0
dtype: int64


In [30]:
## create Start_Date and End_Date cols

def to_date(row, which="End"):
    """Build a timestamp from the year/month/day columns of one eruption row.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the keys '<which> Year', '<which> Month' and '<which> Day'.
    which : {"End", "Start"}, default "End"
        Selects which trio of columns is read.

    Returns
    -------
    pandas.Timestamp or pandas.NaT
        NaT when any component is missing, cannot be converted to an integer,
        or the components do not form a date pandas can parse
        (for example day 0 or 30 February).

    Raises
    ------
    ValueError
        If ``which`` is neither "End" nor "Start".
    KeyError
        If ``row`` lacks one of the expected columns.
    """
    if which not in ("End", "Start"):
        raise ValueError("invalid input, which must be 'End' or 'Start'")

    # Look the columns up outside the try block: a missing or misspelled
    # column must surface as KeyError instead of silently becoming NaT for
    # every row.
    parts = [row[f"{which} {unit}"] for unit in ("Year", "Month", "Day")]

    try:
        if any(pd.isna(part) for part in parts):
            return pd.NaT
        year, month, day = (int(part) for part in parts)
    except (TypeError, ValueError, OverflowError):
        # A component is present but is not an integer-like scalar.
        return pd.NaT

    # Zero-padded text plus an explicit format removes any format guessing;
    # errors='coerce' maps impossible or unrepresentable dates to NaT.
    return pd.to_datetime(
        f"{year:04d}-{month:02d}-{day:02d}",
        format="%Y-%m-%d",
        errors="coerce",
    )


# Row-wise conversion: to_date is called once per row, with `which` forwarded
# as a keyword argument, to build the Start_Date and End_Date columns.
df_eruptions['Start_Date'] = df_eruptions.apply(to_date, axis=1, which="Start")
df_eruptions['End_Date'] = df_eruptions.apply(to_date, axis=1, which="End")

In [31]:
# Order the eruptions from the most recent start date to the oldest.
df_eruptions = df_eruptions.sort_values('Start_Date', ascending=False)

In [32]:
# import webbrowser
# df_eruptions.to_html('eruptions.html')
# webbrowser.open('eruptions.html')

In [33]:
# Bounding boxes for selected volcanoes.
# Format: [min_lat, min_lon, max_lat, max_lon], i.e. lower-left and upper-right corners.
# 'Volcano Number' is the GVP volcano number, which is unique per volcano.
# NOTE(review): 263250 (Merapi) and 211060 (Etna) each appear twice with different
# boxes. The left merge on 'Volcano Number' in the next cell therefore yields one
# row per box for every eruption of those volcanoes — confirm this is intended.

bounding_boxes = [
    {'Volcano Number': 300130, 'bbox': [54.03165078433657, 159.40758242983895, 54.06904233820455, 159.4898004107094]}, # Karymsky
    {'Volcano Number': 263340, 'bbox': [-8.13365743655555, 114.03740097860978, -8.104902974500229, 114.07047891553886]}, # Raung
    {'Volcano Number': 267010, 'bbox': [2.2872307470547644, 125.34161039956795, 2.3288981799211714, 125.38862148967762]}, # Ruang
    {'Volcano Number': 273010, 'bbox': [12.76037572885907, 124.04051915187372, 12.780391229785751, 124.06891850697968]}, # Bulusan
    {'Volcano Number': 211060, 'bbox': [37.72695190822104, 14.9637738913999, 37.76972705091083, 15.029790595078342]}, # Etna (wider box; noted as working)
    {'Volcano Number': 263250, 'bbox': [-7.558128149472474, 110.42480120723107, -7.530339943833005, 110.4584875567422]}, # Merapi (wider box)
    {'Volcano Number': 300250, 'bbox': [55.92212886517477, 160.44419106087273, 56.12909802867624, 160.81048399248417]}, # Bezymianny
    {'Volcano Number': 243080, 'bbox': [-18.992073589786386, -174.76461237119221, -18.990251166454446, -174.76160115487093]}, # Home Reef
    {'Volcano Number': 260010, 'bbox': [12.278092438651273, 93.85415620558221, 12.29307603339042, 93.87083888263876]}, # Barren Island
    {'Volcano Number': 273030, 'bbox': [13.251680695948448, 123.68162850034646, 13.258495973451303, 123.69037123695682]}, # Mayon
    {'Volcano Number': 263250, 'bbox': [-7.54848947152449, 110.43661067960318, -7.5361374858815156, 110.45276139754975]}, # Merapi (tighter box)
    {'Volcano Number': 341090, 'bbox': [19.01820908782066, -98.63045291873857, 19.029433068768977, -98.61531502012932]}, # Popocatepetl
    {'Volcano Number': 332010, 'bbox': [19.401829379353593, -155.29181153443545, 19.41485986784894, -155.27631498422463]}, # Kilauea
    {'Volcano Number': 223030, 'bbox': [-1.5284340969712804, 29.24300017519119, -1.515805117200142, 29.255567353688676]}, # Nyiragongo
    {'Volcano Number': 332020, 'bbox': [19.450703641624916, -155.61703705423673, 19.490837189929543, -155.56458859788555]}, # Mauna Loa
    {'Volcano Number': 211040, 'bbox': [38.788380112816625, 15.205188939415372, 38.798373664433605, 15.21985373830932]}, # Stromboli
    {'Volcano Number': 211060, 'bbox': [37.748944714150824, 14.989887344321543, 37.753064934424394, 14.996524430173597]}, # Etna (tighter box)

]

# Template for new entries:
# {'Volcano Number': 300250, 'bbox': [55.92212886517477, 160.44419106087273, 56.12909802867624, 160.81048399248417]}, # template

# A volcano can have several eruptions, but every eruption has its own unique eruption number.

In [34]:
# Attach the 'bbox' column to the eruptions. The left join keeps every
# eruption row; volcanoes without an entry in bounding_boxes get a missing bbox.
df_bboxes = pd.DataFrame(bounding_boxes)
df_eruptions = pd.merge(
    df_eruptions,
    df_bboxes,
    how='left',
    on='Volcano Number',
)

In [35]:
# Spot check: show every eruption row belonging to volcano number 223030.
is_volcano_223030 = df_eruptions['Volcano Number'] == 223030
df_eruptions_223030 = df_eruptions[is_volcano_223030]
df_eruptions_223030

Unnamed: 0,Volcano Number,Eruption Number,Volcano Name,Start Year Modifier,Start Year,Start Year Uncertainty,Start Month,Start Day Modifier,Start Day,Start Day Uncertainty,...,End Year Uncertainty,End Month,End Day Modifier,End Day,End Day Uncertainty,Latitude,Longitude,Start_Date,End_Date,bbox


In [None]:
# Write the preprocessed table into the same "Eruption_data" folder as the
# input workbook, without the index and encoded as UTF-8 with a BOM.
output_path = os.path.join(
    base_dir,
    "Eruption_data",
    "eruptions_preprocessed.csv",
)
df_eruptions.to_csv(output_path, encoding='utf-8-sig', index=False)

# plot

In [48]:
# One row per distinct (volcano, bbox) pair. The bbox lists are converted to
# strings first because lists are unhashable and cannot be de-duplicated.
df_bounding_boxes = (
    df_eruptions[['Volcano Number', 'bbox']]
    .dropna()
    .assign(bbox=lambda frame: frame['bbox'].map(str))
    .drop_duplicates(subset=['Volcano Number', 'bbox'])
)
df_bounding_boxes


Unnamed: 0,Volcano Number,bbox
0,300130,"[54.03165078433657, 159.40758242983895, 54.069..."
2,273010,"[12.76037572885907, 124.04051915187372, 12.780..."
6,263340,"[-8.13365743655555, 114.03740097860978, -8.104..."
12,300250,"[55.92212886517477, 160.44419106087273, 56.129..."
13,332010,"[19.401829379353593, -155.29181153443545, 19.4..."
16,243080,"[-18.992073589786386, -174.76461237119221, -18..."
30,273030,"[13.251680695948448, 123.68162850034646, 13.25..."
37,267010,"[2.2872307470547644, 125.34161039956795, 2.328..."
40,260010,"[12.278092438651273, 93.85415620558221, 12.293..."
73,211060,"[37.72695190822104, 14.9637738913999, 37.76972..."


In [49]:
import ee
# Initialise the Earth Engine client for the Cloud project used by this notebook.
EE_PROJECT = 'bustling-syntax-392010'
ee.Initialize(project=EE_PROJECT)
print('done')


# parse and reorder bbox column to correct coordinate format for Earth Engine
def parse_bbox_string(bbox_str):
    """Convert a stringified ``[lat1, lon1, lat2, lon2]`` list into an ``ee.Geometry.BBox``.

    The text stores latitude first; the values are handed to
    ``ee.Geometry.BBox`` in the order (lon1, lat1, lon2, lat2).

    Raises
    ------
    ValueError
        If the text does not contain exactly four comma-separated numbers.
    """
    inner_text = bbox_str.strip('[] ')
    values = [float(token) for token in inner_text.split(',')]
    if len(values) != 4:
        raise ValueError(f"Invalid bbox format: {bbox_str}")
    south, west, north, east = values
    return ee.Geometry.BBox(west, south, east, north)


# Build one Earth Engine geometry per stringified bbox.
df_bounding_boxes["ee_bbox"] = df_bounding_boxes["bbox"].map(parse_bbox_string)

done


In [51]:
import geemap
# Draw every bounding box as its own red layer on an interactive map.
Map = geemap.Map()

for layer_no, geom in enumerate(df_bounding_boxes["ee_bbox"], start=1):
    Map.addLayer(geom, {'color': 'red'}, f'BBox {layer_no}')


Map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…