In [1]:
from eruption_forecast import ForecastModel
from typing import Any
import joblib
import os

In [2]:
# --- Input data ---------------------------------------------------------
# Directory handed to `fm.calculate(sds_dir=...)` in the tremor cell.
sds_dir = r"D:\Data\OJN"

# --- Model options ------------------------------------------------------
# Classifier key passed to `fm.train(classifier=...)`.
classifier = "rf"
# Forwarded to both feature-extraction calls (training and prediction).
use_relevant_features = True

# Keyword arguments unpacked into `ForecastModel(**params)`.
params: dict[str, Any] = {
    "station": "OJN",
    "channel": "EHZ",
    "start_date": "2025-01-01",
    "end_date": "2025-12-31",
    "window_size": 2,
    "volcano_id": "Lewotobi Laki-laki",
}

# Eruption dates (YYYY-MM-DD) passed to `fm.build_label(eruption_dates=...)`.
eruptions = [
    "2025-03-20", "2025-04-22", "2025-05-18", "2025-06-17",
    "2025-07-07", "2025-08-01", "2025-08-17",
]

In [3]:
# Create the forecast model. Station, channel, date range, window size and
# volcano id come from `params`; the explicit keywords are set alongside them.
fm = ForecastModel(
    **params,
    n_jobs=4,
    overwrite=False,
    verbose=True,
    debug=False,
)

[32m2026-02-13 16:00:01[0m | [1mINFO    [0m | [36meruption_forecast.model.forecast_model[0m:[36m__init__[0m:[36m189[0m - [1mStart Date: 2025-01-01[0m
[32m2026-02-13 16:00:01[0m | [1mINFO    [0m | [36meruption_forecast.model.forecast_model[0m:[36m__init__[0m:[36m190[0m - [1mEnd Date: 2025-12-31[0m
[32m2026-02-13 16:00:01[0m | [1mINFO    [0m | [36meruption_forecast.model.forecast_model[0m:[36m__init__[0m:[36m191[0m - [1mVolcano ID: Lewotobi Laki-laki[0m
[32m2026-02-13 16:00:01[0m | [1mINFO    [0m | [36meruption_forecast.model.forecast_model[0m:[36m__init__[0m:[36m192[0m - [1mNSLC: VG.OJN.00.EHZ[0m
[32m2026-02-13 16:00:01[0m | [1mINFO    [0m | [36meruption_forecast.model.forecast_model[0m:[36m__init__[0m:[36m193[0m - [1mOutput Dir: D:\Projects\eruption-forecast-examples\output[0m


## Calculate Tremor

In [4]:
# Calculate tremor from the SDS data under `sds_dir`.
# The call is left as the last expression: it returns the ForecastModel
# (see the cell output), which is what the cell displays.
fm.calculate(
    sds_dir=sds_dir,
    source="sds",
    remove_outlier_method="maximum",
    plot_tmp=True,
    save_plot=True,
)

[32m2026-02-13 16:00:01[0m | [1mINFO    [0m | [36meruption_forecast.sds[0m:[36m__init__[0m:[36m81[0m - [1mSDS initialized: VG.OJN.00.EHZ from D:\Data\OJN[0m
[32m2026-02-13 16:00:01[0m | [1mINFO    [0m | [36meruption_forecast.tremor.calculate_tremor[0m:[36mrun[0m:[36m471[0m - [1mRunning on 4 job(s)[0m
[32m2026-02-13 16:00:05[0m | [1mINFO    [0m | [36meruption_forecast.tremor.calculate_tremor[0m:[36mrun[0m:[36m490[0m - [1mTremor data saved to D:\Projects\eruption-forecast-examples\output\VG.OJN.00.EHZ\tremor\tremor_VG.OJN.00.EHZ_2025-01-01-2025-09-28.csv[0m
[32m2026-02-13 16:00:05[0m | [1mINFO    [0m | [36meruption_forecast.plot[0m:[36mplot_tremor[0m:[36m70[0m - [1m2025-01-01 :: Plot already exists at D:\Projects\eruption-forecast-examples\output\VG.OJN.00.EHZ\tremor\tremor_VG.OJN.00.png[0m
[32m2026-02-13 16:00:05[0m | [1mINFO    [0m | [36meruption_forecast.model.forecast_model[0m:[36m_adjust_dates_to_tremor_range[0m:[36m356[0m - 

<eruption_forecast.model.forecast_model.ForecastModel at 0x207e13be5d0>

## Construct Labels (eruption vs. non-eruption)

In [5]:
# Build the training labels from the known eruption dates.
# The label range stops at 2025-07-24, earlier than the model's end date;
# the prediction section further down works on dates after that.
fm.build_label(
    eruption_dates=eruptions,
    start_date="2025-01-01",
    end_date="2025-07-24",
    window_step=6,
    window_step_unit="hours",
    day_to_forecast=2,
    verbose=True,
)

[32m2026-02-13 16:00:05[0m | [1mINFO    [0m | [36meruption_forecast.label.label_builder[0m:[36m__init__[0m:[36m155[0m - [1mStart Date (YYYY-MM-DD): 2025-01-01[0m
[32m2026-02-13 16:00:05[0m | [1mINFO    [0m | [36meruption_forecast.label.label_builder[0m:[36m__init__[0m:[36m156[0m - [1mEnd Date (YYYY-MM-DD): 2025-07-24[0m
[32m2026-02-13 16:00:05[0m | [1mINFO    [0m | [36meruption_forecast.label.label_builder[0m:[36m__init__[0m:[36m157[0m - [1mWindow Size (days): 2[0m
[32m2026-02-13 16:00:05[0m | [1mINFO    [0m | [36meruption_forecast.label.label_builder[0m:[36m__init__[0m:[36m158[0m - [1mWindow Step (hours): 6[0m
[32m2026-02-13 16:00:05[0m | [1mINFO    [0m | [36meruption_forecast.label.label_builder[0m:[36m__init__[0m:[36m159[0m - [1mDay To Forecast (days): 2[0m
[32m2026-02-13 16:00:05[0m | [1mINFO    [0m | [36meruption_forecast.label.label_builder[0m:[36m__init__[0m:[36m160[0m - [1mVolcano ID: Lewotobi Laki-laki[0m


<eruption_forecast.model.forecast_model.ForecastModel at 0x207e13be5d0>

## Extract Features

In [6]:
# Extract training features for the selected tremor columns.
# The same column list is defined again as `select_tremor_columns` in the
# prediction section.
fm.extract_features(
    select_tremor_columns=["rsam_f2", "rsam_f3", "rsam_f4", "dsar_f3-f4"],
    # Feature names left out of the extraction.
    exclude_features=[
        "agg_linear_trend",
        "linear_trend_timewise",
        "length",
        "has_duplicate_max",
        "has_duplicate_min",
        "has_duplicate",
    ],
    use_relevant_features=use_relevant_features,
    save_tremor_matrix_per_method=True,
    save_tremor_matrix_per_id=False,
    overwrite=False,
)

[32m2026-02-13 16:00:05[0m | [1mINFO    [0m | [36meruption_forecast.features.tremor_matrix_builder[0m:[36mvalidate[0m:[36m152[0m - [1mstart_date updated to: 2025-01-01 00:00:00[0m
[32m2026-02-13 16:00:05[0m | [1mINFO    [0m | [36meruption_forecast.features.tremor_matrix_builder[0m:[36mbuild[0m:[36m407[0m - [1mTremor matrix D:\Projects\eruption-forecast-examples\output\VG.OJN.00.EHZ\features\tremor_matrix_unified_2025-01-01_2025-09-28_ws-2.csv already exists.[0m


<eruption_forecast.model.forecast_model.ForecastModel at 0x207e13be5d0>

In [7]:
# Show the `label_features_csv` attribute of the model's FeaturesBuilder
# (a CSV file path, as the cell output shows).
fm.FeaturesBuilder.label_features_csv

'D:\\Projects\\eruption-forecast-examples\\output\\VG.OJN.00.EHZ\\features\\label_features_2025-01-03-2025-07-24.csv'

In [8]:
# Train the classifier selected in the configuration cell.
# The training log reports each metric as mean ± std across seeds.
fm.train(
    classifier=classifier,  # ty:ignore[invalid-argument-type]
    cv_strategy="stratified",
    sampling_strategy=0.75,
    random_state=0,
    total_seed=500,
    number_of_significant_features=20,
    plot_significant_features=True,
    save_all_features=True,
    overwrite=False,
    verbose=True,
)

[32m2026-02-13 16:00:06[0m | [1mINFO    [0m | [36meruption_forecast.model.model_trainer[0m:[36m__init__[0m:[36m232[0m - [1mTrain model using 4 jobs with RandomForestClassifier classifier and stratified CV strategy (5 splits)[0m
[32m2026-02-13 16:00:06[0m | [1mINFO    [0m | [36meruption_forecast.model.model_trainer[0m:[36mtrain[0m:[36m757[0m - [1mRunning on 4 job(s)[0m


| Training model
|- Using Relevant features


[32m2026-02-13 16:00:10[0m | [1mINFO    [0m | [36meruption_forecast.model.model_trainer[0m:[36m_aggregate_metrics[0m:[36m1081[0m - [1mMetrics Summary (mean ± std across seeds)[0m
[32m2026-02-13 16:00:10[0m | [1mINFO    [0m | [36meruption_forecast.model.model_trainer[0m:[36m_aggregate_metrics[0m:[36m1092[0m - [1maccuracy            : 0.8658 ± 0.0456[0m
[32m2026-02-13 16:00:10[0m | [1mINFO    [0m | [36meruption_forecast.model.model_trainer[0m:[36m_aggregate_metrics[0m:[36m1092[0m - [1mbalanced_accuracy   : 0.8471 ± 0.0618[0m
[32m2026-02-13 16:00:10[0m | [1mINFO    [0m | [36meruption_forecast.model.model_trainer[0m:[36m_aggregate_metrics[0m:[36m1092[0m - [1mf1_score            : 0.4872 ± 0.0952[0m
[32m2026-02-13 16:00:10[0m | [1mINFO    [0m | [36meruption_forecast.model.model_trainer[0m:[36m_aggregate_metrics[0m:[36m1092[0m - [1mprecision           : 0.3526 ± 0.0905[0m
[32m2026-02-13 16:00:10[0m | [1mINFO    [0m | [36merupti

<eruption_forecast.model.forecast_model.ForecastModel at 0x207e13be5d0>

In [9]:
# Persist the trained ForecastModel as a pickle in the trainings directory.
classifier_name = fm.classifier_name
trainings_dir = r"D:\Projects\eruption-forecast-examples\output\VG.OJN.00.EHZ\trainings"
model_filename = f"forecast_model_{classifier_name}_{fm.basename}.pkl"

# NOTE: joblib.dump returns the list of file names it wrote, so
# `forecast_model` holds path(s) to the pickle, not the model object itself.
forecast_model = joblib.dump(fm, os.path.join(trainings_dir, model_filename))

# Build Prediction

In [10]:
from eruption_forecast.features.tremor_matrix_builder import TremorMatrixBuilder
from eruption_forecast.features.features_builder import FeaturesBuilder
from eruption_forecast.utils import construct_windows, to_datetime

In [11]:
# Root directory for everything the prediction section writes.
station_output_dir = r"D:\Projects\eruption-forecast-examples\output\VG.OJN.00.EHZ"
output_dir = os.path.join(station_output_dir, "predictions")

# Create it up front; an already existing directory is not an error.
os.makedirs(output_dir, exist_ok=True)

In [12]:
# Sub-directories of the prediction output directory.
tremor_dir = os.path.join(output_dir, "tremor")
features_dir = os.path.join(output_dir, "features")
extracted_dir = os.path.join(features_dir, "extracted")

# makedirs also creates missing parents, so creating `extracted_dir`
# creates `features_dir` as well.
for directory in (tremor_dir, extracted_dir):
    os.makedirs(directory, exist_ok=True)

In [13]:
# Date bounds used to validate the prediction period further down:
# the span of the tremor data and the end of the training-label range.
tremor_start_date, tremor_end_date = fm.TremorData.start_date, fm.TremorData.end_date
train_label_end_date = fm.LabelBuilder.end_date

(tremor_start_date, train_label_end_date)

(datetime.datetime(2025, 1, 1, 0, 0),
 datetime.datetime(2025, 7, 24, 23, 59, 59))

In [14]:
# Prediction period as date strings; they are reused for the output basename.
start_date_str, end_date_str = "2025-07-27", "2025-08-04"

# Expand the dates to cover whole days: 00:00:00 on the first day
# through 23:59:59 on the last day.
start_date = to_datetime(start_date_str).replace(hour=0, minute=0, second=0)
end_date = to_datetime(end_date_str).replace(hour=23, minute=59, second=59)

In [15]:
# Tremor columns used for prediction; the same list that was passed to
# `fm.extract_features(select_tremor_columns=...)` for training.
select_tremor_columns = ["rsam_f2", "rsam_f3", "rsam_f4", "dsar_f3-f4"]

In [16]:
# Validate the prediction period against the data that is available.
# 1. It must not start before the end of the training-label range.
# 2. It must not run past the last date covered by the tremor data.
# Raises ValueError when either condition is violated.
if start_date < train_label_end_date:
    raise ValueError(
        "Start date of prediction must not be earlier than the training "
        f"label end date ({train_label_end_date})."
    )
if end_date > tremor_end_date:
    raise ValueError(
        "End date of prediction must not be later than the tremor end date "
        f"({tremor_end_date})."
    )

### Extract Features for Prediction

#### Construct Windows (parameters differ from those used when training the model)

In [17]:
# Step between consecutive prediction windows.
window_step, window_step_unit = 10, "minutes"

# Base name for prediction outputs: encodes the period and the window step.
basename = f"{start_date_str}_{end_date_str}_step-{window_step}-{window_step_unit}"
basename

'2025-07-27_2025-08-04_step-10-minutes'

In [18]:
# Build the prediction windows between `start_date` and `end_date`.
# The step comes from `window_step` / `window_step_unit` (set in the cell
# above) instead of repeated literals, so the windows always match the
# step encoded in `basename`.
label_df = construct_windows(
    start_date=start_date,
    end_date=end_date,
    window_step=window_step,
    window_step_unit=window_step_unit,
)

In [19]:
# Number the windows 0..n-1 in row order and store the number in an `id` column.
# Presumably this is the "label ID" the tremor matrix is grouped by (the
# TremorMatrixBuilder log mentions it) — confirm against the library.
label_df["id"] = range(len(label_df))

In [20]:
# Preview the first five windows and their ids.
label_df.head(5)

Unnamed: 0_level_0,id
datetime,Unnamed: 1_level_1
2025-07-27 00:00:00,0
2025-07-27 00:10:00,1
2025-07-27 00:20:00,2
2025-07-27 00:30:00,3
2025-07-27 00:40:00,4


In [21]:
# Tremor table held by the model's TremorData object.
tremor_data = fm.TremorData.df
# Display the last index entry, i.e. the latest timestamp in the tremor table.
tremor_data.index[-1]

Timestamp('2025-09-28 23:50:00')

#### Build Tremor Matrix

In [22]:
# Build the tremor matrix for the prediction windows.
# - `window_size` is read from `params` so it stays equal to the window size
#   the ForecastModel was created (and trained) with.
# - `select_tremor_columns` is the list defined above, so this cell and the
#   feature-extraction cell always work on the same columns.
# The name is bound to the value returned by `.build()`; the next cell reads
# its `.df` attribute.
tremor_matrix_builder = TremorMatrixBuilder(
    tremor_df=tremor_data,
    label_df=label_df,
    output_dir=features_dir,
    window_size=params["window_size"],
    overwrite=True,
    verbose=True,
).build(
    select_tremor_columns=select_tremor_columns,
    save_tremor_matrix_per_method=True,
    save_tremor_matrix_per_id=False,
)

[32m2026-02-13 16:00:11[0m | [1mINFO    [0m | [36meruption_forecast.features.tremor_matrix_builder[0m:[36mbuild[0m:[36m412[0m - [1mCreate tremor matrix which grouped by label ID.[0m
[32m2026-02-13 16:00:14[0m | [1mINFO    [0m | [36meruption_forecast.features.tremor_matrix_builder[0m:[36msave_matrix_per_method[0m:[36m213[0m - [1mTremor matrix rsam_f2 is saved to: D:\Projects\eruption-forecast-examples\output\VG.OJN.00.EHZ\predictions\features\tremor_matrix_per_method\tremor_matrix_rsam_f2.csv[0m
[32m2026-02-13 16:00:14[0m | [1mINFO    [0m | [36meruption_forecast.features.tremor_matrix_builder[0m:[36msave_matrix_per_method[0m:[36m213[0m - [1mTremor matrix rsam_f3 is saved to: D:\Projects\eruption-forecast-examples\output\VG.OJN.00.EHZ\predictions\features\tremor_matrix_per_method\tremor_matrix_rsam_f3.csv[0m
[32m2026-02-13 16:00:15[0m | [1mINFO    [0m | [36meruption_forecast.features.tremor_matrix_builder[0m:[36msave_matrix_per_method[0m:[36m2

In [23]:
# Feature builder for the prediction tremor matrix; results are written
# under `features_dir`.
features_builder = FeaturesBuilder(
    tremor_matrix_df=tremor_matrix_builder.df,
    output_dir=features_dir,
    n_jobs=4,
    overwrite=False,
)

#### Extract Features

In [24]:
# Extract features for the prediction windows.
# NOTE: no labels are supplied here, and the log below reports that
# relevant-feature selection is therefore disabled and all features are
# extracted, whatever `use_relevant_features` is set to.
extracted_features_df = features_builder.extract_features(
    select_tremor_columns=select_tremor_columns,
    use_relevant_features=use_relevant_features,
    prefix_filename="prediction",
)

[32m2026-02-13 16:00:16[0m | [1mINFO    [0m | [36meruption_forecast.features.features_builder[0m:[36mextract_features[0m:[36m436[0m - [1mNo labels provided. Using relevant features will be disabled. All features will be extracted.[0m
Feature Extraction: 100%|██████████| 20/20 [00:39<00:00,  1.97s/it]
[32m2026-02-13 16:00:58[0m | [1mINFO    [0m | [36meruption_forecast.features.features_builder[0m:[36m_extract_features_for_column[0m:[36m316[0m - [1mExtracted features for rsam_f2 saved: D:\Projects\eruption-forecast-examples\output\VG.OJN.00.EHZ\predictions\features\extracted\prediction_all_features_2025-07-25-2025-08-04_rsam_f2.csv[0m
Feature Extraction: 100%|██████████| 20/20 [00:41<00:00,  2.09s/it]
[32m2026-02-13 16:01:43[0m | [1mINFO    [0m | [36meruption_forecast.features.features_builder[0m:[36m_extract_features_for_column[0m:[36m316[0m - [1mExtracted features for rsam_f3 saved: D:\Projects\eruption-forecast-examples\output\VG.OJN.00.EHZ\predictio

### Run Prediction