In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pickle
import yaml
import pandas as pd
import numpy as np
import os
from copy import deepcopy
from src import paths
from src.models.fault_detection import simulate_signal, detect_fault
from src.notifications import send_telegram_notification
import reservoirpy as rpy
from typing import Dict

In [3]:
rpy.verbosity(0)

0

In [4]:
def save_signal_simulations(iteration:int = None,
                            signal_simulations: Dict = None,
                            save_interval: int = 5,
                            ):
    
    # Check if 'backup' directory exists. If not, create it.
    if not os.path.exists("results"):
        os.makedirs("results")

    # Save the result in the directory
    filename = f"signal_simulations_up_to_iter_{iteration}.pickle"
    filepath = os.path.join("results", filename)

    with open(filepath, "wb") as file:
        pickle.dump(signal_simulations, file)

    # Delete the prevoius result
    prev_filename = f"signal_simulations_up_to_iter_{iteration - save_interval}.pickle"
    prev_filepath = os.path.join("results", prev_filename)

    if os.path.exists(prev_filepath):
        os.remove(prev_filepath)

In [5]:

# Paths

trained_esn_path = paths.models_dir("trained_esn_mndwi.pickle")
params_path = paths.config_dir("params.yaml")

pre_megadrought_fault_detection_metadata_path = paths.data_processed_dir("fault_detection", "fault_detection_metadata_mndwi.csv")
pre_megadrought_fault_detection_dataset_path = paths.data_processed_dir("fault_detection", "fault_detection_signal_mndwi.csv")


In [6]:
with open(params_path, "r") as file:
  params = yaml.safe_load(file)

save_interval: int = params["save_interval"]
weeks_after_change_offset: int = params["weeks_after_change_offset"]
esn_features_dim: int = params["esn_features_dim"]
fault_detection_forecasted_steps: int = params["fault_detection_forecasted_steps"]
non_change_placeholder_date = params["non_change_placeholder_date"]


In [7]:
date_offset = pd.DateOffset(
    weeks=weeks_after_change_offset
)

In [8]:
pre_megadrought_fault_detection_df = pd.read_csv(pre_megadrought_fault_detection_dataset_path, index_col=["ID", "IDpix"])
pre_megadrought_fault_detection_metadata_df = pd.read_csv(pre_megadrought_fault_detection_metadata_path, index_col=["ID", "IDpix"])

In [9]:
y = pre_megadrought_fault_detection_metadata_df["label"]

In [10]:
X = pre_megadrought_fault_detection_df
X.columns = pd.to_datetime(X.columns)

In [11]:
change_start_dates = pre_megadrought_fault_detection_metadata_df["change_start"]
change_start_dates = pd.to_datetime(change_start_dates)

## Signal simulation

In [12]:
X_plug_test = X
y_plug_test = y.loc[X_plug_test.index]

In [19]:
len(X_plug_test.index.get_level_values(0).unique())

99

In [14]:
y_plug_test

ID   IDpix
219  5692     1
390  3508     1
31   939      0
382  2564     1
270  3211     0
375  1667     1
138  2007     1
391  3717     0
38   1094     0
370  42       1
245  999      0
370  627      1
372  1235     1
412  5809     0
206  5438     1
147  2743     1
414  6684     0
370  259      1
192  4603     0
372  1123     1
187  4323     0
382  2582     1
295  4666     0
245  1003     0
173  3637     0
172  3529     0
139  2260     0
168  3295     1
109  955      0
191  4507     0
Name: label, dtype: int64

In [15]:
with open(trained_esn_path, "rb") as file:
  esn = pickle.load(file)

In [16]:
send_telegram_notification("Starting execution")

signal_simulations = []
y_pred_pixels = pd.Series(np.zeros(y.shape[0]), index=y.index)

send_telegram_notification("Data importation completed")

num_pixles = len(X_plug_test.index)


send_telegram_notification(
    f"Begining iteration of {num_pixles} pixels for signal simulation"
)

for i, index in enumerate(X_plug_test.index):

    signal = X_plug_test.loc[index]

    if y_plug_test.loc[index] == 0:
        bounded_signal = signal
    else:
        event_date = change_start_dates.loc[index]
        bounded_signal = signal[: event_date + date_offset]


    signal_simulation = simulate_signal(
        signal=bounded_signal,
        model=deepcopy(esn),
        num_features=esn_features_dim,
        forecasted_steps=fault_detection_forecasted_steps,
        step_size=1,
    )

    signal_simulations.append(signal_simulation)

    # Store and notify every save_interval iterations
    if (i + 1) % save_interval == 0:
        completion_percentage = ((i + 1) / num_pixles) * 100
        # save_signal_simulations(i + 1, signal_simulations)
        send_telegram_notification(
            f"Completed: {i+1} iterations; {completion_percentage:.2f}% of total iterations"
        )

# save_signal_simulations(num_pixles, signal_simulations)

send_telegram_notification("Signal simulation done")



{'ok': True,
 'result': {'message_id': 896,
  'from': {'id': 6622010173,
   'is_bot': True,
   'first_name': 'fault-detection-bot',
   'username': 'PolyFaultDetection_bot'},
  'chat': {'id': 1302065276,
   'first_name': 'Tomás',
   'last_name': 'Couso',
   'username': 'tcouso',
   'type': 'private'},
  'date': 1709921080,
  'text': 'Signal simulation done'}}

In [27]:
simulation_results_dict = {
  "prediction": [],
  "event_date": []
}

for simulation in signal_simulations:
  curr_lower_bound = simulation["lower_bound"]
  curr_lower_bound_dates = simulation["lower_bound_dates"]
  curr_forecasts = simulation["forecasts"]

  pred, date = detect_fault(
    N=9,
    k=1.0,
    step_size=1,
    non_change_placeholder_date=non_change_placeholder_date,
    lower_bound=curr_lower_bound,
    lower_bound_dates=curr_lower_bound_dates,
    forecasts=curr_forecasts
  )

  simulation_results_dict["prediction"].append(pred)
  simulation_results_dict["event_date"].append(date)

In [28]:
y_pred_plug_test = pd.DataFrame(simulation_results_dict)
y_pred_plug_test.index = X_plug_test.index
y_pred_plug_test["event_date"] = pd.to_datetime(y_pred_plug_test["event_date"])

In [29]:
y_pred_plug_test

Unnamed: 0_level_0,Unnamed: 1_level_0,prediction,event_date
ID,IDpix,Unnamed: 2_level_1,Unnamed: 3_level_1
219,5692,1,2017-01-15
390,3508,1,2017-02-19
31,939,0,1800-01-01
382,2564,0,1800-01-01
270,3211,1,2019-01-06
375,1667,1,2015-12-20
138,2007,1,2015-11-01
391,3717,1,2019-01-13
38,1094,1,2019-03-24
370,42,1,2018-02-18


In [30]:
pre_megadrought_fault_detection_metadata_df.loc[X_plug_test.index][["label", "change_start"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,label,change_start
ID,IDpix,Unnamed: 2_level_1,Unnamed: 3_level_1
219,5692,1,2017-10-07
390,3508,1,2017-10-21
31,939,0,1800-01-01
382,2564,1,2019-09-02
270,3211,0,1800-01-01
375,1667,1,2016-03-18
138,2007,1,2016-02-23
391,3717,0,1800-01-01
38,1094,0,1800-01-01
370,42,1,2018-04-13
