In [3]:
import pandas as pd
import numpy as np

from pathlib import Path
import yaml
from typing import List
from itertools import product

from src.data.utils import create_output_path
from src import paths


In [16]:

# Locations of the run configuration and of the processed fault-detection
# data, both resolved through the project's `paths` helpers.
params_path: Path = paths.config_dir("params.yaml")
fault_detection_dir: Path = paths.data_processed_dir("fault_detection")

# Decode explicitly as UTF-8 (the conventional YAML encoding) so the parsed
# values do not depend on the platform's default locale encoding.
with open(params_path, "r", encoding="utf-8") as file:
    params = yaml.safe_load(file)

# Parameter grids read from the configuration file.
N_values: List[int] = params["N_values"]
k_values: List[float] = params["k_values"]
th_values: List[float] = params["voting_thresholds"]

# Band name used as a suffix in the input file names.
selected_band: str = params["selected_band"]

## Prediction date metrics

In [9]:
# Metadata file for the selected band: fault_detection_metadata_<band>.csv
fault_detection_metadata_filename = f"fault_detection_metadata_{selected_band}.csv"
fault_detection_metadata_path = fault_detection_dir / fault_detection_metadata_filename

# Metadata table indexed by the (ID, IDpix) pair.
pixel_true_values_df = pd.read_csv(
    fault_detection_metadata_path,
    index_col=["ID", "IDpix"],
)

# Collapse to one row per ID by taking the column-wise minimum of the
# selected columns within each ID group.
polygon_columns = [
    "change_type",
    "change_start",
    "last_non_change_date",
    "vegetation_type",
    "label",
]
poly_true_values_df = pixel_true_values_df.groupby("ID")[polygon_columns].min()

In [15]:
# Aggregate the pixel-level predictions of one (N, k, th) combination into a
# single prediction and event date per ID.
for N, k, th in product(N_values, k_values, th_values):

    filename = f"predictions_N={N}_k={k}_" + selected_band + ".csv"
    pix_pred_path = paths.data_processed_dir("pixel_predictions", filename)

    pix_pred = pd.read_csv(pix_pred_path, index_col=["ID", "IDpix"])
    pix_pred_by_id = pix_pred.groupby("ID")

    # Voting rule: an ID gets 1.0 when the mean of its pixel predictions
    # reaches the threshold `th`, otherwise 0.0 (vectorised comparison
    # instead of a per-element Python lambda).
    poly_pred = pix_pred_by_id["prediction"].mean().ge(th).astype(float)

    # Maximum `event_date` found among the pixels of each ID.
    poly_pred_detection_dates = pix_pred_by_id["event_date"].max()

    poly_pred_df = pd.DataFrame(
        {"prediction": poly_pred, "event_date": poly_pred_detection_dates}
    )

    # TODO: persist `poly_pred_df` per (N, k, th) under
    # paths.data_processed_dir("poly_predictions", ...). Writing results is
    # not implemented yet, so nothing is saved here.

    # NOTE(review): this `break` stops after the FIRST (N, k, th) combination,
    # so the cells below only analyse that single combination. Remove it once
    # the results of every combination are stored.
    break


In [25]:
# Inspect the per-ID table aggregated from the fault-detection metadata.
poly_true_values_df

Unnamed: 0_level_0,change_type,change_start,last_non_change_date,vegetation_type,label
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
6,stable,1800-01-01 00:00:00,2022-05-01 00:00:00,native,0
7,stable,1800-01-01 00:00:00,2021-02-22 00:00:00,native,0
8,stable,1800-01-01 00:00:00,2020-10-07 00:00:00,native,0
10,stable,1800-01-01 00:00:00,2021-08-01 00:00:00,native,0
11,stable,1800-01-01 00:00:00,2021-02-02 00:00:00,native,0
...,...,...,...,...,...
414,fire,2020-10-07 00:00:00,2016-05-14 00:00:00,exotic,1
415,fire,2020-10-01 00:00:00,2016-07-26 00:00:00,exotic,1
418,fire,2018-11-30 00:00:00,2016-05-14 00:00:00,exotic,1
419,fire,2018-04-11 00:00:00,2016-07-26 00:00:00,exotic,1


In [32]:
# Inspect the per-ID predictions (`prediction`, `event_date`) left by the aggregation loop.
poly_pred_df

Unnamed: 0_level_0,prediction,event_date
ID,Unnamed: 1_level_1,Unnamed: 2_level_1
6,0.0,1800-01-01
7,0.0,1800-01-01
8,0.0,2022-07-24
10,1.0,2022-10-23
11,1.0,2022-09-11
...,...,...
414,1.0,2020-11-01
415,1.0,2020-10-11
418,1.0,2018-09-16
419,1.0,2018-04-29


In [37]:
# Rows of the per-ID table whose `label` equals 1. All columns are kept,
# not only the date columns.
is_change_polygon = poly_true_values_df["label"].eq(1)
change_poly_true_event_dates = poly_true_values_df.loc[is_change_polygon]

# Predicted `event_date` for exactly those IDs, in the same order.
change_poly_predicted_event_dates = poly_pred_df.loc[
    change_poly_true_event_dates.index, "event_date"
]


In [45]:
# Per ID: True where the predicted event date is strictly later than
# `last_non_change_date`.
last_non_change_dates = pd.to_datetime(
    change_poly_true_event_dates["last_non_change_date"]
)
predicted_event_dates = pd.to_datetime(change_poly_predicted_event_dates)
last_non_change_dates < predicted_event_dates

ID
84     True
86     True
87     True
88     True
89     True
       ... 
414    True
415    True
418    True
419    True
422    True
Length: 179, dtype: bool

In [57]:
# Per ID: True where the predicted event date is strictly earlier than
# `change_start`.
predicted_event_dates = pd.to_datetime(change_poly_predicted_event_dates)
true_change_starts = pd.to_datetime(change_poly_true_event_dates["change_start"])
predicted_event_dates < true_change_starts

ID
84     False
86     False
87     False
88      True
89     False
       ...  
414    False
415    False
418     True
419    False
422    False
Length: 179, dtype: bool

In [73]:
# Per-ID difference "predicted event date minus change_start": positive when
# the prediction is later than `change_start`, negative when it is earlier.
# NOTE(review): the displayed predictions contain 1800-01-01 event dates; if
# that value is a "no event" placeholder, deltas computed from it are not
# meaningful and may need to be excluded - confirm.
predicted_event_dates = pd.to_datetime(change_poly_predicted_event_dates)
true_change_starts = pd.to_datetime(change_poly_true_event_dates["change_start"])
detection_deltas = predicted_event_dates - true_change_starts

In [75]:
# Largest value of (predicted event date - change_start) across the selected IDs.
detection_deltas.max()

Timedelta('27 days 00:00:00')

In [70]:
# Predicted event dates of the ID(s) whose delta equals the maximum delta.
largest_delta = detection_deltas.max()
ids_with_largest_delta = detection_deltas.loc[detection_deltas == largest_delta].index
change_poly_predicted_event_dates[ids_with_largest_delta]

ID
404    1800-01-01
Name: event_date, dtype: object