# Libraries

In [1]:
from src import paths
import pandas as pd
import numpy as np

# Dataset


In [2]:
# Locations of the interim NDVI CSVs for the two "non-change" classes.
# NOTE(review): `paths.data_interim_dir` is a project helper; presumably it
# joins its arguments under the interim data directory -- confirm in `src.paths`.
stable_ndvi_path = paths.data_interim_dir("stable", "stable_ndvi.csv")
drought_ndvi_path = paths.data_interim_dir("drought", "drought_ndvi.csv")

In [3]:
# Load both non-change tables with the same two-level row index
# ("ID" first, "IDpix" second).
stable_df, drought_df = (
    pd.read_csv(csv_path, index_col=["ID", "IDpix"])
    for csv_path in (stable_ndvi_path, drought_ndvi_path)
)

In [4]:
# Stack the stable and drought rows into a single "non-change" table.
non_change_df = pd.concat([stable_df, drought_df])

In [5]:
# Descriptive columns; every other column of the table is treated as signal.
metadata_columns = ["lat", "lon", "change_type", "change_start", "vegetation_type"]
non_change_signal_columns = [
    column for column in non_change_df.columns if column not in metadata_columns
]

In [6]:
# Metadata-only view of the non-change table (same rows, only `metadata_columns`).
non_change_metadata_df = non_change_df[metadata_columns]

In [7]:
# Keep only the signal columns, relabel them with parsed datetimes and
# put the columns in chronological order.
non_change_signal_df = non_change_df[non_change_signal_columns].set_axis(
    pd.to_datetime(non_change_signal_columns), axis=1
)
non_change_signal_df = non_change_signal_df.reindex(
    columns=sorted(non_change_signal_df.columns)
)

In [8]:
# Show the non-change signal table (columns are date-sorted at this point).
non_change_signal_df

Unnamed: 0_level_0,Unnamed: 1_level_0,2000-01-03,2000-01-04,2000-01-11,2000-01-18,2000-01-20,2000-01-26,2000-01-27,2000-01-28,2000-02-03,2000-02-05,...,2022-12-08,2022-12-09,2022-12-10,2022-12-14,2022-12-16,2022-12-17,2022-12-18,2022-12-19,2022-12-24,2022-12-25
ID,IDpix,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,1,,,0.707001,,,,0.723551,,,,...,,0.723368,,0.615958,,0.683760,,,,0.660891
0,2,,,0.730423,,,,0.720345,,,,...,,0.732564,,0.655706,,0.755396,,,,0.734281
0,3,,,0.746536,,,,0.741756,,,,...,,0.764361,,0.695667,,0.774370,,,,0.749191
0,4,,,0.750166,,,,0.741654,,,,...,,0.765921,,0.735234,,0.772314,,,,0.746010
0,5,,,0.765939,,,,0.749571,,,,...,,0.761045,,0.710822,,0.760751,,,,0.744337
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
326,6579,,,0.355533,,,,0.349749,,,,...,,0.367841,,0.407246,,0.356350,,,,0.328377
326,6580,,,0.392213,,,,0.388196,,,,...,,0.403706,,0.416060,,0.392987,,,,0.380575
326,6581,,,0.428320,,,,0.416210,,,,...,,0.388647,,0.395280,,0.365728,,,,0.351286
326,6582,,,0.442681,,,,0.414121,,,,...,,0.374593,,0.385955,,0.363775,,,,0.345881


## Sample 30% of polygons for ESN training

In [9]:
# Count distinct values of the first index level (one per polygon), then
# reserve 30 % of them (rounded down) for ESN training and the rest for
# fault detection.
num_polygons = len(non_change_metadata_df.index.get_level_values(0).unique())
esn_n = int(0.3 * num_polygons)
fault_detection_n = num_polygons - esn_n

In [10]:
# Show how many polygons go to ESN training vs. fault detection.
esn_n, fault_detection_n

(44, 105)

In [11]:
# Fixed-seed generator so the polygon sampling below is reproducible.
rng = np.random.default_rng(seed=0)

In [12]:
# Distinct values of the first index level (polygon IDs), in order of first
# appearance. Computed once instead of repeating the expression.
polygon_ids = non_change_metadata_df.index.get_level_values(0).unique()

# Draw `esn_n` polygons without replacement for ESN training; every polygon
# that was not drawn is assigned to fault detection.
esn_indices = pd.Index(rng.choice(polygon_ids, size=esn_n, replace=False))
fault_detection_indices = polygon_ids[~polygon_ids.isin(esn_indices)]

# Sanity checks on the split. These are order-independent: an elementwise
# comparison against a (sorted) union would only pass when the polygon IDs
# happen to first appear in ascending order, and would raise ValueError rather
# than AssertionError if the lengths differed.
assert esn_indices.intersection(fault_detection_indices).empty, (
    "ESN and fault-detection polygon sets overlap"
)
assert set(esn_indices) | set(fault_detection_indices) == set(polygon_ids), (
    "ESN and fault-detection polygon sets do not cover every polygon"
)

In [None]:
# Rows (metadata and signal) of the polygons assigned to fault detection.
# Later cells that build the dummy event dates read both of these frames, so
# this cell has to be executed before them.
non_change_fault_detection_metadata_df, non_change_fault_detection_signal_df = (
    frame.loc[fault_detection_indices]
    for frame in (non_change_metadata_df, non_change_signal_df)
)

In [13]:
# Rows (metadata and signal) of the polygons sampled for ESN training.
esn_metadata_df, esn_signal_df = (
    frame.loc[esn_indices]
    for frame in (non_change_metadata_df, non_change_signal_df)
)

## Fault detection dataset

In [14]:
# Interim NDVI CSVs for the two "change" classes (logging and fire).
logging_ndvi_path = paths.data_interim_dir("logging", "logging_ndvi.csv")
fire_ndvi_path = paths.data_interim_dir("fire", "fire_ndvi.csv")

# Same two-level row index ("ID", "IDpix") as the non-change tables.
logging_df, fire_df = (
    pd.read_csv(csv_path, index_col=["ID", "IDpix"])
    for csv_path in (logging_ndvi_path, fire_ndvi_path)
)

In [15]:
# Stack the logging and fire rows into a single "change" table.
change_df = pd.concat([logging_df, fire_df])

In [16]:
# Every column of the change table that is not a metadata column is signal.
change_signal_columns = [
    column for column in change_df.columns if column not in metadata_columns
]

In [17]:
# Metadata columns of the change table. An explicit copy is taken because the
# "change_start" column of this frame is overwritten later in the notebook;
# writing into a plain column slice of `change_df` is the classic
# chained-assignment (SettingWithCopyWarning) situation.
change_metadata_df = change_df[metadata_columns].copy()

In [18]:
# Keep only the signal columns, relabel them with parsed datetimes and
# put the columns in chronological order.
change_signal_df = change_df[change_signal_columns].set_axis(
    pd.to_datetime(change_signal_columns), axis=1
)
change_signal_df = change_signal_df.reindex(
    columns=sorted(change_signal_df.columns)
)

### Create dummy event dates for non-change polygons

In [19]:
# Non-change rows have no real event, so each one gets the same placeholder
# date: one year after the latest date column of the signal table.
last_observation_date = non_change_fault_detection_signal_df.columns.max()
non_change_event_dates = pd.Series(
    last_observation_date + pd.DateOffset(years=1),
    index=non_change_fault_detection_signal_df.index,
)

In [20]:
# Replace the "change_start" column of the non-change metadata with the
# placeholder event dates (values are aligned on the row index).
non_change_fault_detection_metadata_df = non_change_fault_detection_metadata_df.assign(
    change_start=non_change_event_dates
)

In [21]:
# Show the non-change metadata, now carrying the placeholder change_start dates.
non_change_fault_detection_metadata_df

Unnamed: 0_level_0,Unnamed: 1_level_0,lat,lon,change_type,change_start,vegetation_type
ID,IDpix,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2,87,-33.058766,-71.004412,stable,2023-12-25,native
2,88,-33.059036,-71.004951,stable,2023-12-25,native
2,89,-33.058227,-71.003334,stable,2023-12-25,native
2,90,-33.058497,-71.003065,stable,2023-12-25,native
2,91,-33.058497,-71.003873,stable,2023-12-25,native
...,...,...,...,...,...,...
326,6579,-33.647073,-71.043219,drought,2023-12-25,native
326,6580,-33.647881,-71.044836,drought,2023-12-25,native
326,6581,-33.647342,-71.043758,drought,2023-12-25,native
326,6582,-33.648420,-71.044836,drought,2023-12-25,native


### Filter out event dates prior to threshold date in polygons with events

In [22]:
# Change events are compared against this date (1 January 2010). Built from
# explicit components so the value does not depend on day-first vs.
# month-first parsing of a date string.
event_threshold = pd.Timestamp(year=2010, month=1, day=1)

In [23]:
# Parse "change_start" with day-first parsing and *replace* the column.
# A full-column write through `.loc[:, "change_start"] = ...` sets the values
# in place on pandas >= 2.0 and keeps the column's existing dtype, so the
# parsed timestamps would end up in an object column instead of datetime64.
# `assign` swaps the column for the parsed one, and it also avoids writing
# into a frame that was obtained by slicing another one.
change_metadata_df = change_metadata_df.assign(
    change_start=pd.to_datetime(change_metadata_df["change_start"], dayfirst=True)
)

In [24]:
# Row labels of the change table whose event started strictly after the
# threshold (a row dated exactly on the threshold is not selected).
is_after_threshold = change_metadata_df["change_start"] > event_threshold
selected_change_polygons_indices = change_metadata_df[is_after_threshold].index

In [25]:
# Restrict both change tables (metadata and signal) to the selected rows.
change_metadata_df, change_signal_df = (
    frame.loc[selected_change_polygons_indices]
    for frame in (change_metadata_df, change_signal_df)
)

In [26]:
# Show the change signal table after the event-date filter.
change_signal_df

Unnamed: 0_level_0,Unnamed: 1_level_0,2000-01-03,2000-01-11,2000-01-18,2000-01-26,2000-01-27,2000-01-28,2000-02-03,2000-02-11,2000-02-12,2000-02-19,...,2022-12-01,2022-12-02,2022-12-08,2022-12-09,2022-12-14,2022-12-16,2022-12-17,2022-12-19,2022-12-24,2022-12-25
ID,IDpix,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
84,172,,0.419674,,,0.386757,,,,,,...,0.323107,,,0.313889,0.263777,,,,,0.301909
84,173,,0.419674,,,0.386757,,,,,,...,0.323107,,,0.313889,0.263777,,,,,0.301909
84,174,,0.413802,,,0.380016,,,,,,...,0.320048,,,0.318409,0.259636,,,,,0.304786
84,175,,0.424940,,,0.393733,,,,,,...,0.334643,,,0.321906,0.270980,,,,,0.307882
84,176,,0.436430,,,0.408313,,,,,,...,0.317429,,,0.317217,0.248838,,,,,0.302583
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
405,4892,,0.328267,0.348205,,0.323358,,0.355581,,,0.317886,...,0.786440,,0.835379,0.781231,,0.797883,0.765506,,0.830963,0.776780
405,4893,,0.357780,0.374025,,0.319277,,0.383106,,,0.368284,...,0.791635,,0.793060,0.777738,0.734830,0.801442,0.750359,,0.786199,0.693786
405,4894,,0.357780,0.374025,,0.319277,,0.383106,,,0.368284,...,0.791635,,0.793060,0.777738,0.734830,0.801442,0.750359,,0.786199,0.693786
405,4895,,0.282037,0.330697,,0.312954,,0.346696,,,0.312491,...,0.727844,,0.604064,0.505488,0.500428,0.483013,0.459270,,0.477397,0.466940


In [27]:
# Fault-detection metadata: non-change rows followed by the filtered change rows.
fault_detection_metadata_df = pd.concat(
    [non_change_fault_detection_metadata_df, change_metadata_df]
)

In [28]:
# Fault-detection signal: stack the non-change and change rows, then re-sort
# the date columns of the combined table chronologically.
fault_detection_signal_df = pd.concat(
    [non_change_fault_detection_signal_df, change_signal_df]
).pipe(lambda stacked: stacked.reindex(columns=sorted(stacked.columns)))

In [29]:
# Show the combined fault-detection metadata table.
fault_detection_metadata_df

Unnamed: 0_level_0,Unnamed: 1_level_0,lat,lon,change_type,change_start,vegetation_type
ID,IDpix,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2,87,-33.058766,-71.004412,stable,2023-12-25 00:00:00,native
2,88,-33.059036,-71.004951,stable,2023-12-25 00:00:00,native
2,89,-33.058227,-71.003334,stable,2023-12-25 00:00:00,native
2,90,-33.058497,-71.003065,stable,2023-12-25 00:00:00,native
2,91,-33.058497,-71.003873,stable,2023-12-25 00:00:00,native
...,...,...,...,...,...,...
405,4892,-34.077456,-71.633143,fire,2018-03-24 00:00:00,native
405,4893,-34.078264,-71.632334,fire,2018-03-24 00:00:00,native
405,4894,-34.076917,-71.633682,fire,2018-03-24 00:00:00,native
405,4895,-34.077995,-71.632873,fire,2018-03-24 00:00:00,native


In [30]:
# Show the combined fault-detection signal table.
fault_detection_signal_df

Unnamed: 0_level_0,Unnamed: 1_level_0,2000-01-03,2000-01-04,2000-01-11,2000-01-18,2000-01-20,2000-01-26,2000-01-27,2000-01-28,2000-02-03,2000-02-05,...,2022-12-08,2022-12-09,2022-12-10,2022-12-14,2022-12-16,2022-12-17,2022-12-18,2022-12-19,2022-12-24,2022-12-25
ID,IDpix,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2,87,0.733178,,0.804631,,,,0.794510,,,,...,,0.779037,,0.746279,,0.736044,,,,0.754526
2,88,0.748210,,0.826657,,,,0.796656,,,,...,,0.823661,,0.787691,,0.781643,,,,0.794315
2,89,0.720108,,0.791214,,,,0.776143,,,,...,,0.796419,,0.788461,,0.778635,,,,0.780177
2,90,0.663568,,0.700002,,,,0.704723,,,,...,,0.712442,,0.715461,,0.684809,,,,0.686562
2,91,,,0.488417,0.388599,,0.374015,0.488722,,0.348919,,...,0.859983,0.832546,,,,,,0.79907,0.853928,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
405,4892,,,0.328267,0.348205,,,0.323358,,0.355581,,...,0.835379,0.781231,,,0.797883,0.765506,,,0.830963,0.776780
405,4893,,,0.357780,0.374025,,,0.319277,,0.383106,,...,0.793060,0.777738,,0.734830,0.801442,0.750359,,,0.786199,0.693786
405,4894,,,0.357780,0.374025,,,0.319277,,0.383106,,...,0.793060,0.777738,,0.734830,0.801442,0.750359,,,0.786199,0.693786
405,4895,,,0.282037,0.330697,,,0.312954,,0.346696,,...,0.604064,0.505488,,0.500428,0.483013,0.459270,,,0.477397,0.466940
