In [7]:
import pandas as pd
import torch
import numpy as np
import pywt
import sys
import os
import matplotlib.pyplot as plt
from pathlib import Path
from typing import Optional
from pydantic import BaseModel
from abc import ABC

# Add the root directory (two levels above the current working directory)
# to sys.path. The membership check keeps this cell idempotent: re-running
# it does not append the same entry a second time.
project_root = os.path.abspath(os.path.join(os.getcwd(), '../../'))
if project_root not in sys.path:
    sys.path.append(project_root)

In [8]:
from ThreeWToolkit.feature_extraction import (
        extract_wavelet_features,
        extract_statistical_features,
        extract_exponential_statistics_features)

from ThreeWToolkit.dataset import ParquetDataset, DatasetConfig
from ThreeWToolkit.core.base_dataset import EventPrefixEnum

### Loading 3W Dataset

In [9]:
# Dataset location, given relative to the current working directory.
dataset_path = Path("./dataset")

# split=None loads all files; the target defaults to 'class'.
ds_config = DatasetConfig(
    path=dataset_path,
    split=None,
    file_type="parquet",
)

# download=True requests the dataset download (see the progress bar in the cell output).
ds = ParquetDataset(ds_config, download=True)

3w_dataset_2.0.0.zip: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1.79G/1.79G [03:47<00:00, 7.88MB/s]


In [40]:
# Restrict the dataset to events whose prefix is EventPrefixEnum.REAL.
# NOTE(review): the previous comment here read "get only drawn", which does not
# match the enum member actually passed. Presumably REAL selects real instances
# (as opposed to simulated or hand-drawn ones) — confirm against EventPrefixEnum.
event_types = [EventPrefixEnum.REAL]
ds_config = DatasetConfig(
    path=dataset_path,
    split=None,
    file_type="parquet",
    event_type=event_types,
)

# download=False: do not request the download again (the loading cell used download=True).
ds = ParquetDataset(ds_config, download=False)
len(ds)

1119

In [42]:
# Fetch the first event a single time and take both parts from the same sample,
# instead of indexing the dataset once per field.
sample = ds[0]

# Replace missing values with 0 in both the signals and the labels.
# NOTE(review): for the labels this turns missing entries into class 0 —
# confirm that this is the intended treatment.
X = sample['signal'].fillna(0)
y = sample['label'].fillna(0)

X

Unnamed: 0_level_0,ABER-CKGL,ABER-CKP,ESTADO-DHSV,ESTADO-M1,ESTADO-M2,ESTADO-PXO,ESTADO-SDV-GL,ESTADO-SDV-P,ESTADO-W1,ESTADO-W2,...,P-PDG,PT-P,P-TPT,QBS,QGL,T-JUS-CKP,T-MON-CKP,T-PDG,T-TPT,state
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-09-18 01:01:14,100.0,42.75873,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,24189600.0,0.0,15792100.0,0.0,1.354473,36.65231,0.0,61.64542,45.63004,0
2017-09-18 01:01:15,100.0,42.75874,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,24189900.0,0.0,15791840.0,0.0,1.348486,36.65199,0.0,61.64537,45.62975,0
2017-09-18 01:01:16,100.0,42.75875,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,24190060.0,0.0,15792080.0,0.0,1.342498,36.65166,0.0,61.64536,45.63043,0
2017-09-18 01:01:17,100.0,42.75876,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,24190230.0,0.0,15792320.0,0.0,1.336511,36.65134,0.0,61.64534,45.63111,0
2017-09-18 01:01:18,100.0,42.75877,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,24190010.0,0.0,15792960.0,0.0,1.330523,36.65102,0.0,61.64530,45.63278,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-09-18 06:59:56,100.0,25.84574,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,24201390.0,0.0,14934310.0,0.0,1.107623,24.37696,0.0,61.63657,52.01305,0
2017-09-18 06:59:57,100.0,25.84574,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,24201650.0,0.0,14935360.0,0.0,1.103462,24.38357,0.0,61.63653,52.00119,0
2017-09-18 06:59:58,100.0,25.84573,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,24202180.0,0.0,14936180.0,0.0,1.099302,24.39019,0.0,61.63644,51.97775,0
2017-09-18 06:59:59,100.0,25.84573,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,24202710.0,0.0,14936990.0,0.0,1.095141,24.39680,0.0,61.63636,51.95429,0


In [43]:
# Display the label frame taken from the same sample (a single 'class' column).
y

Unnamed: 0_level_0,class
timestamp,Unnamed: 1_level_1
2017-09-18 01:01:14,0
2017-09-18 01:01:15,0
2017-09-18 01:01:16,0
2017-09-18 01:01:17,0
2017-09-18 01:01:18,0
...,...
2017-09-18 06:59:56,3
2017-09-18 06:59:57,3
2017-09-18 06:59:58,3
2017-09-18 06:59:59,3


### Wavelet Feature Extraction

In [39]:
# Set up the wavelet extractor with level=3, overlap=0.875 and no offset.
config = extract_wavelet_features.WaveletConfig(
    level=3,
    overlap=0.875,
    offset=0,
)
feature_extractor = extract_wavelet_features.ExtractWaveletFeatures(config)

# Run the extractor on the sample; element [0] of the result is the
# feature table that is inspected below.
wavelet_features = feature_extractor(X, y)

# Report the feature-table shape and the column naming scheme.
print("Shape of the extracted features:", wavelet_features[0].shape)
print("\nColumns are named as <sensor_name>_<feature>_<level>.")
print("Ex: sensor_1_A3 is the level 3 approximation coefficient for sensor 1.")

# Preview the first rows of the feature table.
wavelet_features[0].head()

Shape of the extracted features: (21520, 196)

Columns are named as <sensor_name>_<feature>_<level>.
Ex: sensor_1_A3 is the level 3 approximation coefficient for sensor 1.


Unnamed: 0_level_0,ABER-CKGL_A3,ABER-CKP_A3,ESTADO-DHSV_A3,ESTADO-M1_A3,ESTADO-M2_A3,ESTADO-PXO_A3,ESTADO-SDV-GL_A3,ESTADO-SDV-P_A3,ESTADO-W1_A3,ESTADO-W2_A3,...,P-PDG_A0,PT-P_A0,P-TPT_A0,QBS_A0,QGL_A0,T-JUS-CKP_A0,T-MON-CKP_A0,T-PDG_A0,T-TPT_A0,state_A0
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-09-18 01:01:21,282.842712,120.940061,0.0,0.0,0.0,0.0,2.828427,2.828427,2.828427,0.0,...,24189660.0,0.0,15793980.0,0.0,1.319373,36.65005,0.0,61.64523,45.63686,0.0
2017-09-18 01:01:22,282.842712,120.940093,0.0,0.0,0.0,0.0,2.828427,2.828427,2.828427,0.0,...,24189590.0,0.0,15794180.0,0.0,1.316791,36.64973,0.0,61.64522,45.63807,0.0
2017-09-18 01:01:23,282.842712,120.940125,0.0,0.0,0.0,0.0,2.828427,2.828427,2.828427,0.0,...,24189520.0,0.0,15794370.0,0.0,1.31421,36.64941,0.0,61.6452,45.63927,0.0
2017-09-18 01:01:24,282.842712,120.940157,0.0,0.0,0.0,0.0,2.828427,2.828427,2.828427,0.0,...,24189410.0,0.0,15796460.0,0.0,1.311629,36.64909,0.0,61.64511,45.64408,0.0
2017-09-18 01:01:25,282.842712,120.940189,0.0,0.0,0.0,0.0,2.828427,2.828427,2.828427,0.0,...,24189300.0,0.0,15798550.0,0.0,1.309047,36.64877,0.0,61.64502,45.64889,0.0


In [37]:
# First rows of the second element returned by the wavelet extractor
# (the 'class' labels, indexed by timestamp).
wavelet_features[1].head()

Unnamed: 0_level_0,class
timestamp,Unnamed: 1_level_1
2017-09-18 01:01:21,0
2017-09-18 01:01:22,0
2017-09-18 01:01:23,0
2017-09-18 01:01:24,0
2017-09-18 01:01:25,0


In [24]:
# Offset used for this run. It is defined once so that the extractor
# configuration and the printed messages below cannot drift apart.
offset_points = 20

# Same wavelet settings as the earlier run, but with a non-zero offset.
config_offset = extract_wavelet_features.WaveletConfig(level=3, overlap=0.875, offset=offset_points)
extractor_offset = extract_wavelet_features.ExtractWaveletFeatures(config_offset)
features_offset = extractor_offset(X, y)

# Compare the first timestamp of the earlier run (offset=0) with this one.
print("--- No offset ---")
print("Start index:", wavelet_features[0].index[0])

print(f"\n--- Using offset={offset_points} ---")
print("Start index with offset:", features_offset[0].index[0])
print(f"Extraction began {offset_points} points later, so the first timestamp is delayed.")

features_offset[0].head()

--- No offset ---
Start index: 2017-09-18 01:01:21

--- Using offset=20 ---
Start index with offset: 2017-09-18 01:01:41
Extraction began 20 points later, so the first timestamp is delayed.


Unnamed: 0_level_0,ABER-CKGL_A3,ABER-CKP_A3,ESTADO-DHSV_A3,ESTADO-M1_A3,ESTADO-M2_A3,ESTADO-PXO_A3,ESTADO-SDV-GL_A3,ESTADO-SDV-P_A3,ESTADO-W1_A3,ESTADO-W2_A3,...,P-PDG_A0,PT-P_A0,P-TPT_A0,QBS_A0,QGL_A0,T-JUS-CKP_A0,T-MON-CKP_A0,T-PDG_A0,T-TPT_A0,state_A0
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-09-18 01:01:41,282.842712,120.940708,0.0,0.0,0.0,0.0,2.828427,2.828427,2.828427,0.0,...,24189510.0,0.0,15803940.0,0.0,1.037452,36.64363,0.0,61.64454,45.64159,0.0
2017-09-18 01:01:42,282.842712,120.94074,0.0,0.0,0.0,0.0,2.828427,2.828427,2.828427,0.0,...,24188780.0,0.0,15804480.0,0.0,1.03119,36.64331,0.0,61.6445,45.63889,0.0
2017-09-18 01:01:43,282.842712,120.940772,0.0,0.0,0.0,0.0,2.828427,2.828427,2.828427,0.0,...,24188060.0,0.0,15805020.0,0.0,1.024929,36.64299,0.0,61.64446,45.63618,0.0
2017-09-18 01:01:44,282.842712,120.940807,0.0,0.0,0.0,0.0,2.828427,2.828427,2.828427,0.0,...,24187890.0,0.0,15804740.0,0.0,1.018668,36.64267,0.0,61.64445,45.63375,0.0
2017-09-18 01:01:45,282.842712,120.940843,0.0,0.0,0.0,0.0,2.828427,2.828427,2.828427,0.0,...,24187710.0,0.0,15804460.0,0.0,1.012407,36.64235,0.0,61.64445,45.63132,0.0


### Statistical Feature Extraction

In [26]:
# Build the windowed statistical extractor (window_size=20, overlap=0.75).
config_stats = extract_statistical_features.StatisticalConfig(
    window_size=20,
    overlap=0.75,
)
statistical_extractor = extract_statistical_features.ExtractStatisticalFeatures(config_stats)

# Run it on the same X / y sample used for the wavelet features.
statistical_features = statistical_extractor(X, y)

# Report the result shape, then preview the first rows.
print(f"Shape of the extracted statistical features: {statistical_features[0].shape}")
print("\nThe columns contain the 9 statistical features extracted for each sensor.")

statistical_features[0].head()

Shape of the extracted statistical features: (4302, 252)

The columns contain the 9 statistical features extracted for each sensor.


Unnamed: 0_level_0,ABER-CKGL_mean,ABER-CKP_mean,ESTADO-DHSV_mean,ESTADO-M1_mean,ESTADO-M2_mean,ESTADO-PXO_mean,ESTADO-SDV-GL_mean,ESTADO-SDV-P_mean,ESTADO-W1_mean,ESTADO-W2_mean,...,P-PDG_max,PT-P_max,P-TPT_max,QBS_max,QGL_max,T-JUS-CKP_max,T-MON-CKP_max,T-PDG_max,T-TPT_max,state_max
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-09-18 01:01:33,100.0,42.758837,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,24190230.0,0.0,15801850.0,0.0,1.354473,36.652309,0.0,61.64542,45.65527,0.0
2017-09-18 01:01:38,100.0,42.758894,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,24190160.0,0.0,15804390.0,0.0,1.324536,36.6507,0.0,61.645271,45.65527,0.0
2017-09-18 01:01:43,100.0,42.758951,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,24190160.0,0.0,15805020.0,0.0,1.311629,36.64909,0.0,61.645111,45.65527,0.0
2017-09-18 01:01:48,100.0,42.75901,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,24190050.0,0.0,15805020.0,0.0,1.298721,36.647491,0.0,61.644878,45.65527,0.0
2017-09-18 01:01:53,100.0,42.759068,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,24189730.0,0.0,15805020.0,0.0,1.202071,36.645882,0.0,61.64473,45.653431,0.0


One possible visualization for statistical features is to plot the rolling mean with a standard-deviation band around it. This shows both the central tendency and the volatility of the signal over time.

### Exponentially Weighted Statistical Feature Extraction

In [35]:
# Configure the exponentially weighted (EW) statistical extractor.
# decay: a value between 0 and 1. With decay=0.9, each data point is described
# as having 90% of the weight of the next point in the window.
# NOTE(review): the earlier wording also claimed that "a higher value gives more
# weight to more recent points"; under the 90%-of-the-next-point reading a higher
# decay flattens the weights instead — confirm against EWStatisticalConfig.
config_ew = extract_exponential_statistics_features.EWStatisticalConfig(
    window_size=20,
    overlap=0.75,
    decay=0.9,
)
ew_extractor = extract_exponential_statistics_features.ExtractEWStatisticalFeatures(config_ew)

# Apply it to the same X / y sample.
ew_features = ew_extractor(X, y)

# Report the result shape, then preview the first rows.
print(f"Shape of the extracted EWMA features: {ew_features[0].shape}")
ew_features[0].head()

Shape of the extracted EWMA features: (4302, 252)


Unnamed: 0_level_0,ABER-CKGL_ew_mean,ABER-CKP_ew_mean,ESTADO-DHSV_ew_mean,ESTADO-M1_ew_mean,ESTADO-M2_ew_mean,ESTADO-PXO_ew_mean,ESTADO-SDV-GL_ew_mean,ESTADO-SDV-P_ew_mean,ESTADO-W1_ew_mean,ESTADO-W2_ew_mean,...,P-PDG_ew_max,PT-P_ew_max,P-TPT_ew_max,QBS_ew_max,QGL_ew_max,T-JUS-CKP_ew_max,T-MON-CKP_ew_max,T-PDG_ew_max,T-TPT_ew_max,state_ew_max
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-09-18 01:01:33,99.999987,42.758869,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,1.402976,0.0,1.008337,0.0,1.844909,2.462274,0.0,2.259787,0.794557,0.0
2017-09-18 01:01:38,99.999987,42.758926,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,1.481351,0.0,0.983055,0.0,1.346377,2.461312,0.0,2.416478,0.952322,0.0
2017-09-18 01:01:43,99.999987,42.758984,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,1.506151,0.0,0.979971,0.0,1.769086,2.458479,0.0,2.871788,1.354666,0.0
2017-09-18 01:01:48,99.999987,42.759043,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,1.773369,0.0,1.124426,0.0,2.761299,2.461795,0.0,2.803631,1.68122,0.0
2017-09-18 01:01:53,99.999987,42.759102,0.0,0.0,0.0,0.0,1.0,1.0,1.0,0.0,...,2.120067,0.0,1.970189,0.0,3.463668,2.458462,0.0,2.778968,1.895084,0.0
