In [1]:
from typing import Callable, List, Optional

import autorootcwd  # noqa
import numpy as np
import pandas as pd
from scipy.signal import hilbert

In this notebook, a visual analysis is carried out on several candidate sets of features.

Boxplots group each feature's values by class and sensor, to evaluate whether some of the features make it possible to separate the classes manually.

## Loading data

Load the raw arrays, then remove the NaN values from the first three sensors by dropping their last column.

In [2]:
# Class label of every sample, wrapped in a single-column frame named "classes".
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python objects,
# which can execute code. Only load files from a trusted source.
classes = np.load("data/raw/Classes.npy", allow_pickle=True)
df_classes = pd.DataFrame(classes, columns=["classes"])

# One raw array per sensor. Paths are relative to the working directory
# (presumably the project root set by autorootcwd — TODO confirm).
sensor_1 = np.load("data/raw/Dados_1.npy", allow_pickle=True)
sensor_2 = np.load("data/raw/Dados_2.npy", allow_pickle=True)
sensor_3 = np.load("data/raw/Dados_3.npy", allow_pickle=True)
sensor_4 = np.load("data/raw/Dados_4.npy", allow_pickle=True)
sensor_5 = np.load("data/raw/Dados_5.npy", allow_pickle=True)

In [3]:
# Drop the last column of the first three sensor arrays.
# NOTE: this cell is not idempotent; every re-run trims one more column,
# so run it once per fresh kernel.
sensor_1 = sensor_1[:, :-1]
sensor_2 = sensor_2[:, :-1]
sensor_3 = sensor_3[:, :-1]

## Preprocess data

Loading utility functions for different signal representations and for features that are commonly used for fault detection from vibration signals on rotating machinery.

References: 

> [1] AHMED, Hosameldin; NANDI, Asoke K. Condition monitoring with vibration signals: Compressive sampling and learning algorithms for rotating machines. John Wiley & Sons, 2020.

> [2] Matlab. https://www.mathworks.com/help/predmaint/ug/signal-features.html

> [3] RANDALL, Robert Bond. Vibration-based condition monitoring: industrial, automotive and aerospace applications. John Wiley & Sons, 2021.

> [4] SMITH, Wade A.; RANDALL, Robert B. Rolling element bearing diagnostics using the Case Western Reserve University data: A benchmark study. Mechanical systems and signal processing, v. 64, p. 100-131, 2015.

In [4]:
from utils.signal_representations import (
    acc_to_displ,
    acc_to_vel,
    envelope_spectrum,
    get_fft,
    get_fftfreq,    
)

from utils.features.statistical import (
    kurtosis,
    peak_to_peak,
    rms,
    skewness,
    shape_factor,
)

from utils.features.impulsive import (
    clearance_factor,
    crest_factor,
    impulse_factor,
    peak,
)

from utils.features.frequency_domain import spectral_rms

from utils.filters import array_filter, kurtogram_bandpass

In [5]:
# Sampling rate handed to the signal helpers below.
# Presumably in Hz (samples per second) — TODO confirm against the acquisition setup.
SAMPLING_RATE = 10_000

Creating a single dataframe containing all the sensors data to make it easier to calculate new features.

In [6]:
# One column per sensor. list(...) splits each array along its first axis,
# so every cell of the frame holds one per-sample recording.
sensor_data = pd.DataFrame(
    dict(
        zip(
            [
                "sensor_1/acceleration",
                "sensor_2/acceleration",
                "sensor_3/acceleration",
                "sensor_4",
                "sensor_5/acceleration",
            ],
            map(list, (sensor_1, sensor_2, sensor_3, sensor_4, sensor_5)),
        )
    )
)

In [7]:
# Put the class labels next to the sensor columns; axis=1 concatenates
# column-wise, aligning both frames on their row index.
dataset = pd.concat([df_classes, sensor_data], axis=1)
dataset.head()

Unnamed: 0,classes,sensor_1/acceleration,sensor_2/acceleration,sensor_3/acceleration,sensor_4,sensor_5/acceleration
0,Classe D,"[-0.03290592200083503, -0.03745633214437494, -...","[-0.03245771343414401, -0.06589485824077518, -...","[-0.18207789777056302, -0.16599048709531383, 0...","[50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50....","[20.881371833507693, 15.145424452775224, 72.02..."
1,Classe A,"[0.19861925356262025, 0.17894949079687933, 0.0...","[-0.044002158627886055, -0.02630014078908132, ...","[-0.06512225047088595, -0.23241758624423495, -...","[50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50....","[-1.2064565956503799, 46.549413992425386, 46.1..."
2,Classe A,"[-0.000791709872946977, 0.012913326083018654, ...","[0.01296542349644882, 0.018673421085287897, 0....","[-0.0579970899263519, -0.030174625679562896, -...","[50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50....","[62.82408284904416, 68.29160592831909, 40.7393..."
3,Classe B,"[-0.086419360209154, -0.11283018665037549, -0....","[-0.25415441303441283, -0.03232926341808012, 0...","[-0.200594730655575, 0.3150522794992769, 0.170...","[50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50....","[-7.0948942643893655, 51.408031847706255, 76.6..."
4,Classe D,"[-0.18597495510633114, -0.06697250604841758, 0...","[-0.13439082930684412, -0.047646927833698914, ...","[0.5199736136662316, 0.34114925305997995, 0.17...","[50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50....","[14.008961618557876, 95.61491366358396, -11.57..."


Removing NaNs from sensor 5

In [8]:
# Count the NaNs inside each sensor-5 recording, then show how many
# recordings share each count (most frequent counts first).
dataset["sensor_5/acceleration"].apply(lambda x: np.isnan(x).sum()).value_counts().head()

sensor_5/acceleration
0     30308
10    12107
9      2817
20     1453
19     1450
Name: count, dtype: int64

In [9]:
def remove_nans(input_array: np.ndarray) -> np.ndarray:
    """
    Remove NaN values from the input array.

    Parameters:
        input_array (np.ndarray): Numeric array, or any array-like such as a
            list, that may contain NaN values.

    Returns:
        np.ndarray: A new 1-D array holding the non-NaN values in their
            original order. Boolean-mask indexing flattens multi-dimensional
            input. The input itself is left untouched.
    """
    # np.asarray is a no-op for ndarrays and lets plain sequences be masked too.
    values = np.asarray(input_array)
    return values[~np.isnan(values)]

In [10]:
# Store a NaN-free copy of every sensor-5 recording in a new column.
dataset["sensor_5_wo_nans"] = dataset["sensor_5/acceleration"].apply(remove_nans)

# Sanity check: every recording should now report zero NaNs.
dataset["sensor_5_wo_nans"].apply(
    lambda recording: np.isnan(recording).sum()
).value_counts()

sensor_5_wo_nans
0    50000
Name: count, dtype: int64

In [11]:
# Discard sensor 4 and the NaN-laden sensor-5 column, then let the cleaned
# column take over the original sensor-5 name.
dataset = (
    dataset
    .drop(columns=["sensor_4", "sensor_5/acceleration"])
    .rename(columns={"sensor_5_wo_nans": "sensor_5/acceleration"})
)

## Signal Representations

### Velocity

Transforming the vibration acceleration signal into a velocity signal.

In [12]:
# Acceleration columns of the three sensors that get velocity, displacement
# and envelope-spectrum representations.
ACCELERATION_COLUMNS = [f"sensor_{num}/acceleration" for num in [1, 2, 3]]

In [13]:
velocity_columns = [f"sensor_{num}/velocity" for num in [1,2,3]]

# Element-wise map: every cell holds one recording, which is converted by
# acc_to_vel together with the sampling rate.
dataset[velocity_columns] = dataset[ACCELERATION_COLUMNS].map(
    lambda acceleration: acc_to_vel(acceleration, SAMPLING_RATE)
)

### Displacement

Transform the acceleration signal into a displacement signal.

In [14]:
displacement_columns = [f"sensor_{num}/displacement" for num in [1,2,3]]

# acc_to_displ is fed the acceleration recordings directly (not the velocity
# columns), together with the sampling rate.
dataset[displacement_columns] = dataset[ACCELERATION_COLUMNS].map(
    lambda acceleration: acc_to_displ(acceleration, SAMPLING_RATE)
)

### Envelope spectrum

Get the spectrum of the signal's envelope. A useful representation to extract fault characteristics that modulate the signal. 

Usually used for bearing faults. 

In [15]:
envelope_spectrum_columns = [f"sensor_{num}/envelope_spectrum" for num in [1,2,3]]
intermediate_acc_columns = [f"sensor_{num}/int_acceleration" for num in [1,2,3]]

# Step 1: filter every acceleration recording with array_filter
# (cutoff 1000-2500, order 12) and park the result in temporary columns.
dataset[intermediate_acc_columns] = dataset[ACCELERATION_COLUMNS].map(
    lambda acceleration: array_filter(
        acceleration, SAMPLING_RATE, cutoff=[1000, 2500], filt_order=12
    )
)

# Step 2: envelope spectrum of each filtered recording.
dataset[envelope_spectrum_columns] = dataset[intermediate_acc_columns].map(envelope_spectrum)

# Step 3: the temporary filtered columns are no longer needed.
dataset = dataset.drop(columns=intermediate_acc_columns)

### Spectrum

Spectrum of the acceleration signal.

In [16]:
spectrum_columns = [
    "sensor_1/fft",
    "sensor_2/fft",
    "sensor_3/fft",
    "sensor_5/fft",
]

# Sensor 5 has an acceleration column as well, so it gets a spectrum too.
dataset[spectrum_columns] = dataset[
    ACCELERATION_COLUMNS + ["sensor_5/acceleration"]
].map(get_fft)

## Statistical Features

In [17]:
# Column names available for each signal representation. Sensor 5 only has
# acceleration and fft columns.
SENSOR_COLUMNS = {
    representation: [f"sensor_{num}/{representation}" for num in sensor_numbers]
    for representation, sensor_numbers in [
        ("acceleration", [1, 2, 3, 5]),
        ("velocity", [1, 2, 3]),
        ("fft", [1, 2, 3, 5]),
        ("envelope_spectrum", [1, 2, 3]),
    ]
}

In [18]:
# Scalar statistics computed on every recording. The key becomes the suffix
# of the generated column name ("<signal column>/<key>").
statistics = {
    "mean": np.mean,
    "std": np.std,
    "max": np.max,
    "min": np.min,
    "median": np.median,
    "rms": rms,
    "kurtosis": kurtosis,
    "skewness": skewness,
    "shape_factor": shape_factor,
    "peak_to_peak": peak_to_peak,
}

representations = ["acceleration", "velocity"]

# Gather one small frame per (representation, statistic) and append them all
# in a single concat. Concatenating inside the loop re-copies the growing
# dataset on every pass.
statistical_frames = []
for representation in representations:
    source_columns = SENSOR_COLUMNS[representation]

    for stat, method in statistics.items():
        print(f"Processing {representation}/{stat}...")
        # Element-wise map: each cell (one recording) is reduced to a scalar.
        stat_frame = dataset[source_columns].map(method)
        stat_frame.columns = [f"{signal}/{stat}" for signal in source_columns]
        statistical_frames.append(stat_frame)

dataset = pd.concat([dataset, *statistical_frames], axis=1)

Processing acceleration/mean...


Processing acceleration/std...
Processing acceleration/max...
Processing acceleration/min...
Processing acceleration/median...
Processing acceleration/rms...
Processing acceleration/kurtosis...
Processing acceleration/skewness...
Processing acceleration/shape_factor...
Processing acceleration/peak_to_peak...
Processing velocity/mean...
Processing velocity/std...
Processing velocity/max...
Processing velocity/min...
Processing velocity/median...
Processing velocity/rms...
Processing velocity/kurtosis...
Processing velocity/skewness...
Processing velocity/shape_factor...
Processing velocity/peak_to_peak...


In [19]:
# Preview the first rows; the statistical feature columns are appended at the end.
dataset.head()

Unnamed: 0,classes,sensor_1/acceleration,sensor_2/acceleration,sensor_3/acceleration,sensor_5/acceleration,sensor_1/velocity,sensor_2/velocity,sensor_3/velocity,sensor_1/displacement,sensor_2/displacement,...,sensor_3/velocity/kurtosis,sensor_1/velocity/skewness,sensor_2/velocity/skewness,sensor_3/velocity/skewness,sensor_1/velocity/shape_factor,sensor_2/velocity/shape_factor,sensor_3/velocity/shape_factor,sensor_1/velocity/peak_to_peak,sensor_2/velocity/peak_to_peak,sensor_3/velocity/peak_to_peak
0,Classe D,"[-0.03290592200083503, -0.03745633214437494, -...","[-0.03245771343414401, -0.06589485824077518, -...","[-0.18207789777056302, -0.16599048709531383, 0...","[20.881371833507693, 15.145424452775224, 72.02...","[2.3533191648167486e-06, -2.368745977583855e-0...","[2.9084427351345122e-05, 2.1345562755401916e-0...","[-6.545696132586331e-06, -2.3826811565906707e-...","[-0.0018831024925009328, -0.001885426232304942...","[0.0012332321230948446, 0.0012541721201578938,...",...,2.873696,-0.243868,0.630321,0.032051,1.194509,1.280582,1.260934,0.000197,0.000229,0.000404
1,Classe A,"[0.19861925356262025, 0.17894949079687933, 0.0...","[-0.044002158627886055, -0.02630014078908132, ...","[-0.06512225047088595, -0.23241758624423495, -...","[-1.2064565956503799, 46.549413992425386, 46.1...","[-2.7433269779858924e-05, -1.0518342225325034e...","[1.8193700415919616e-06, -1.0451857385228289e-...","[3.4280355652460163e-05, 1.0359204385584575e-0...","[-0.0022742769196628676, -0.002284595413385911...","[0.0005995927763461408, 0.0005985674491366499,...",...,2.259346,0.042731,0.224794,0.170108,1.142212,1.233703,1.227585,0.000208,0.000219,0.000268
2,Classe A,"[-0.000791709872946977, 0.012913326083018654, ...","[0.01296542349644882, 0.018673421085287897, 0....","[-0.0579970899263519, -0.030174625679562896, -...","[62.82408284904416, 68.29160592831909, 40.7393...","[-2.3315459903352e-05, -2.2559008022513838e-05...","[-2.846901730328859e-05, -2.778081824378529e-0...","[4.383495342968563e-06, 8.101886994962403e-07,...","[-0.0006389228212233814, -0.000661053208093467...","[0.00026365689308246234, 0.0002364039103853089...",...,2.029997,-0.019624,-0.525064,-0.133808,1.149884,1.174001,1.175664,6.7e-05,5.1e-05,5.1e-05
3,Classe B,"[-0.086419360209154, -0.11283018665037549, -0....","[-0.25415441303441283, -0.03232926341808012, 0...","[-0.200594730655575, 0.3150522794992769, 0.170...","[-7.0948942643893655, 51.408031847706255, 76.6...","[5.6058808509354374e-05, 4.475705783499283e-05...","[-7.862772499017182e-05, -8.12640612183081e-05...","[-1.5201044995774578e-05, 1.647541000502212e-0...","[-0.0009457313841679349, -0.000901824710431807...","[0.00024138958515633281, 0.0001616695411011726...",...,2.460179,0.246258,-0.013538,0.034832,1.16773,1.209571,1.20363,0.00023,0.000232,0.000262
4,Classe D,"[-0.18597495510633114, -0.06697250604841758, 0...","[-0.13439082930684412, -0.047646927833698914, ...","[0.5199736136662316, 0.34114925305997995, 0.17...","[14.008961618557876, 95.61491366358396, -11.57...","[-2.3525670404374852e-05, -3.08322147782337e-0...","[-1.3271859073052802e-05, -1.833297836693113e-...","[5.3080651889846364e-05, 8.703962202522684e-05...","[0.0016869149423652473, 0.0016566685396678, 0....","[-0.0007224631307184477, -0.000740447782496407...",...,3.036628,0.244984,-0.871934,0.015879,1.163064,1.280151,1.299272,0.000161,0.000165,0.000234


## Impulsive Features

In [20]:
# Impulsiveness indicators computed on every recording. The key becomes the
# suffix of the generated column name ("<signal column>/<key>").
impulsive_feats = {
    "peak": peak,
    "impulse_factor": impulse_factor,
    "crest_factor": crest_factor,
    "clearance_factor": clearance_factor,
}

representations = ["acceleration", "velocity"]

# Gather one small frame per (representation, feature) and append them all
# in a single concat. Concatenating inside the loop re-copies the growing
# dataset on every pass.
impulsive_frames = []
for representation in representations:
    source_columns = SENSOR_COLUMNS[representation]

    for feat, method in impulsive_feats.items():
        print(f"Processing {representation}/{feat}...")
        # Element-wise map: each cell (one recording) is reduced to a scalar.
        feat_frame = dataset[source_columns].map(method)
        feat_frame.columns = [f"{signal}/{feat}" for signal in source_columns]
        impulsive_frames.append(feat_frame)

dataset = pd.concat([dataset, *impulsive_frames], axis=1)

Processing acceleration/peak...
Processing acceleration/impulse_factor...


Processing acceleration/crest_factor...
Processing acceleration/clearance_factor...
Processing velocity/peak...
Processing velocity/impulse_factor...
Processing velocity/crest_factor...
Processing velocity/clearance_factor...


In [21]:
# Preview the first rows; the impulsive feature columns are appended at the end.
dataset.head()

Unnamed: 0,classes,sensor_1/acceleration,sensor_2/acceleration,sensor_3/acceleration,sensor_5/acceleration,sensor_1/velocity,sensor_2/velocity,sensor_3/velocity,sensor_1/displacement,sensor_2/displacement,...,sensor_3/velocity/peak,sensor_1/velocity/impulse_factor,sensor_2/velocity/impulse_factor,sensor_3/velocity/impulse_factor,sensor_1/velocity/crest_factor,sensor_2/velocity/crest_factor,sensor_3/velocity/crest_factor,sensor_1/velocity/clearance_factor,sensor_2/velocity/clearance_factor,sensor_3/velocity/clearance_factor
0,Classe D,"[-0.03290592200083503, -0.03745633214437494, -...","[-0.03245771343414401, -0.06589485824077518, -...","[-0.18207789777056302, -0.16599048709531383, 0...","[20.881371833507693, 15.145424452775224, 72.02...","[2.3533191648167486e-06, -2.368745977583855e-0...","[2.9084427351345122e-05, 2.1345562755401916e-0...","[-6.545696132586331e-06, -2.3826811565906707e-...","[-0.0018831024925009328, -0.001885426232304942...","[0.0012332321230948446, 0.0012541721201578938,...",...,0.000207,2.433362,3.808749,3.469636,2.037123,2.974232,2.75164,2.791162,4.57039,4.195728
1,Classe A,"[0.19861925356262025, 0.17894949079687933, 0.0...","[-0.044002158627886055, -0.02630014078908132, ...","[-0.06512225047088595, -0.23241758624423495, -...","[-1.2064565956503799, 46.549413992425386, 46.1...","[-2.7433269779858924e-05, -1.0518342225325034e...","[1.8193700415919616e-06, -1.0451857385228289e-...","[3.4280355652460163e-05, 1.0359204385584575e-0...","[-0.0022742769196628676, -0.002284595413385911...","[0.0005995927763461408, 0.0005985674491366499,...",...,0.000138,2.204137,3.182531,2.569735,1.92971,2.579658,2.093326,2.456032,3.740394,3.035847
2,Classe A,"[-0.000791709872946977, 0.012913326083018654, ...","[0.01296542349644882, 0.018673421085287897, 0....","[-0.0579970899263519, -0.030174625679562896, -...","[62.82408284904416, 68.29160592831909, 40.7393...","[-2.3315459903352e-05, -2.2559008022513838e-05...","[-2.846901730328859e-05, -2.778081824378529e-0...","[4.383495342968563e-06, 8.101886994962403e-07,...","[-0.0006389228212233814, -0.000661053208093467...","[0.00026365689308246234, 0.0002364039103853089...",...,2.6e-05,1.936146,2.45827,2.257664,1.683775,2.093925,1.920331,2.143697,2.742221,2.56118
3,Classe B,"[-0.086419360209154, -0.11283018665037549, -0....","[-0.25415441303441283, -0.03232926341808012, 0...","[-0.200594730655575, 0.3150522794992769, 0.170...","[-7.0948942643893655, 51.408031847706255, 76.6...","[5.6058808509354374e-05, 4.475705783499283e-05...","[-7.862772499017182e-05, -8.12640612183081e-05...","[-1.5201044995774578e-05, 1.647541000502212e-0...","[-0.0009457313841679349, -0.000901824710431807...","[0.00024138958515633281, 0.0001616695411011726...",...,0.000143,2.343853,2.638394,3.109306,2.007188,2.181265,2.583273,2.636515,3.039421,3.574973
4,Classe D,"[-0.18597495510633114, -0.06697250604841758, 0...","[-0.13439082930684412, -0.047646927833698914, ...","[0.5199736136662316, 0.34114925305997995, 0.17...","[14.008961618557876, 95.61491366358396, -11.57...","[-2.3525670404374852e-05, -3.08322147782337e-0...","[-1.3271859073052802e-05, -1.833297836693113e-...","[5.3080651889846364e-05, 8.703962202522684e-05...","[0.0016869149423652473, 0.0016566685396678, 0....","[-0.0007224631307184477, -0.000740447782496407...",...,0.000118,2.273364,4.294731,3.318239,1.954634,3.354862,2.553922,2.557923,5.052758,4.074405


## Frequency bands Features

In [22]:
def filter_spectrum(
    input_array: np.ndarray,
    cutoff: List[int],
    method: Callable,
    sampling_rate: Optional[int] = None,
):
    """Compute a feature over one frequency band of a signal's spectrum.

    The spectrum of ``input_array`` is obtained with ``get_fft``. Every bin
    whose frequency (from ``get_fftfreq``) is not strictly between the two
    cutoffs is multiplied by zero, and ``method`` is applied to the result.
    The masked spectrum keeps its full length, so ``method`` also sees the
    zeroed bins.

    Parameters:
        input_array (np.ndarray): The input acceleration array to be filtered.
        cutoff (List[int]): ``[low, high]`` band edges, in the unit of the
            frequencies returned by ``get_fftfreq``. Both edges are excluded.
        method (Callable): Called with the masked spectrum; its return value
            is the result.
        sampling_rate (Optional[int]): Sampling rate passed to ``get_fftfreq``.
            Defaults to the notebook-wide ``SAMPLING_RATE`` when omitted.

    Returns:
        Whatever ``method`` returns for the masked spectrum.
    """
    # Resolved at call time so the notebook constant stays the default.
    if sampling_rate is None:
        sampling_rate = SAMPLING_RATE

    low, high = cutoff[0], cutoff[1]
    frequencies = get_fftfreq(len(input_array), sampling_rate)
    spectrum_array = get_fft(input_array)

    # Boolean mask: True only for bins strictly inside the band.
    in_band = (frequencies > low) & (frequencies < high)
    filtered_spectrum = spectrum_array * in_band

    return method(filtered_spectrum)

In [23]:
# Features evaluated on each frequency band of the acceleration spectrum.
features = {
    "rms": spectral_rms,
    "peak": peak,
}

# [low, high] band edges handed to filter_spectrum as `cutoff`.
cutoff_frequencies = [
    [5, 500],
    [500, 1000],
    [5, 1000],
    [500, 1500],
    [1000, 1500],
    [1500, 2000],
    [2000, 3000],
    [3000, 5000],
]

# "<feat>/<low>-<high>" -> output column names, one per sensor with an fft column.
freq_band_features = {
    f"{feat}/{freq_band[0]}-{freq_band[1]}": [
        f"{signal}/{feat}/{freq_band[0]}-{freq_band[1]}"
        for signal in SENSOR_COLUMNS["fft"]
    ]
    for freq_band in cutoff_frequencies
    for feat in features.keys()
}

# Gather one small frame per (feature, band) and append them all in a single
# concat. Concatenating inside the loop re-copies the growing dataset on
# every pass.
band_frames = []
for feat, method in features.items():
    for freq_band in cutoff_frequencies:
        band_label = f"{feat}/{freq_band[0]}-{freq_band[1]}"
        print(f"Processing fft/{band_label}...")

        # filter_spectrum computes the spectrum itself, so it is fed the
        # acceleration recordings. The output columns carry the fft names,
        # which list the same sensors in the same order.
        band_frame = dataset[SENSOR_COLUMNS["acceleration"]].map(
            lambda x: filter_spectrum(x, freq_band, method)
        )
        band_frame.columns = freq_band_features[band_label]
        band_frames.append(band_frame)

dataset = pd.concat([dataset, *band_frames], axis=1)

Processing fft/rms/5-500...


Processing fft/rms/500-1000...
Processing fft/rms/5-1000...
Processing fft/rms/500-1500...
Processing fft/rms/1000-1500...
Processing fft/rms/1500-2000...
Processing fft/rms/2000-3000...
Processing fft/rms/3000-5000...
Processing fft/peak/5-500...
Processing fft/peak/500-1000...
Processing fft/peak/5-1000...
Processing fft/peak/500-1500...
Processing fft/peak/1000-1500...
Processing fft/peak/1500-2000...
Processing fft/peak/2000-3000...
Processing fft/peak/3000-5000...


In [24]:
# Preview the first rows; the frequency-band feature columns are appended at the end.
dataset.head()

Unnamed: 0,classes,sensor_1/acceleration,sensor_2/acceleration,sensor_3/acceleration,sensor_5/acceleration,sensor_1/velocity,sensor_2/velocity,sensor_3/velocity,sensor_1/displacement,sensor_2/displacement,...,sensor_3/fft/peak/1500-2000,sensor_5/fft/peak/1500-2000,sensor_1/fft/peak/2000-3000,sensor_2/fft/peak/2000-3000,sensor_3/fft/peak/2000-3000,sensor_5/fft/peak/2000-3000,sensor_1/fft/peak/3000-5000,sensor_2/fft/peak/3000-5000,sensor_3/fft/peak/3000-5000,sensor_5/fft/peak/3000-5000
0,Classe D,"[-0.03290592200083503, -0.03745633214437494, -...","[-0.03245771343414401, -0.06589485824077518, -...","[-0.18207789777056302, -0.16599048709531383, 0...","[20.881371833507693, 15.145424452775224, 72.02...","[2.3533191648167486e-06, -2.368745977583855e-0...","[2.9084427351345122e-05, 2.1345562755401916e-0...","[-6.545696132586331e-06, -2.3826811565906707e-...","[-0.0018831024925009328, -0.001885426232304942...","[0.0012332321230948446, 0.0012541721201578938,...",...,0.163328,5.849899,0.017268,0.029306,0.052284,13.121727,0.004112,0.010371,0.024012,13.311888
1,Classe A,"[0.19861925356262025, 0.17894949079687933, 0.0...","[-0.044002158627886055, -0.02630014078908132, ...","[-0.06512225047088595, -0.23241758624423495, -...","[-1.2064565956503799, 46.549413992425386, 46.1...","[-2.7433269779858924e-05, -1.0518342225325034e...","[1.8193700415919616e-06, -1.0451857385228289e-...","[3.4280355652460163e-05, 1.0359204385584575e-0...","[-0.0022742769196628676, -0.002284595413385911...","[0.0005995927763461408, 0.0005985674491366499,...",...,0.074488,6.792558,0.027696,0.041534,0.040669,9.754418,0.010538,0.020141,0.051326,11.225636
2,Classe A,"[-0.000791709872946977, 0.012913326083018654, ...","[0.01296542349644882, 0.018673421085287897, 0....","[-0.0579970899263519, -0.030174625679562896, -...","[62.82408284904416, 68.29160592831909, 40.7393...","[-2.3315459903352e-05, -2.2559008022513838e-05...","[-2.846901730328859e-05, -2.778081824378529e-0...","[4.383495342968563e-06, 8.101886994962403e-07,...","[-0.0006389228212233814, -0.000661053208093467...","[0.00026365689308246234, 0.0002364039103853089...",...,0.00641,7.761963,0.001289,0.001029,0.002812,10.223106,0.000354,0.000696,0.001468,8.338421
3,Classe B,"[-0.086419360209154, -0.11283018665037549, -0....","[-0.25415441303441283, -0.03232926341808012, 0...","[-0.200594730655575, 0.3150522794992769, 0.170...","[-7.0948942643893655, 51.408031847706255, 76.6...","[5.6058808509354374e-05, 4.475705783499283e-05...","[-7.862772499017182e-05, -8.12640612183081e-05...","[-1.5201044995774578e-05, 1.647541000502212e-0...","[-0.0009457313841679349, -0.000901824710431807...","[0.00024138958515633281, 0.0001616695411011726...",...,0.065001,12.102371,0.016004,0.02283,0.036518,10.587317,0.008351,0.015654,0.039173,8.57718
4,Classe D,"[-0.18597495510633114, -0.06697250604841758, 0...","[-0.13439082930684412, -0.047646927833698914, ...","[0.5199736136662316, 0.34114925305997995, 0.17...","[14.008961618557876, 95.61491366358396, -11.57...","[-2.3525670404374852e-05, -3.08322147782337e-0...","[-1.3271859073052802e-05, -1.833297836693113e-...","[5.3080651889846364e-05, 8.703962202522684e-05...","[0.0016869149423652473, 0.0016566685396678, 0....","[-0.0007224631307184477, -0.000740447782496407...",...,0.094097,6.848806,0.015646,0.051804,0.057087,8.33272,0.007081,0.015132,0.03658,9.833553


## Envelope spectrum Features

In [25]:
def filter_envelope(
    input_array: np.ndarray,
    cutoff: List[int],
    method: Callable,
    sampling_rate: Optional[int] = None,
):
    """Compute a feature over one frequency band of a signal's envelope spectrum.

    The envelope is the magnitude of the analytic signal (Hilbert transform)
    of ``input_array``. Its spectrum is obtained with ``get_fft``, every bin
    whose frequency is not strictly between the two cutoffs is multiplied by
    zero, and ``method`` is applied to the result. The masked spectrum keeps
    its full length, so ``method`` also sees the zeroed bins.

    Parameters:
        input_array (np.ndarray): The input acceleration array (time domain).
            Do not pass an already computed envelope spectrum: the envelope
            and its spectrum are computed here.
        cutoff (List[int]): ``[low, high]`` band edges, in the unit of the
            frequencies returned by ``get_fftfreq``. Both edges are excluded.
        method (Callable): Called with the masked envelope spectrum; its
            return value is the result.
        sampling_rate (Optional[int]): Sampling rate passed to ``get_fftfreq``.
            Defaults to the notebook-wide ``SAMPLING_RATE`` when omitted.

    Returns:
        Whatever ``method`` returns for the masked envelope spectrum.
    """
    # Resolved at call time so the notebook constant stays the default.
    if sampling_rate is None:
        sampling_rate = SAMPLING_RATE

    # Get envelope through hilbert transform
    envelope_time_domain = np.abs(hilbert(input_array))

    # Get envelope spectrum
    frequencies = get_fftfreq(len(envelope_time_domain), sampling_rate)
    spectrum_array = get_fft(envelope_time_domain)

    # Keep only the bins strictly inside the requested band
    low, high = cutoff[0], cutoff[1]
    in_band = (frequencies > low) & (frequencies < high)
    filtered_envelope_spectrum = spectrum_array * in_band

    # Apply transformation
    return method(filtered_envelope_spectrum)

In [26]:
# Features evaluated on each frequency band of the envelope spectrum.
features = {
    "rms": spectral_rms,
    "peak": peak,
}

# [low, high] band edges handed to filter_envelope as `cutoff`.
cutoff_frequencies = [
    [5, 300],
    [250, 500],
    [450, 750],
    [700, 1050]
]

# "<feat>/<low>-<high>" -> output column names, one per sensor with an
# envelope-spectrum column.
freq_band_features = {
    f"{feat}/{freq_band[0]}-{freq_band[1]}": [
        f"{signal}/{feat}/{freq_band[0]}-{freq_band[1]}"
        for signal in SENSOR_COLUMNS["envelope_spectrum"]
    ]
    for freq_band in cutoff_frequencies
    for feat in features.keys()
}

# filter_envelope takes a time-domain acceleration recording and computes the
# Hilbert envelope and its spectrum itself. Feeding it the stored
# "*/envelope_spectrum" columns would take the envelope spectrum of a
# spectrum, so the matching acceleration columns are used as input.
# NOTE(review): the "Envelope spectrum" section filters the signal
# (cutoff 1000-2500) before taking the envelope; no such pre-filtering is
# applied here. Confirm whether it is wanted for these features.
envelope_source_columns = [
    signal.replace("/envelope_spectrum", "/acceleration")
    for signal in SENSOR_COLUMNS["envelope_spectrum"]
]

# Gather one small frame per (feature, band) and append them all in a single
# concat. Concatenating inside the loop re-copies the growing dataset on
# every pass.
envelope_band_frames = []
for feat, method in features.items():
    for freq_band in cutoff_frequencies:
        band_label = f"{feat}/{freq_band[0]}-{freq_band[1]}"
        print(f"Processing envelope_spectrum/{band_label}...")

        band_frame = dataset[envelope_source_columns].map(
            lambda x: filter_envelope(x, freq_band, method)
        )
        band_frame.columns = freq_band_features[band_label]
        envelope_band_frames.append(band_frame)

dataset = pd.concat([dataset, *envelope_band_frames], axis=1)

Processing envelope_spectrum/rms/5-300...


Processing envelope_spectrum/rms/250-500...
Processing envelope_spectrum/rms/450-750...
Processing envelope_spectrum/rms/700-1050...
Processing envelope_spectrum/peak/5-300...
Processing envelope_spectrum/peak/250-500...
Processing envelope_spectrum/peak/450-750...
Processing envelope_spectrum/peak/700-1050...


## Exporting datasets

In [27]:
# Labels plus every array-valued signal representation (one recording,
# velocity, displacement or spectrum per cell); no scalar features.
representation_columns = ["classes"]
representation_columns += [
    "sensor_1/acceleration",
    "sensor_2/acceleration",
    "sensor_3/acceleration",
    "sensor_5/acceleration",
]
representation_columns += [
    "sensor_1/velocity",
    "sensor_2/velocity",
    "sensor_3/velocity",
]
representation_columns += [
    "sensor_1/displacement",
    "sensor_2/displacement",
    "sensor_3/displacement",
]
representation_columns += [
    "sensor_1/envelope_spectrum",
    "sensor_2/envelope_spectrum",
    "sensor_3/envelope_spectrum",
]
representation_columns += [
    "sensor_1/fft",
    "sensor_2/fft",
    "sensor_3/fft",
    "sensor_5/fft",
]

representations_dataset = dataset[representation_columns]

In [28]:
# Persist the labels and the array-valued signal representations.
representations_dataset.to_parquet("data/processed/signal_representation_features.parquet")

In [29]:
# Array-valued columns to leave out, so that only the labels and the scalar
# feature columns remain.
array_valued_columns = [
    "sensor_1/acceleration",
    "sensor_2/acceleration",
    "sensor_3/acceleration",
    "sensor_5/acceleration",
    "sensor_1/velocity",
    "sensor_2/velocity",
    "sensor_3/velocity",
    "sensor_1/displacement",
    "sensor_2/displacement",
    "sensor_3/displacement",
    "sensor_1/envelope_spectrum",
    "sensor_2/envelope_spectrum",
    "sensor_3/envelope_spectrum",
    "sensor_1/fft",
    "sensor_2/fft",
    "sensor_3/fft",
    "sensor_5/fft",
]

specialized_dataset = dataset.drop(columns=array_valued_columns)

In [30]:
# Persist the labels and the scalar feature columns.
specialized_dataset.to_parquet("data/processed/specialized_features.parquet")

In [34]:
! du -hs data/processed/specialized_features.parquet

86M	data/processed/specialized_features.parquet


In [36]:
! du -hs data/processed/signal_representation_features.parquet

966M	data/processed/signal_representation_features.parquet
