## Bibliotecas Necessárias

In [3]:
from glob import glob
from scipy.fft import fft
from typing import Tuple
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score

import kagglehub
import pandas as pd
import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim


## Baixando os Dados

In [4]:
# Download latest version
dataset_path = kagglehub.dataset_download(
    "uysalserkan/fault-induction-motor-dataset",
    force_download=True,
)

print("Path to dataset files:", dataset_path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/uysalserkan/fault-induction-motor-dataset?dataset_version_number=1...


100%|██████████| 2.41G/2.41G [02:49<00:00, 15.2MB/s]

Extracting files...





Path to dataset files: /home/greca/.cache/kagglehub/datasets/uysalserkan/fault-induction-motor-dataset/versions/1


## Pré-processamento dos Dados

In [5]:
# função responsável por ler cada todos os arquivos `csv` de dentro de uma pasta
def read_data(
    path: str
) -> pd.DataFrame:
    files = sorted(glob(os.path.join(path, "*.csv")))
    data = [
        pd.read_csv(file, header=None)
        for file in files
    ]
    data = pd.concat(
        data,
        ignore_index=True,
        axis=0
    )

    return data

# realiza o downsample dos sinais
def downsample(
    data: pd.DataFrame,
    sampling_rate: int,
) -> pd.DataFrame:
    downsampled_data = pd.DataFrame()

    for index in range(0, data.shape[0], sampling_rate):
        data_chunk = data.iloc[index:(index+sampling_rate), :]    
        data_chunk = data_chunk.sum()/data_chunk.shape[0]
        data_chunk = np.reshape(data_chunk, (1, data_chunk.shape[0]))

        downsampled_data = pd.concat(
            [downsampled_data, pd.DataFrame(data_chunk)],
            axis=0,
            ignore_index=True
        )
    
    return downsampled_data

# aplica o Fast Fourier Transform (FFT) nos sinais
def extract_fft(
    data: pd.DataFrame,
) -> pd.DataFrame:
    data = data.apply(lambda x: fft(x), axis=1, result_type="expand")
    return data

# divide os sinais em janelas para serem usadas no modelo de aprendizado profundo
# a saída dessa função é um numpy array no formato (BATCH, WINDOW_SIZE, FEATURES)
def batch_data(
    data: pd.DataFrame,
    window_size: int,
    stride: int,
) -> np.ndarray:
    data = data.to_numpy()
    chunks = []

    for index in range(0, data.shape[0], stride):
        chunk_data = data[index:(index + window_size), :]
        chunk_data = np.reshape(chunk_data, (1, chunk_data.shape[0], chunk_data.shape[1]))

        # descartando o último batch se ele tiver um tamanho menor que o da janela
        if chunk_data.shape[1] < window_size:
            continue

        chunks.append(chunk_data)

    chunks = np.concatenate(chunks, axis=0)
    return chunks

# realiza a leitura e o pré-processamento dos dados das classes de desbalanceamento
imbalance_classes = ["6g", "10g", "15g", "20g", "25g", "30g"]
imbalance_files_path = os.path.join(dataset_path, "imbalance", "imbalance")
imbalance_targets = []
imbalance_data = []

for iclass in imbalance_classes:
    # etapas do pré-processamento (leitura, downsample, extrai FFT e divide os dados em pequenas janelas)
    imbalance_class_dataset = read_data(path=os.path.join(imbalance_files_path, iclass))
    imbalance_class_dataset = downsample(data=imbalance_class_dataset, sampling_rate=2000)
    imbalance_class_dataset = extract_fft(data=imbalance_class_dataset)
    imbalance_class_data = batch_data(
        data=imbalance_class_dataset,
        window_size=100,
        stride=50,
    )
    imbalance_data.append(imbalance_class_data)

    imbalance_class_target = [iclass] * imbalance_class_data.shape[0]
    imbalance_targets.extend(imbalance_class_target)

imbalance_data = np.concatenate(imbalance_data, axis=0)
imbalance_data.shape, len(imbalance_targets)

((712, 100, 8), 712)

In [6]:
# realiza a leitura e o pré-processamento dos dados da classe normal
normal_files_path = os.path.join(dataset_path, "normal", "normal")

# etapas do pré-processamento (leitura, downsample, extrai FFT e divide os dados em pequenas janelas)
normal_dataset = read_data(path=normal_files_path)
normal_dataset = downsample(data=normal_dataset, sampling_rate=2000)
normal_dataset = extract_fft(data=normal_dataset)
normal_data = batch_data(
    data=normal_dataset,
    window_size=100,
    stride=50,
)
normal_target = ["normal"] * normal_data.shape[0]
normal_data.shape, len(normal_target)

((121, 100, 8), 121)

In [7]:
# junta os dados das classes de desbalanceamento e normais em uma variável
X = np.concatenate([imbalance_data, normal_data], axis=0)
y = imbalance_targets + normal_target
y = np.asarray(y)

X.shape, y.shape

((833, 100, 8), (833,))

## Treinamento do Modelo

In [None]:
# cria a classe do modelo Long Short-Term Memory (LSTM)
class LSTM(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(LSTM, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # out, _ = self.lstm(x, (h0, c0)) # pegando somente o output da camada LSTM
        out, _ = self.lstm(x) # pegando somente o output da camada LSTM
        out = self.fc(out[:, -1, :])
        return out

In [9]:
# função responsável por realizar o treinamento do modelo com os dados de treinamento
def train(
    X_train: torch.Tensor,
    y_train: torch.Tensor,
    model: LSTM,
    criterion: nn.CrossEntropyLoss,
    optimizer: optim.Adam,
) -> Tuple[float, float]:
    model.train()
    predictions = []
    targets = []

    # forward pass
    optimizer.zero_grad()
    outputs = model(X_train)

    # backward pass
    loss = criterion(outputs, y_train)
    loss.backward()
    optimizer.step()

    # junta todas as predições em uma lista para calcular o f1-score
    prediction = outputs.argmax(dim=-1, keepdim=True).to(dtype=torch.int)
    prediction = prediction.detach().cpu().numpy()
    predictions.extend(prediction.tolist())

    # transforma o array ground-truth em uma lista para calcular o f1-score
    target = y_train.argmax(dim=-1, keepdim=True).to(dtype=torch.int)
    target = target.detach().cpu().numpy()
    targets.extend(target.tolist())

    f1 = accuracy_score(y_true=targets, y_pred=predictions)
    return loss, f1

# função responsável por a validação do modelo com os dados de teste
def test(
    X_valid: torch.Tensor,
    y_valid: torch.Tensor,
    model: LSTM
) -> float:
    model.eval()
    predictions = []
    targets = []

    with torch.inference_mode():
        outputs = model(X_valid)

    # junta todas as predições em uma lista para calcular o f1-score
    prediction = outputs.argmax(dim=-1, keepdim=True).to(dtype=torch.int)
    prediction = prediction.detach().cpu().numpy()
    predictions.extend(prediction.tolist())

    # transforma o array ground-truth em uma lista para calcular o f1-score
    target = y_valid.argmax(dim=-1, keepdim=True).to(dtype=torch.int)
    target = target.detach().cpu().numpy()
    targets.extend(target.tolist())

    f1 = accuracy_score(y_true=targets, y_pred=predictions)
    return f1

# define as entradas do modelo e outros parâmetros utilizados no treinamento
input_size = X.shape[2]
hidden_size = 128
num_layers = 5
output_size = 7
cv = 5
epochs = 300
lr = 1e-3
skf = StratifiedKFold(n_splits=cv)
device = torch.device("cuda")

for i, (train_index, valid_index) in enumerate(skf.split(X, y)):
    ohe = OneHotEncoder(sparse_output=False)

    # inicializa o modelo
    model = LSTM(input_size, hidden_size, num_layers, output_size).to(device)

    # inicializando a função custo e o otimizador
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    # convertendo os dados para o formato torch.Tensor e aplicando One Hot Encoder (OHE)
    # para o array y contendo as classes dos dados
    X_train = torch.tensor(X[train_index, :, :], dtype=torch.float32).to(device)
    y_train = ohe.fit_transform(y[train_index].reshape(-1, 1))
    y_train = torch.tensor(y_train, dtype=torch.float32).to(device)

    X_valid = torch.tensor(X[valid_index, :, :], dtype=torch.float32).to(device)
    y_valid = ohe.transform(y[valid_index].reshape(-1, 1))
    y_valid = torch.tensor(y_valid, dtype=torch.float32).to(device)

    print("#" * 25)
    print(f"FOLD: {i+1}")
    print("#" * 25); print()

    # treinamento do modelo
    for epoch in range(1, epochs+1):
        train_loss, train_f1 = train(
            X_train=X_train,
            y_train=y_train,
            model=model,
            criterion=criterion,
            optimizer=optimizer,
        )

        valid_f1 = test(
            X_valid=X_valid,
            y_valid=y_valid,
            model=model,
        )

        print(f"Epoch [{epoch}/{epochs}]:")
        print(f"Loss: {train_loss}, Train F1-Score: {train_f1}")
        print(f"Valid F1-Score: {valid_f1}"); print()


  X_train = torch.tensor(X[train_index, :, :], dtype=torch.float32).to(device)


#########################
FOLD: 1
#########################
Epoch [1/300]:
Loss: 1.9483221769332886, Train F1-Score: 0.13813813813813813
Valid F1-Score: 0.1497005988023952

Epoch [2/300]:
Loss: 1.9470891952514648, Train F1-Score: 0.14414414414414414
Valid F1-Score: 0.1497005988023952

Epoch [3/300]:
Loss: 1.9461920261383057, Train F1-Score: 0.14414414414414414
Valid F1-Score: 0.1497005988023952

Epoch [4/300]:
Loss: 1.9456162452697754, Train F1-Score: 0.14414414414414414
Valid F1-Score: 0.1317365269461078

Epoch [5/300]:
Loss: 1.9454452991485596, Train F1-Score: 0.14564564564564564
Valid F1-Score: 0.1317365269461078

Epoch [6/300]:
Loss: 1.9454338550567627, Train F1-Score: 0.14864864864864866
Valid F1-Score: 0.1317365269461078

Epoch [7/300]:
Loss: 1.9450041055679321, Train F1-Score: 0.15165165165165165
Valid F1-Score: 0.1317365269461078

Epoch [8/300]:
Loss: 1.9441564083099365, Train F1-Score: 0.15465465465465467
Valid F1-Score: 0.11976047904191617

Epoch [9/300]:
Loss: 1.942914962768