In [None]:
# imports
import os
import glob
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# extracting features - vibrations
def extract_features(signal: np.ndarray) -> dict:
    """Summarise a 1-D signal as a dictionary of scalar statistics.

    Parameters
    ----------
    signal : np.ndarray
        One-dimensional sequence of numeric samples. Any array-like that
        ``np.asarray`` can convert to floats is accepted.

    Returns
    -------
    dict
        Keys ``"mean"``, ``"std"``, ``"var"``, ``"rms"``, ``"max"``,
        ``"min"``, ``"skew"`` and ``"kurt"``, each mapped to a single scalar.
        ``std``/``var`` are NumPy's population versions (``ddof=0``);
        ``skew``/``kurt`` are the pandas estimators (``Series.skew`` /
        ``Series.kurtosis``).
    """
    # Work in float so squaring integer samples cannot overflow.
    signal = np.asarray(signal, dtype=float)
    # Build the Series once; it is only needed for skewness and kurtosis.
    series = pd.Series(signal)
    return {
        "mean": np.mean(signal),
        "std": np.std(signal),
        "var": np.var(signal),
        # Root-mean-square: sqrt of the mean of squared samples. A plain
        # np.sqrt(signal) would return an array (NaN for negative samples)
        # rather than one feature value.
        "rms": np.sqrt(np.mean(np.square(signal))),
        "max": np.max(signal),
        "min": np.min(signal),
        "skew": series.skew(),
        "kurt": series.kurtosis(),
    }

In [None]:
# loading data
def load_dataset(base_path: str):
    """Load every CSV signal under ``base_path`` and turn it into features.

    Expected layout::

        base_path/normal/*.csv          -> label "normal"
        base_path/imbalance/<sub>/*.csv -> label "imbalance"

    Each CSV is read without a header, flattened to one dimension and passed
    to ``extract_features``.

    Parameters
    ----------
    base_path : str
        Directory containing the ``normal`` and ``imbalance`` folders.

    Returns
    -------
    tuple[pd.DataFrame, pd.Series]
        Feature table (one row per CSV file) and the matching labels, in the
        same order.

    Raises
    ------
    FileNotFoundError
        If ``base_path/imbalance`` does not exist (raised by ``os.listdir``).
    """
    X = []
    y = []

    def _add_files(files, label):
        # Read each CSV, extract its features and record the class label.
        for file in files:
            signal = pd.read_csv(file, header=None).values.flatten()
            X.append(extract_features(signal))
            y.append(label)

    # normal
    normal_path = os.path.join(base_path, "normal")
    # glob/listdir return entries in arbitrary order; sort so the row order
    # (and therefore any later seeded split) is reproducible.
    _add_files(sorted(glob.glob(os.path.join(normal_path, "*.csv"))), "normal")

    # imbalance and subfolders
    imbalance_path = os.path.join(base_path, "imbalance")
    for folder in sorted(os.listdir(imbalance_path)):
        folder_path = os.path.join(imbalance_path, folder)
        # Files must be handled inside the folder loop; otherwise only the
        # last subfolder's files are loaded and the rest are silently dropped.
        _add_files(
            sorted(glob.glob(os.path.join(folder_path, "*.csv"))),
            "imbalance",
        )

    return pd.DataFrame(X), pd.Series(y)