In [2]:
import os
import glob
import numpy as np
import pandas as pd

from scipy.fft import fft

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

import matplotlib.pyplot as plt
import seaborn as sns

from xgboost import XGBClassifier

In [3]:
# Preprocessing: FFT-based feature extraction
def extract_features(signal: np.ndarray, n_freqs: int = 20) -> dict:
    """Return the magnitudes of the first ``n_freqs`` non-DC FFT bins.

    Parameters
    ----------
    signal : array-like
        One-dimensional sequence of samples. It is converted to a float
        array, so lists, tuples and integer arrays are accepted as well.
    n_freqs : int, default 20
        Number of frequency bins to report, starting at bin 1 (bin 0, the
        DC term, is skipped).

    Returns
    -------
    dict
        Maps ``"fft_1"`` ... ``"fft_<n_freqs>"`` to the FFT magnitude of
        that bin as a Python float. The dict always has exactly ``n_freqs``
        keys: when the signal is too short to provide a bin (or is empty),
        that entry is ``nan`` instead of the call failing with IndexError,
        so every signal yields the same feature columns.
    """
    # np.asarray (unlike .astype) also works for plain Python sequences.
    samples = np.asarray(signal, dtype=float)

    # scipy's fft rejects zero-length input, so skip the transform then.
    magnitudes = np.abs(fft(samples)) if samples.size else np.empty(0)

    # Slicing never raises: it simply yields fewer than n_freqs values when
    # the signal has n_freqs or fewer samples.
    available = magnitudes[1:n_freqs + 1]

    features = {}
    for k in range(1, n_freqs + 1):
        has_bin = k <= available.size
        features[f"fft_{k}"] = float(available[k - 1]) if has_bin else float("nan")

    return features

In [None]:
# loading data
def load_dataset(base_path: str):
    X = []
    y = []

    # normal
    normal_path = os.path.join(base_path, "normal")
    normal_files = glob.glob(os.path.join(normal_path, "*.csv"))

    for file in normal_files:
        df = pd.read_csv(file, header=None)
        signal = df.iloc[:,0].values
        features = extract_features(signal)
        X.append(features)
        y.append("normal")

    # imbalance
    imbalance_path = os.path.join(base_path, "imbalace")
    imbalance_folders = os.listdir(imbalance_path)

    for folder in imbalance_folders:
        folder_path = os.path.join(imbalance_path, folder)
        files = glob.glob(os.path.join(folder_path, "*.csv"))

        for file in files:
            df = pd.read_csv(file, header=None)
            signal = df.iloc[:,0].values
            features = extract_features(signal)
            X.append(features)
            y.append("imbalance")