In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.fft import fft
import os
from scipy.optimize import fsolve
from functools import partial


In [12]:
# --- Paths -----------------------------------------------------------------
# Directory holding one sub-folder of raw recordings per bearing.
FEMTO = '../../../data/10FEMTO/raw_data/Validation_set/'
# Directory that receives the processed .npy arrays (one sub-folder per bearing).
POST_PROCESS = '../../../data/10FEMTO/processed_data/'

# --- Runs to process -------------------------------------------------------
# Each entry is (folder name, is_train flag, convergence value); Process.loop_folder
# unpacks exactly these three fields.
DATASET = [
    ('Bearing1_1', True, 100),
    ('Bearing1_2', True, 10),
    ('Bearing1_3', False, 100),
    ('Bearing1_4', True, 40),
]
# Alternative selection kept for reference. These tuples lack the third
# (convergence) field, so they must be extended before they can be used:
# DATASET = [('Bearing1_3', False), ('Bearing1_4', True), ('Bearing1_5', True), ('Bearing1_6', True), ('Bearing1_7', True), ('Bearing2_3', False), ('Bearing3_1', False)]

# --- Signal / windowing parameters -----------------------------------------
LEN = 2560          # divisor used as the per-file sample count in the feature formulas
WINDOW_SIZE = 40    # number of consecutive recordings per sliding window

In [35]:
class Process():
    """Build sliding-window inputs and health-indicator labels per bearing folder.

    For every ``(folder, is_train, convergence)`` entry the class lists the
    files whose name starts with ``acc`` inside ``FEMTO/<folder>``, treats the
    number of such files as the run length (``self.life``), reconstructs a
    normalised health-indicator curve over that length, and stores the
    windowed labels under ``POST_PROCESS/<folder>/``. Feature extraction is
    optional (see ``loop_folder``).

    Attributes are reset for every folder, so after ``loop_folder`` returns
    ``self.features`` / ``self.Y`` hold the data of the LAST folder only.
    """

    def __init__(self, folders: 'list[tuple[str, bool, int]]', window_size: int) -> None:
        """Store the run list and the sliding-window length.

        Args:
            folders: ``(folder name, is_train, convergence)`` triples.
            window_size: number of consecutive recordings per window.
        """
        self.folders = folders
        self.features = []      # per-file feature rows, later the windowed array
        self.life = None        # number of 'acc*' files in the current folder
        self.Y = []             # windowed health-indicator labels
        self.train = None       # True -> files are saved as *_train_*, else *_test_*
        self.window_size = window_size

    def _equation(self, tau, a, convergence):
        """Residual solved for ``tau``: the health indicator evaluated at ``t = convergence``.

        A root of this function is the ``tau`` for which ``HI(convergence) == 0``.
        """
        return self.HI(convergence, a, tau)

    def HI(self, t, a, tau):
        """Health indicator ``1 + e^a - e^(t*tau + a)``; equals 1 at ``t = 0``.

        Works element-wise when ``t`` is a NumPy array.
        """
        return 1 + np.exp(a) - np.exp((t * tau) + a)

    def loop_folder(self, extract_features: bool = False):
        """Process every configured folder and save the resulting arrays.

        Args:
            extract_features: when True, additionally compute the per-file
                features, window them and save the ``*_X`` array. The default
                (False) only produces and saves the ``*_Y`` labels.

        A failure in one folder is printed and does not stop the remaining
        folders from being processed.
        """
        for (folder, is_train, convergence) in self.folders:
            # Start from a clean slate so data from the previous folder never leaks in.
            self.features = []
            self.Y = []
            print(folder, is_train)
            try:
                self.train = is_train
                folder_path = os.path.join(FEMTO, folder)
                # Sorted file names define the time order of the recordings.
                accs = sorted(name for name in os.listdir(folder_path) if name.startswith('acc'))
                self.life = len(accs)
                if extract_features:
                    self.features = [
                        self._extract_feature(os.path.join(folder_path, acc)) for acc in accs
                    ]
                    self.features = self._slide_x_window()
                    self._save_x_data(folder)
                y_HI = self._reconstruct_HI(convergence)
                self.Y = self._slide_y_window(y_HI)
                self._save_y_data(folder)
            except Exception as e:
                # Best effort: report and continue with the next folder.
                print(e)
                print(folder)

    def _save_array(self, folder: str, suffix: str, array) -> None:
        """Save ``array`` as ``POST_PROCESS/<folder>/<folder>_<train|test>_<suffix>.npy``.

        The target directory is created when missing; otherwise ``np.save``
        would fail and the error would only be printed by ``loop_folder``.
        """
        out_dir = os.path.join(POST_PROCESS, folder)
        os.makedirs(out_dir, exist_ok=True)
        split = 'train' if self.train else 'test'
        np.save(os.path.join(out_dir, f'{folder}_{split}_{suffix}'), array)

    def _save_x_data(self, folder: str):
        """Persist ``self.features`` as the ``*_X`` array of ``folder``."""
        self._save_array(folder, 'X', self.features)

    def _save_y_data(self, folder):
        """Persist ``self.Y`` as the ``*_Y`` array of ``folder``."""
        self._save_array(folder, 'Y', self.Y)

    def _reconstruct_HI(self, convergence: int):
        """Return the min-max normalised health indicator for ``range(self.life)``.

        ``tau`` is found numerically so that the raw indicator is zero at
        ``t = convergence`` (with ``a`` fixed to 1). The raw curve is then
        rescaled to the range [0, 1] over the whole run.

        Returns:
            list of ``self.life`` floats.

        Raises:
            ValueError: if the current run is empty (``self.life`` is 0 or unset).
        """
        if not self.life:
            raise ValueError('cannot reconstruct a health indicator for an empty run')
        initial_guess = 0
        a = 1
        result = fsolve(self._equation, initial_guess, args=(a, convergence))
        tau = result[0]
        print(f"The solution for τ is: {tau}")
        hi_y = self.HI(np.arange(self.life), a, tau)
        min_value = hi_y.min()
        span = hi_y.max() - min_value
        if span == 0:
            # Constant curve (e.g. a single recording): avoid 0/0 -> NaN and
            # report the un-degraded value the raw indicator has at t = 0.
            return [1.0] * self.life
        return ((hi_y - min_value) / span).tolist()

    def _extract_feature(self, acc: str):
        """Compute 16 scalar features from column 4 of one recording CSV.

        Returned order: max, min, peak |x|, peak-to-peak, mean |x|,
        square-root amplitude, sample variance, population std, RMS,
        RMS / mean |x| (twice, kept for layout compatibility), peak / RMS,
        peak / mean |x|, peak / square-root amplitude, peak / RMS**2 and the
        mean FFT magnitude.

        Raises:
            ValueError: if the file contains no samples.
        """
        signal = (
            pd.read_csv(acc, header=None, sep=',', usecols=[4])
            .iloc[:, 0]
            .to_numpy(dtype=float)
        )
        n = signal.size
        if n == 0:
            raise ValueError(f'no samples found in {acc}')

        # time domain
        magnitude = np.abs(signal)
        abs_sum = magnitude.sum()
        # Plain 1-D arithmetic: every sample is centred. (Subtracting a pandas
        # Series of column means inside DataFrame.apply aligns on ROW labels and
        # silently turns all but one row into NaN.)
        squared_dev_sum = ((signal - signal.sum() / n) ** 2).sum()
        p1 = signal.max()
        p2 = signal.min()
        p3 = magnitude.max()
        p4 = p1 - p2
        p5 = abs_sum / n
        # Square-root amplitude: square of the mean of sqrt(|x|).
        p6 = (np.sqrt(magnitude).sum() / n) ** 2
        # Undefined (NaN) for a single-sample file.
        p7 = squared_dev_sum / (n - 1) if n > 1 else float('nan')
        p8 = (squared_dev_sum / n) ** 0.5
        p9 = ((signal ** 2).sum() / n) ** 0.5
        p11 = (n * p9) / abs_sum
        p12 = p9 / p5
        p13 = p3 / p9
        p14 = p3 / p5
        p15 = p3 / p6
        p16 = p3 / (p9 ** 2)

        # frequency domain
        amplitudes = np.abs(np.fft.fft(signal))
        p17 = amplitudes.sum() / n
        return [p1, p2, p3, p4, p5, p6,
                p7, p8, p9, p11, p12,
                p13, p14, p15, p16, p17]

    def _slide_y_window(self, y_hi):
        """Return the label of every window: the indicator one step past the window.

        Window ``i`` covers recordings ``i .. i + window_size - 1`` and is
        labelled with ``y_hi[i + window_size]``, giving
        ``life - window_size`` labels (none when the run is too short).
        """
        y_hi = np.asarray(y_hi)
        return np.array(y_hi[self.window_size:max(self.life, self.window_size)])

    def _slide_x_window(self):
        """Stack ``life - window_size`` overlapping windows of feature rows.

        Returns:
            array of shape ``(life - window_size, window_size, n_features)``.
        """
        features = np.asarray(self.features)
        return np.array([
            features[start:start + self.window_size, :]
            for start in range(self.life - self.window_size)
        ])

In [36]:
# Run the pipeline over every configured folder, then inspect what is left on
# the instance. loop_folder() resets `features` and `Y` for each folder, so
# the shapes printed here describe the last folder only.
process = Process(DATASET, WINDOW_SIZE)
process.loop_folder()

X = np.array(process.features)
Y = np.array(process.Y)
print(X.shape, Y.shape, sep='\n')

Bearing1_1 True
The solution for τ is: 0.0031326168751822274
[1.0, 0.9999995162793023, 0.999999031040917, 0.9999985442800825, 0.9999980559920219, 0.9999975661719434, 0.9999970748150405, 0.9999965819164912, 0.9999960874714585, 0.9999955914750905, 0.9999950939225196, 0.9999945948088632, 0.9999940941292235, 0.9999935918786871, 0.9999930880523252, 0.9999925826451938, 0.9999920756523328, 0.9999915670687674, 0.9999910568895064, 0.9999905451095433, 0.9999900317238558, 0.9999895167274061, 0.9999890001151404, 0.9999884818819889, 0.9999879620228659, 0.9999874405326701, 0.999986917406284, 0.9999863926385736, 0.9999858662243897, 0.999985338158566, 0.9999848084359209, 0.9999842770512557, 0.9999837439993559, 0.9999832092749904, 0.9999826728729122, 0.9999821347878569, 0.9999815950145442, 0.9999810535476775, 0.9999805103819428, 0.9999799655120102, 0.9999794189325324, 0.999978870638146, 0.9999783206234701, 0.9999777688831075, 0.9999772154116437, 0.9999766602036473, 0.99997610325367, 0.9999755445562462,

In [None]:
# Scratch cell: compute the candidate features p1..p20 for a single recording.
x = pd.read_csv('../../../data/10FEMTO/raw_data/Validation_set/Bearing1_1/acc_02346.csv', header=None, sep=',', usecols=[4])
# time domain
x_abs = x.abs()
x_avg = x.sum() / LEN
# DataFrame - Series aligns the Series on the COLUMN label, so every row is
# centred. (Doing the subtraction inside x.apply(...) aligns on row labels
# instead and leaves NaN in all rows but one.)
centered = x - x_avg
mean_square_mean = (centered ** 2).sum()
p1 = x.min()
p2 = x.max()
p3 = x_abs.max()
p4 = p2 - p1
p5 = x_abs.sum() / LEN
# Square-root amplitude: square of the mean of sqrt(|x|).
p6 = ((x_abs ** 0.5).sum() / LEN) ** 2
p7 = mean_square_mean / (LEN - 1)
p8 = (mean_square_mean / LEN) ** 0.5
p9 = ((x ** 2).sum() / LEN) ** 0.5
p10 = (centered ** 3).sum() / ((LEN - 1) * (p8 ** 3))
p11 = (LEN * p9) / x_abs.sum()
p12 = p9 / p5
p13 = p3 / p9
p14 = p3 / p5
p15 = p3 / p6
p16 = p3 / (p9 ** 2)
# frequency domain
# Flatten to 1-D first: on the (N, 1) array np.fft.fft would transform the
# length-1 last axis (an identity), and the (N,) frequency vector would
# broadcast against (N, 1) amplitudes into an N x N matrix.
fft_result = np.fft.fft(x.to_numpy().ravel())
N = len(fft_result)
# d=1 gives frequencies in cycles per sample.
# NOTE(review): the original comment said "sampling frequency is 2560 Hz";
# pass d=1/fs if frequencies in Hz are wanted - confirm the real rate.
frequencies = np.fft.fftfreq(N, d=1)
amplitudes = np.abs(fft_result)
p17 = np.sum(amplitudes) / N
# NOTE(review): the two-sided spectrum of a real signal is symmetric, so this
# amplitude-weighted mean frequency is close to 0; a one-sided spectrum
# (np.fft.rfft / rfftfreq) may be what is intended - confirm.
p18 = np.sum(frequencies * amplitudes) / np.sum(amplitudes)
p19 = (np.sum(frequencies ** 2 * amplitudes) / np.sum(amplitudes)) ** 0.5
p20 = ((np.sum((frequencies - p18) ** 2 * amplitudes)) / np.sum(amplitudes)) ** 0.5