In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import re
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import GridSearchCV
from sklearn import linear_model
from scipy import interpolate
import scipy.io as sio
from numpy import *

# Shared scaler mapping each feature column into [0, 1]. It is fitted on the
# training windows further down and reused to transform the test windows.
min_max_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))

In [None]:
# Sliding-window settings applied to every accelerometer file.
window_size = 40        # rows per window
stride = 25             # row offset between the starts of consecutive windows
max_window_stamp = 50   # number of windows cut from each file

In [None]:
# Global accumulators filled by get_train_acc(): one window (a list of rows)
# per entry in train_acc, and the matching label in train_y.
train_acc, train_y = [], []

In [None]:
def get_train_acc(bearing_folders, folder_type):
    """Cut sliding windows out of every ``acc*`` CSV and collect them globally.

    For each bearing folder under ``./{folder_type}/`` the files whose names
    start with ``acc`` are visited in sorted-name order. Columns 4 and 5 of
    each file are read, and ``max_window_stamp`` windows of ``window_size``
    rows are taken, one every ``stride`` rows. Every window is appended to the
    module-level ``train_acc`` list and a matching label to ``train_y``.

    The label is ``len(acc_files) - stamp``: the number of files from the
    current one to the end of the folder, so the last file is labelled 1.

    Args:
        bearing_folders: iterable of sub-folder names, one per bearing.
        folder_type: directory (relative to the working directory) that
            contains the bearing folders.

    Returns:
        None. Results accumulate in ``train_acc`` / ``train_y``; calling the
        function again appends to whatever is already stored there.
    """
    for bearing_folder in bearing_folders:
        acc_files = sorted(
            name
            for name in os.listdir(f'./{folder_type}/{bearing_folder}')
            if name.startswith('acc')
        )
        total_files = len(acc_files)
        for stamp, acc_file in enumerate(acc_files):
            vibra = pd.read_csv(
                f'./{folder_type}/{bearing_folder}/{acc_file}',
                header=None,
                sep=',',
                usecols=[4, 5],
            )
            # Every window of one file shares the same label.
            train_rul = total_files - stamp
            for i in range(max_window_stamp):
                begin = i * stride
                window = vibra.iloc[begin:begin + window_size, :]
                train_acc.append(window.values.tolist())
                train_y.append(train_rul)

In [None]:
# Append windows from the two bearings stored under ./Learning_set.
folder_type = 'Learning_set'
bearing_folders = ['Bearing1_1', 'Bearing1_2']
get_train_acc(bearing_folders=bearing_folders, folder_type=folder_type)

In [None]:
# Append windows from two more bearings, read from the full-length recordings
# under ./Validation_Set/Full_Test_Set/. They land in the same train_acc /
# train_y lists as the learning-set bearings.
folder_type = 'Validation_Set/Full_Test_Set/'
bearing_folders = ['Bearing1_3', 'Bearing1_4']
get_train_acc(bearing_folders=bearing_folders, folder_type=folder_type)

In [None]:
# Scale the training windows into [0, 1] and fit the shared scaler on them.
train_acc = np.array(train_acc)
# Read the window length from the data instead of hard-coding 40, so the
# reshape below stays correct if window_size is changed in the config cell.
len_x, win_len, dim_x = train_acc.shape  # (windows, rows per window, columns)
# MinMaxScaler works on 2-D (samples, features) input: flatten windows to rows.
train_acc = train_acc.reshape(-1, dim_x)
print(train_acc.shape)
train_acc = min_max_scaler.fit_transform(train_acc)
# Restore the (windows, rows per window, columns) layout.
train_acc = np.reshape(train_acc, (len_x, win_len, dim_x))

In [None]:
# Display (windows, rows per window, columns) of the scaled training array.
train_acc.shape

In [None]:
# Plot the first row of every window; each column is drawn as its own line.
plt.plot(train_acc[:, 0, :])

In [None]:
# Prefix used when building the output .mat file names.
condition = "Bearing1"

In [None]:
# Persist the scaled training windows and their labels as MATLAB files.
# NOTE: train_acc has already been min-max scaled at this point, so the saved
# "train_x" is in [0, 1]; the raw value range cannot be recovered from it.
train_out_dir = './Mat_train'
os.makedirs(train_out_dir, exist_ok=True)  # savemat does not create missing folders
sio.savemat(f'{train_out_dir}/{condition}1-4_2d_train_stride{stride}_x.mat', {"train_x": train_acc})
sio.savemat(f'{train_out_dir}/{condition}1-4_2d_train_stride{stride}_y.mat', {"train_y": train_y})

# Test

In [10]:
# Selection of the held-out bearing(s) processed by the cells below.
folder_type = 'Test_set'
condition = 'Bearing1'
bearing_folders = ['Bearing1_5']
# bearing_folders = ['Bearing1_6', 'Bearing1_7']  # alternative selection

# Per-bearing reference numbers.
# NOTE(review): presumably total life and the point where the truncated test
# recording stops; units are not stated in this notebook. Confirm against
# the dataset documentation.
test_life = dict(
    Bearing1_3=2375,
    Bearing1_4=1428,
    Bearing1_5=2463,
    Bearing1_6=2448,
    Bearing1_7=2259,
    Bearing2_3=1955,
    Bearing2_4=751,
    Bearing2_5=2311,
    Bearing2_6=701,
    Bearing2_7=230,
    Bearing3_3=434,
)
test_end = dict(
    Bearing1_3=1802,
    Bearing1_4=1139,
    Bearing1_5=2302,
    Bearing1_6=2302,
    Bearing1_7=1502,
    Bearing2_3=1202,
    Bearing2_4=612,
    Bearing2_5=2002,
    Bearing2_6=572,
    Bearing2_7=172,
    Bearing3_3=352,
)
# For every bearing the label offset is exactly test_life - test_end
# (e.g. Bearing1_3: 2375 - 1802 = 573), so derive it instead of repeating it.
test_rul = {name: test_life[name] - test_end[name] for name in test_life}

In [11]:
# Fresh accumulators for the test/validation pass; get_test_acc() appends to
# test_acc and test_y.
test_acc, test_y = [], []
test_x, test_temp = [], []  # not referenced by any cell visible in this notebook

# Windowing settings, repeated here with the same values as the training
# section so this part can be run on its own.
window_size = 40
stride = 25
max_window_stamp = 50

In [11]:
# Sanity check: how many accelerometer files will be windowed for one bearing.
# `bearing_folder` only exists as a loop variable inside the helper functions,
# so it has to be set here or this cell raises NameError on a fresh kernel.
bearing_folder = bearing_folders[0]
csv_files = os.listdir(f'./{folder_type}/{bearing_folder}')
csv_files.sort()
acc_files = [acc for acc in csv_files if acc.startswith('acc')]
acc_files = acc_files[-50:]  # keep only the last 50 recordings, as get_test_acc does
print(len(acc_files))

50


In [12]:
def get_test_acc(bearing_folders, folder_type, last_n=50):
    """Window the last ``last_n`` ``acc*`` files of each bearing into test data.

    For each bearing folder under ``./{folder_type}/`` the files whose names
    start with ``acc`` are sorted by name and only the final ``last_n`` are
    kept. Columns 4 and 5 of each kept file are read, and ``max_window_stamp``
    windows of ``window_size`` rows are taken, one every ``stride`` rows. Each
    window is appended to the module-level ``test_acc`` list and its label to
    ``test_y``.

    The label is ``test_rul[bearing_folder] + n_kept - index``, where
    ``n_kept`` is the number of files actually kept. With the default
    ``last_n=50`` and at least 50 files this equals the former hard-coded
    ``+ 50 - index``; with fewer files the labels now still end at
    ``test_rul[bearing_folder] + 1`` for the last file instead of being
    shifted upward.

    Args:
        bearing_folders: iterable of sub-folder names; each must be a key of
            the module-level ``test_rul`` dict.
        folder_type: directory (relative to the working directory) that
            contains the bearing folders.
        last_n: how many of the most recent files to use per bearing.

    Returns:
        None. Results accumulate in ``test_acc`` / ``test_y``; calling the
        function again appends to whatever is already stored there.

    Raises:
        ValueError: if ``last_n`` is not positive (``[-0:]`` would silently
            select every file).
        KeyError: if a bearing with at least one file is missing from
            ``test_rul``.
    """
    if last_n <= 0:
        raise ValueError(f'last_n must be a positive integer, got {last_n!r}')
    for bearing_folder in bearing_folders:
        folder = f'./{folder_type}/{bearing_folder}'
        acc_files = sorted(name for name in os.listdir(folder) if name.startswith('acc'))
        acc_files = acc_files[-last_n:]
        n_kept = len(acc_files)
        for index, acc_file in enumerate(acc_files):
            vibra = pd.read_csv(f'{folder}/{acc_file}', header=None, sep=',', usecols=[4, 5])
            # Every window of one file shares the same label.
            label = test_rul[bearing_folder] + n_kept - index
            for i in range(max_window_stamp):
                start = i * stride
                test_acc.append(vibra.iloc[start:start + window_size, :].values.tolist())
                test_y.append(label)

In [13]:
# Fill test_acc / test_y for the selected bearing(s). Re-running this cell
# appends the same windows again, so reset the accumulators first.
get_test_acc(bearing_folders=bearing_folders, folder_type=folder_type)

In [6]:
# Make sure min_max_scaler carries training statistics before it is applied
# to the test windows.
#
# The saved train_x was written after fit_transform, so it is already in
# [0, 1]. Fitting on it learns min=0 / max=1 per column, which makes
# transform() (almost) the identity and leaves the test data effectively
# unscaled. If the scaler was already fitted on the raw training windows in
# this kernel, keep that fit instead of overwriting it.
if hasattr(min_max_scaler, 'data_min_'):
    print('min_max_scaler is already fitted in this kernel; keeping that fit.')
else:
    # Fallback for running the test section on its own (original behavior).
    train_x = sio.loadmat(f'./Mat_train/Bearing11-4_2d_train_stride25_x.mat')['train_x']  # scaled sliding-window training data saved earlier
    len_x = len(train_x)
    dim_x = train_x.shape[2]
    train_x = train_x.reshape(-1, dim_x)
    min_max_scaler.fit(train_x)
    del train_x
    print('WARNING: min_max_scaler was fitted on already-scaled training data; '
          'test windows will not be rescaled to the raw training range. '
          'Run the training section first to fit it on raw data.')

In [14]:
# Apply the already-fitted scaler to the test windows (transform only, no fit).
test_acc = np.array(test_acc)
# Read the window length from the data instead of hard-coding 40, so the
# reshape below stays correct if window_size is changed.
len_x, win_len, dim_x = test_acc.shape  # (windows, rows per window, columns)
# MinMaxScaler works on 2-D (samples, features) input: flatten windows to rows.
test_acc = test_acc.reshape(-1, dim_x)
print(test_acc.shape)
test_acc = min_max_scaler.transform(test_acc)
# Restore the (windows, rows per window, columns) layout.
test_acc = np.reshape(test_acc, (len_x, win_len, dim_x))

(100000, 2)


In [15]:
# Display (windows, rows per window, columns) of the scaled test array.
test_acc.shape

(2500, 40, 2)

In [9]:
# Save the test split. The file name hard-codes bearings 6-7, so only write it
# when those are the selected bearings; otherwise a top-to-bottom run with a
# different selection would overwrite the test files with other data.
# NOTE(review): this checks the current selection, not what is actually in
# test_acc; keep the two in sync.
if sorted(bearing_folders) == [f'{condition}_6', f'{condition}_7']:
    os.makedirs('./Mat_test', exist_ok=True)  # savemat does not create missing folders
    sio.savemat(f'./Mat_test/{condition}_6-7_test_x.mat', {"test_x": test_acc})
    sio.savemat(f'./Mat_test/{condition}_6-7_test_y.mat', {"test_y": test_y})
else:
    print(f'Skipped ./Mat_test/{condition}_6-7_test_*.mat: bearing_folders is {bearing_folders}')

In [16]:
# Save the validation split. The file name hard-codes bearing 5, so only write
# it when that is the selected bearing; otherwise a top-to-bottom run with a
# different selection would overwrite the validation files with other data.
# NOTE(review): this checks the current selection, not what is actually in
# test_acc; keep the two in sync.
if list(bearing_folders) == [f'{condition}_5']:
    os.makedirs('./Mat_valid', exist_ok=True)  # savemat does not create missing folders
    sio.savemat(f'./Mat_valid/{condition}_5_valid_x.mat', {"valid_x": test_acc})
    sio.savemat(f'./Mat_valid/{condition}_5_valid_y.mat', {"valid_y": test_y})
else:
    print(f'Skipped ./Mat_valid/{condition}_5_valid_*.mat: bearing_folders is {bearing_folders}')

# Appendix

In [None]:
# Scratch: list the accelerometer files of one bearing folder.
# `bearing_folder` only exists as a loop variable inside the helper functions,
# so it has to be set here or this cell raises NameError on a fresh kernel.
bearing_folder = bearing_folders[0]
csv_files = os.listdir(f'./{folder_type}/{bearing_folder}')
csv_files.sort()
acc_files = [acc for acc in csv_files if acc.startswith('acc')]

In [None]:
# Scratch copy of the per-folder loop inside get_train_acc(), run on the
# module-level acc_files / bearing_folder / folder_type.
# NOTE(review): this appends to train_acc / train_y, so it only works while
# train_acc is still a plain list (before it is converted with np.array), and
# it mixes whatever folder is currently selected into the training data.
# Confirm whether this cell is still needed.
total_files = len(acc_files)
for stamp, acc_file in enumerate(acc_files):
    vibra = pd.read_csv(
        f'./{folder_type}/{bearing_folder}/{acc_file}',
        header=None,
        sep=',',
        usecols=[4, 5],
    )
    train_rul = total_files - stamp  # same label for every window of this file
    for i in range(max_window_stamp):
        window = vibra.iloc[i*stride:i*stride + window_size, :]
        train_acc.append(window.values.tolist())
        train_y.append(train_rul)

In [17]:
# Read back the labels stored in the 6-7 test label file.
y_test_mat = sio.loadmat('./Mat_test/Bearing1_6-7_test_y.mat')
y_test = y_test_mat['test_y']

In [19]:
# The label list comes back from the .mat file as a 2-D array (see the output
# below), not as a flat vector.
np.shape(y_test)

(1, 5000)

In [18]:
# Show the single row of labels.
first_row = y_test[0]
print(first_row)

[196 196 196 ... 758 758 758]
