# Notebook for adapting a real-world dataset to a format suitable for our methods

In [39]:
import os, random
import sys
# Put the parent directory of the notebook's working directory on the import
# path (once), so that the `modules` package imported below can be resolved.
module_path = os.path.abspath(os.pardir)
already_registered = module_path in sys.path
if not already_registered:
    sys.path.append(module_path)

import numpy as np
import pandas as pd
import torch.nn as nn
import scipy.io
from scipy.io import arff
import pickle

from modules.helpers import seed

# Seed the run with the project helper; the cell output confirms
# "Global seed set to 1".
# NOTE(review): which random number generators `seed` covers is decided in
# modules.helpers, which is not visible here — confirm before relying on it.
seed(1)

Global seed set to 1


In [2]:
# Table of pre-computed per-window statistics (max, min, mean, sd, rms,
# skewness, kurtosis, crest, form) together with a `fault` class name.
# It is only loaded and displayed here for inspection.
feature_csv_path = "../data/CWRU/feature_time_48k_2048_load_1.csv"
data_feature = pd.read_csv(feature_csv_path)
data_feature

Unnamed: 0,max,min,mean,sd,rms,skewness,kurtosis,crest,form,fault
0,0.35986,-0.41890,0.017840,0.122746,0.124006,-0.118571,-0.042219,2.901946,6.950855,Ball_007_1
1,0.46772,-0.36111,0.022255,0.132488,0.134312,0.174699,-0.081548,3.482334,6.035202,Ball_007_1
2,0.46855,-0.43809,0.020470,0.149651,0.151008,0.040339,-0.274069,3.102819,7.376926,Ball_007_1
3,0.58475,-0.54303,0.020960,0.157067,0.158422,-0.023266,0.134692,3.691097,7.558387,Ball_007_1
4,0.44685,-0.57891,0.022167,0.138189,0.139922,-0.081534,0.402783,3.193561,6.312085,Ball_007_1
...,...,...,...,...,...,...,...,...,...,...
2295,0.21425,-0.19839,0.010769,0.064100,0.064983,-0.212497,-0.119312,3.297037,6.034174,Normal_1
2296,0.21967,-0.20882,0.013136,0.068654,0.069883,-0.061308,-0.295122,3.143410,5.319958,Normal_1
2297,0.20799,-0.21613,0.012571,0.067128,0.068279,-0.154754,-0.071405,3.046161,5.431299,Normal_1
2298,0.21425,-0.22405,0.012608,0.066813,0.067977,-0.326966,0.023662,3.151821,5.391672,Normal_1


In [40]:
# Label 0 corresponds to a fault in the bearing, label 1 to a normal bearing.
# Every recording loaded in this cell is a fault recording, hence label 0.


def _segment_recording(mat, signal_keys, label, window=2048):
    """Cut the signals of one loaded .mat recording into fixed-length rows.

    The arrays stored under ``signal_keys`` are stacked end to end in the
    given order, trailing samples that do not fill a complete window are
    dropped, and every remaining run of ``window`` consecutive samples
    becomes one row of the result.

    Parameters
    ----------
    mat : dict
        Mapping returned by ``scipy.io.loadmat``.
    signal_keys : sequence of str
        Keys of the arrays to stack, in stacking order.
    label : int
        Value written to the ``label`` column of every row.
    window : int, default 2048
        Number of samples per row.

    Returns
    -------
    pandas.DataFrame
        One row per window (columns ``0 .. window - 1``) plus a ``label``
        column. A recording shorter than one window yields an empty frame
        instead of raising.

    Raises
    ------
    KeyError
        If one of ``signal_keys`` is missing from ``mat``.
    """
    stacked = pd.concat([pd.DataFrame(mat[key]) for key in signal_keys]).to_numpy()
    # Integer arithmetic: no float-valued counts, and the remainder is discarded.
    n_windows = len(stacked) // window
    n_channels = stacked.shape[1]
    # Work on the plain ndarray rather than calling np.split on a DataFrame.
    # Window i of column j lands in row i * n_channels + j; for the usual
    # single-column signal that is simply row i.
    rows = (
        stacked[: n_windows * window]
        .reshape(n_windows, window, n_channels)
        .transpose(0, 2, 1)
        .reshape(n_windows * n_channels, window)
    )
    segments = pd.DataFrame(rows)
    segments["label"] = label
    return segments


data1 = scipy.io.loadmat("../data/CWRU/raw/B007_1_123.mat")
data_Ball_007_1 = _segment_recording(data1, ("X123_DE_time", "X123_FE_time"), label=0)

data2 = scipy.io.loadmat("../data/CWRU/raw/B014_1_190.mat")
data_Ball_014_1 = _segment_recording(data2, ("X190_DE_time", "X190_FE_time"), label=0)

data3 = scipy.io.loadmat("../data/CWRU/raw/B021_1_227.mat")
data_Ball_021_1 = _segment_recording(data3, ("X227_DE_time", "X227_FE_time"), label=0)

data4 = scipy.io.loadmat("../data/CWRU/raw/IR007_1_110.mat")
data_IR_007_1 = _segment_recording(data4, ("X110_DE_time", "X110_FE_time"), label=0)

data5 = scipy.io.loadmat("../data/CWRU/raw/IR014_1_175.mat")
# NOTE(review): unlike every other recording, this one also stacks "X217" and
# "X217_DE_time" ahead of the X175 signals. Kept exactly as before; confirm
# that mixing the X217 arrays into this recording is intended.
data_IR_014_1 = _segment_recording(
    data5, ("X217", "X217_DE_time", "X175_DE_time", "X175_FE_time"), label=0
)

data6 = scipy.io.loadmat("../data/CWRU/raw/IR021_1_214.mat")
data_IR_021_1 = _segment_recording(data6, ("X214_DE_time", "X214_FE_time"), label=0)

data7 = scipy.io.loadmat("../data/CWRU/raw/OR007_6_1_136.mat")
data_OR_007_6_1 = _segment_recording(data7, ("X136_DE_time", "X136_FE_time"), label=0)

data8 = scipy.io.loadmat("../data/CWRU/raw/OR014_6_1_202.mat")
data_OR_014_6_1 = _segment_recording(data8, ("X202_DE_time", "X202_FE_time"), label=0)

data9 = scipy.io.loadmat("../data/CWRU/raw/OR021_6_1_239.mat")
data_OR_021_6_1 = _segment_recording(data9, ("X239_DE_time", "X239_FE_time"), label=0)

In [41]:
# Normal-bearing recording (label 1): the X098_DE_time and X098_FE_time arrays
# are stacked end to end and cut into rows of 2048 consecutive samples.
data10 = scipy.io.loadmat("../data/CWRU/raw/Time_Normal_1_098.mat")
normal_signal = pd.concat(
    (pd.DataFrame(data10["X098_DE_time"]), pd.DataFrame(data10["X098_FE_time"]))
).to_numpy()

# Integer window count; samples past the last complete window are discarded.
normal_n_windows = len(normal_signal) // 2048
normal_n_channels = normal_signal.shape[1]

# Reshape the plain ndarray instead of calling np.split on a DataFrame.
# Window i of column j lands in row i * normal_n_channels + j, which is row i
# for the usual single-column signal. A recording shorter than one window
# gives an empty frame rather than an error.
normal_rows = (
    normal_signal[: normal_n_windows * 2048]
    .reshape(normal_n_windows, 2048, normal_n_channels)
    .transpose(0, 2, 1)
    .reshape(normal_n_windows * normal_n_channels, 2048)
)
data_Normal_1 = pd.DataFrame(normal_rows)
data_Normal_1["label"] = 1

In [59]:
# Stack the per-recording frames (nine fault recordings, then the normal one)
# and replace the repeated per-recording row numbers with one running integer
# index named "index".
recording_frames = [
    data_Ball_007_1,
    data_Ball_014_1,
    data_Ball_021_1,
    data_IR_007_1,
    data_IR_014_1,
    data_IR_021_1,
    data_OR_007_6_1,
    data_OR_014_6_1,
    data_OR_021_6_1,
    data_Normal_1,
]
data_CWRU = pd.concat(recording_frames)
data_CWRU = data_CWRU.set_index(pd.Index(np.arange(len(data_CWRU)), name="index"))

In [62]:
# Persist the assembled frame as a pickle (default protocol) for later use.
with open("../data/CWRU/CWRU_full_dataframe", "wb") as pickle_sink:
    pickle.Pickler(pickle_sink).dump(data_CWRU)