In [1]:
# Reload imported modules automatically before each cell executes, so edits to
# local project code (e.g. utils.db, imported below) take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [4]:
import os

# Project root, relative to the directory the kernel starts in. The relative
# paths used later ('conf/...', 'data/...') are resolved from this directory.
PROJECT_DIR = 'code/bearing-failure-model/'

# Re-running this cell after a successful chdir used to raise
# FileNotFoundError, because the relative path no longer exists from inside
# the project. Only move when the working directory is not already the
# project root, so the cell is safe to execute more than once.
_project_suffix = os.sep + os.path.normpath(PROJECT_DIR)
if not os.path.normpath(os.getcwd()).endswith(_project_suffix):
    os.chdir(PROJECT_DIR)

Imports

In [44]:
import re
import json 

import scipy.io
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sqlalchemy import create_engine
from sqlalchemy.engine import URL

from utils.db import bulk_insert_df

In [45]:
# Read the database connection settings from the project's JSON config file.
# The parsed mapping is later unpacked as keyword arguments for URL.create.
with open('conf/globalresources.json', 'r') as config_file:
    db = json.loads(config_file.read())

In [46]:
# Connect to database
# The entries of `db` (loaded from conf/globalresources.json) are passed
# straight through as keyword arguments to URL.create.
url = URL.create(**db)
engine = create_engine(url)
# A raw connection is requested from the engine instead of using the engine
# directly; it is handed to bulk_insert_df in the cells below and closed
# explicitly in the last cell.
conn = engine.raw_connection()

Auxiliary path variables that make the loading loops below easier to write.

In [47]:
# Root directory of the dataset, relative to the current working directory.
crwu_path = 'data/crwu_dataset/'

# Sub-directories of `crwu_path`, one per group of recordings.
normal = 'normal_baseline/'
drive_end_12k = 'drive_end_bearing_fault_12k_data/'
drive_end_48k = 'drive_end_bearing_fault_48k_data/'
fan_end_12k = 'fan_end_bearing_fault_12k_data/'

# Sub-directories looked up inside each fault group. Not every group has all
# of them: the 48k drive-end and 12k fan-end loaders report '028/' as missing.
fault_diameter = ['007/', '014/', '021/', '028/']

Checking a problem caused by how the data is saved: some `.mat` files also contain variables that belong to another recording.

In [48]:
# Inspect which variables are stored in one normal-baseline file.
path = crwu_path + normal
data = scipy.io.loadmat(path+'normal_1.mat')
data.keys()

dict_keys(['__header__', '__version__', '__globals__', 'X098_DE_time', 'X098_FE_time'])

In [49]:
# Same inspection for the next file: besides its own X099_* variables it also
# lists X098_* keys, the same names found in normal_1.mat.
data = scipy.io.loadmat(path+'normal_2.mat')
data.keys()

dict_keys(['__header__', '__version__', '__globals__', 'ans', 'X098_DE_time', 'X098_FE_time', 'X099_DE_time', 'X099_FE_time'])

`normal_2.mat` also contains the `X098_*` keys, which are the same as the equivalent keys in `normal_1.mat`; they should therefore be ignored when loading `normal_2.mat`.

In [50]:
def rpm_column(df):
    '''Add an ``rpm`` column derived from the last character of ``df['fault']``.

    The trailing character of every fault label is translated with a fixed
    table: '0' -> 1797, '1' -> 1772, '2' -> 1750, '3' -> 1730.
    (Presumably the suffix is the motor-load index of the recording - confirm.)

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string column ``fault``. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now carrying an integer ``rpm`` column.

    Raises
    ------
    ValueError
        If any fault label does not end in '0', '1', '2' or '3'. Such rows
        used to be silently assigned ``rpm = 0``.
    '''
    rpm_by_suffix = {'0': 1797, '1': 1772, '2': 1750, '3': 1730}

    # Extract the suffix once and translate it through the lookup table.
    rpm = df['fault'].str[-1].map(rpm_by_suffix)

    # Fail loudly instead of storing a bogus speed for unknown suffixes.
    unmatched = rpm.isna()
    if unmatched.any():
        bad_labels = sorted(set(df.loc[unmatched, 'fault'].astype(str)))
        raise ValueError(
            'Cannot derive rpm: fault labels must end in 0-3, '
            f'offending labels (first 5): {bad_labels[:5]}'
        )

    # Assign plain values (not an indexed Series) so no index alignment occurs.
    df['rpm'] = rpm.astype('int64').to_numpy()
    return df

In [51]:
path = crwu_path + normal

# Substring of a .mat variable name -> output column it is stored in.
channel_columns = {'fe_time': 'FE', 'de_time': 'DE'}

# Variable names already encountered in earlier files. Some files repeat the
# variables of another recording (normal_2.mat also lists the X098_* keys of
# normal_1.mat), so a signal is only taken the first time its key shows up.
seen_keys = set()

# One frame per file; they are concatenated once at the end rather than
# growing a DataFrame inside the loop, which re-copies all previous rows on
# every iteration.
frames = []
for file in sorted(os.listdir(path)):
    if not file.endswith('.mat'):
        continue  # ignore stray non-.mat entries in the directory
    data = scipy.io.loadmat(path + file)

    signals = {}
    for key in data:
        if key not in seen_keys:
            for marker, column in channel_columns.items():
                if marker in key.lower():
                    signals[column] = data[key].reshape(-1)
        seen_keys.add(key)

    df_aux = pd.DataFrame(signals)

    # The file name without extension is used as the label.
    fault = file.replace('.mat', '')
    df_aux['fault'] = fault

    frames.append(df_aux)

df_normal = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# rpm is derived from the trailing digit of the label (see rpm_column).
df_normal = rpm_column(df_normal)

In [52]:
# Preview the combined normal-baseline frame (pandas truncates the display).
df_normal

Unnamed: 0,DE,FE,fault,rpm
0,0.053197,0.145667,normal_0,1797
1,0.088662,0.097796,normal_0,1797
2,0.099718,0.054856,normal_0,1797
3,0.058621,0.036982,normal_0,1797
4,-0.004590,0.054445,normal_0,1797
...,...,...,...,...
1698542,0.035673,0.125122,normal_3,1730
1698543,-0.004590,0.124916,normal_3,1730
1698544,-0.023574,0.109302,normal_3,1730
1698545,0.005215,0.052185,normal_3,1730


In [53]:
# Write the normal-baseline frame to the 'crwu_normal_raw' table.
# NOTE(review): bulk_insert_df comes from utils.db and is not visible here -
# confirm whether it commits and whether it appends to or replaces the table.
bulk_insert_df(conn, df_normal, 'crwu_normal_raw')

In [54]:
base_path = crwu_path + drive_end_12k

# Fault-type code -> human-readable label stored in the `fault_type` column.
fault_dict = {
    'B': 'Ball',
    'I': 'Inner Race',
    'O6': 'Outer Race Centered at 6',
    'O12': 'Outer Race Opposite at 12',
    'O3': 'Outer Race Orthogonal at 3'
}

# Keyword in an outer-race file name -> fault-type code.
position_codes = {'centered': 'O6', 'opposite': 'O12', 'orthogonal': 'O3'}

# Substring of a .mat variable name -> output column it is stored in.
channel_columns = {'fe_time': 'FE', 'de_time': 'DE', 'ba_time': 'BA'}

# Variable names already encountered in earlier files; a signal is only taken
# the first time its key shows up, because some files repeat the variables of
# another recording.
seen_keys = set()
paths = [base_path + fault_d for fault_d in fault_diameter]

# One frame per file; they are concatenated once at the end rather than
# growing a DataFrame inside the loop, which re-copies all previous rows on
# every iteration.
frames = []
for path in paths:
    for file in sorted(os.listdir(path)):
        if not file.endswith('.mat'):
            continue  # ignore stray non-.mat entries in the directory
        data = scipy.io.loadmat(path + file)

        signals = {}
        for key in data:
            if key not in seen_keys:
                for marker, column in channel_columns.items():
                    if marker in key.lower():
                        signals[column] = data[key].reshape(-1)
            seen_keys.add(key)

        df_aux = pd.DataFrame(signals)

        # The file name without extension is the label; its first run of
        # three digits is stored as the fault diameter.
        fault = file.replace('.mat', '')
        df_aux['fault'] = fault
        df_aux['fault_diameter'] = re.findall(r'(\d{3})', fault)[0]

        # The first letter gives the fault type; outer-race files ('O') are
        # refined by the position keyword in their name.
        fault_type = fault[0]
        if fault_type == 'O':
            for keyword, code in position_codes.items():
                if re.search(keyword, fault):
                    fault_type = code
        df_aux['fault_type'] = fault_dict[fault_type]

        frames.append(df_aux)

df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# For this group the rpm is not read from the files; it is derived from the
# trailing digit of the label (see rpm_column).
df_fault_de_12k = rpm_column(df)

In [55]:
# Preview the 12k drive-end frame. Channels missing from a file show up as
# NaN (see the FE/BA columns of the last rows).
df_fault_de_12k

Unnamed: 0,DE,FE,BA,fault,fault_diameter,fault_type,rpm
0,-0.002761,-0.247162,0.015532,B007_0,007,Ball,1797
1,-0.096324,0.142791,0.016940,B007_0,007,Ball,1797
2,0.113705,0.003287,-0.036455,B007_0,007,Ball,1797
3,0.257297,-0.106836,-0.044744,B007_0,007,Ball,1797
4,-0.058314,0.136011,0.007726,B007_0,007,Ball,1797
...,...,...,...,...,...,...,...
7313753,-0.580646,,,IR028_3,028,Inner Race,1730
7313754,-1.376543,,,IR028_3,028,Inner Race,1730
7313755,-1.617834,,,IR028_3,028,Inner Race,1730
7313756,-1.061195,,,IR028_3,028,Inner Race,1730


In [56]:
# Write the 12k drive-end frame to the 'crwu_de_12k_raw' table.
# NOTE(review): bulk_insert_df comes from utils.db and is not visible here -
# confirm whether it commits and whether it appends to or replaces the table.
bulk_insert_df(conn, df_fault_de_12k, 'crwu_de_12k_raw')

In [57]:
base_path = crwu_path + drive_end_48k

# Fault-type code -> human-readable label stored in the `fault_type` column.
fault_dict = {
    'B': 'Ball',
    'I': 'Inner Race',
    'O6': 'Outer Race Centered at 6',
    'O12': 'Outer Race Opposite at 12',
    'O3': 'Outer Race Orthogonal at 3'
}

# Keyword in an outer-race file name -> fault-type code.
position_codes = {'centered': 'O6', 'opposite': 'O12', 'orthogonal': 'O3'}

# Substring of a .mat variable name -> output column it is stored in.
channel_columns = {'fe_time': 'FE', 'de_time': 'DE', 'ba_time': 'BA'}

# Variable names already encountered in earlier files; a signal is only taken
# the first time its key shows up, because some files repeat the variables of
# another recording.
seen_keys = set()
paths = [base_path + fault_d for fault_d in fault_diameter]

# One frame per file; they are concatenated once at the end rather than
# growing a DataFrame inside the loop, which re-copies all previous rows on
# every iteration.
frames = []
for path in paths:
    # Only the directory listing is guarded, so a missing diameter folder is
    # reported and skipped while errors raised during loading still surface.
    try:
        files = sorted(os.listdir(path))
    except FileNotFoundError:
        print(f'{path} does not exist.')
        continue

    for file in files:
        if not file.endswith('.mat'):
            continue  # ignore stray non-.mat entries in the directory
        data = scipy.io.loadmat(path + file)

        signals = {}
        rpm_key = None  # reset per file so a key from a previous file is never reused
        for key in data:
            key_lower = key.lower()
            # X217_* signals are never taken here; they are loaded explicitly
            # from IR021_3.mat after the loop.
            if key not in seen_keys and 'X217' not in key:
                for marker, column in channel_columns.items():
                    if marker in key_lower:
                        signals[column] = data[key].reshape(-1)
            if 'rpm' in key_lower:
                # NOTE(review): the last rpm-like key of the file wins, even if
                # it belongs to a repeated recording - confirm this is intended.
                rpm_key = key
            seen_keys.add(key)

        df_aux = pd.DataFrame(signals)

        # The file name without extension is the label; its first run of
        # three digits is stored as the fault diameter.
        fault = file.replace('.mat', '')
        df_aux['fault'] = fault
        df_aux['fault_diameter'] = re.findall(r'(\d{3})', fault)[0]

        # The first letter gives the fault type; outer-race files ('O') are
        # refined by the position keyword in their name.
        fault_type = fault[0]
        if fault_type == 'O':
            for keyword, code in position_codes.items():
                if re.search(keyword, fault):
                    fault_type = code
        df_aux['fault_type'] = fault_dict[fault_type]

        if rpm_key is None:
            raise KeyError(f'No RPM variable found in {path + file}')
        # The rpm variable is an array; its first element is used for every row.
        df_aux['rpm'] = data[rpm_key].reshape(-1)[0]

        frames.append(df_aux)

df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# The X217_* signals were skipped in the loop above; they are taken from
# IR021_3.mat here and labelled by hand.
data = scipy.io.loadmat(base_path+'021/IR021_3.mat')

df_aux = pd.DataFrame()
df_aux['FE'] = data['X217_FE_time'].reshape(-1)
df_aux['DE'] = data['X217_DE_time'].reshape(-1)
df_aux['fault'] = 'IR021_3'
df_aux['fault_diameter'] = '021'
df_aux['fault_type'] = 'Inner Race'
# NOTE(review): hard-coded instead of read from the file; 1730 is the value
# rpm_column assigns to labels ending in '3' - confirm this is intended.
df_aux['rpm'] = 1730

df_fault_de_48k = pd.concat([df, df_aux], ignore_index=True)

data/crwu_dataset/drive_end_bearing_fault_48k_data/028/ does not exist.


In [58]:
# Preview the 48k drive-end frame, including the IR021_3 rows appended by hand.
df_fault_de_48k

Unnamed: 0,DE,FE,fault,fault_diameter,fault_type,rpm
0,-0.111192,-0.095125,B007_0,007,Ball,1796
1,-0.083029,-0.072115,B007_0,007,Ball,1796
2,-0.042349,-0.026093,B007_0,007,Ball,1796
3,0.008970,0.042324,B007_0,007,Ball,1796
4,0.057578,0.122862,B007_0,007,Ball,1796
...,...,...,...,...,...,...
21416804,0.249087,0.188196,IR021_3,021,Inner Race,1730
21416805,0.111401,0.319482,IR021_3,021,Inner Race,1730
21416806,-0.035256,0.407416,IR021_3,021,Inner Race,1730
21416807,-0.168144,0.455698,IR021_3,021,Inner Race,1730


In [59]:
# Write the 48k drive-end frame to the 'crwu_de_48k_raw' table.
# NOTE(review): bulk_insert_df comes from utils.db and is not visible here -
# confirm whether it commits and whether it appends to or replaces the table.
bulk_insert_df(conn, df_fault_de_48k, 'crwu_de_48k_raw')

In [60]:
base_path = crwu_path + fan_end_12k

# Fault-type code -> human-readable label stored in the `fault_type` column.
fault_dict = {
    'B': 'Ball',
    'I': 'Inner Race',
    'O6': 'Outer Race Centered at 6',
    'O12': 'Outer Race Opposite at 12',
    'O3': 'Outer Race Orthogonal at 3'
}

# Keyword in an outer-race file name -> fault-type code.
position_codes = {'centered': 'O6', 'opposite': 'O12', 'orthogonal': 'O3'}

# Substring of a .mat variable name -> output column it is stored in.
channel_columns = {'fe_time': 'FE', 'de_time': 'DE', 'ba_time': 'BA'}

# Variable names already encountered in earlier files; a signal is only taken
# the first time its key shows up, because some files repeat the variables of
# another recording.
seen_keys = set()
paths = [base_path + fault_d for fault_d in fault_diameter]

# One frame per file; they are concatenated once at the end rather than
# growing a DataFrame inside the loop, which re-copies all previous rows on
# every iteration.
frames = []
for path in paths:
    # Only the directory listing is guarded, so a missing diameter folder is
    # reported and skipped while errors raised during loading still surface.
    try:
        files = sorted(os.listdir(path))
    except FileNotFoundError:
        print(f'{path} does not exist.')
        continue

    for file in files:
        if not file.endswith('.mat'):
            continue  # ignore stray non-.mat entries in the directory
        data = scipy.io.loadmat(path + file)

        signals = {}
        rpm_key = None  # reset per file so a key from a previous file is never reused
        for key in data:
            key_lower = key.lower()
            if key not in seen_keys:
                for marker, column in channel_columns.items():
                    if marker in key_lower:
                        signals[column] = data[key].reshape(-1)
            if 'rpm' in key_lower:
                # NOTE(review): the last rpm-like key of the file wins, even if
                # it belongs to a repeated recording - confirm this is intended.
                rpm_key = key
            seen_keys.add(key)

        df_aux = pd.DataFrame(signals)

        # The file name without extension is the label; its first run of
        # three digits is stored as the fault diameter.
        fault = file.replace('.mat', '')
        df_aux['fault'] = fault
        df_aux['fault_diameter'] = re.findall(r'(\d{3})', fault)[0]

        # The first letter gives the fault type; outer-race files ('O') are
        # refined by the position keyword in their name.
        fault_type = fault[0]
        if fault_type == 'O':
            for keyword, code in position_codes.items():
                if re.search(keyword, fault):
                    fault_type = code
        df_aux['fault_type'] = fault_dict[fault_type]

        if rpm_key is None:
            raise KeyError(f'No RPM variable found in {path + file}')
        # The rpm variable is an array; its first element is used for every row.
        df_aux['rpm'] = data[rpm_key].reshape(-1)[0]

        frames.append(df_aux)

# `df` is picked up by the next cell under the name df_fault_fe_12k.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()


data/crwu_dataset/fan_end_bearing_fault_12k_data/028/ does not exist.


In [61]:
# Give the fan-end result its own name. This binds a second name to the same
# object as `df`; no copy is made.
df_fault_fe_12k = df
df_fault_fe_12k

Unnamed: 0,DE,FE,BA,fault,fault_diameter,fault_type,rpm
0,-0.168120,0.319666,0.017306,B007_0,007,Ball,1798
1,0.181278,0.326170,-0.003703,B007_0,007,Ball,1798
2,0.044345,-0.260481,-0.169119,B007_0,007,Ball,1798
3,-0.270454,0.031056,-0.069467,B007_0,007,Ball,1798
4,-0.138070,0.446980,0.089832,B007_0,007,Ball,1798
...,...,...,...,...,...,...,...
5458054,0.091857,0.155118,0.174671,OR021_orthogonal_at_3_3,021,Outer Race Orthogonal at 3,1728
5458055,0.028101,-0.093087,-0.261744,OR021_orthogonal_at_3_3,021,Outer Race Orthogonal at 3,1728
5458056,-0.061238,-0.144956,-0.115280,OR021_orthogonal_at_3_3,021,Outer Race Orthogonal at 3,1728
5458057,0.079918,0.077803,0.246575,OR021_orthogonal_at_3_3,021,Outer Race Orthogonal at 3,1728


In [62]:
# Write the 12k fan-end frame to the 'crwu_fe_12k_raw' table.
# NOTE(review): bulk_insert_df comes from utils.db and is not visible here -
# confirm whether it commits and whether it appends to or replaces the table.
bulk_insert_df(conn, df_fault_fe_12k, 'crwu_fe_12k_raw')

In [63]:
# Close the raw connection opened at the top of the notebook. This only runs
# if every cell above succeeded; after a failure, close it manually.
conn.close()