In [1]:
import os
import numpy as np
import pandas as pd
import itertools  
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

from bokeh.plotting import figure
from bokeh.io import show, output_notebook, export
from bokeh.layouts import row, column, gridplot
from bokeh.palettes import Dark2_5 as palette
from bokeh.models.glyphs import Step

from utils_cython.data_utils_c import derivative
from utils.data_utils import (csvs_merge, cumsum, step_change_point, 
                              hankel_svd, correlation_coeffs, Bearing)

%load_ext autoreload
%autoreload 2

##### Create folders for processed data and merge all .csv files of each bearing in the FEMTO dataset.

In [None]:
# Per file type identifier ('acc', 'temp'): the column positions to read
# ('usecols') and the names given to those columns ('names').
files_info = {
    'acc'  : {'usecols' : [0, 1, 2, 4, 5], 'names' : ['hour', 'min', 'seg', 'h_acc', 'v_acc']},
    'temp' : {'usecols' : [0, 1, 2, 4],    'names' : ['hour', 'min', 'seg', 'temp']}
}

for bearing in os.listdir('data/original_data/femto_dataset/'):
    bearing_dir = 'data/original_data/femto_dataset/%s' % (bearing)

    # csvs_merge is handed a folder path, so stray files sitting next to the
    # bearing folders (e.g. hidden OS files) are skipped instead of crashing.
    if not os.path.isdir(bearing_dir):
        continue

    # Creating folders for processed data. makedirs(exist_ok=True) also creates
    # a missing 'data/processed_data' parent and keeps the cell re-runnable
    # (os.mkdir raised FileExistsError on the second run).
    os.makedirs('data/processed_data/%s' % (bearing), exist_ok=True)

    # Merging .csv files.
    csvs_merge(bearing_dir, files_info, bearing)

##### Create Bearing objects and read merged files.

In [2]:
"""bearings_to_read = ['Bearing1_1', 'Bearing1_2', 'Bearing1_3', 'Bearing1_4', 'Bearing1_5', 
                       'Bearing1_6', 'Bearing1_7', 'Bearing2_1', 'Bearing2_2', 'Bearing2_3', 
                       'Bearing2_4', 'Bearing2_5', 'Bearing2_6', 'Bearing2_7', 'Bearing3_1', 
                       'Bearing3_2', 'Bearing3_3']
"""
# Subset of bearings analysed in this notebook.
bearings_to_read = ['Bearing1_1', 'Bearing1_4', 'Bearing2_2']

# Build one Bearing object per name from its merged CSV files.
bearings = []
for bearing_to_read in bearings_to_read:
    acc_path = 'data/processed_data/%s/acc_merged.csv' % (bearing_to_read)
    temp_path = 'data/processed_data/%s/temp_merged.csv' % (bearing_to_read)

    # Vibration data is always read.
    data = {'vib' : pd.read_csv(acc_path)}

    # Temperature data is read only when the merged file exists.
    if os.path.exists(temp_path):
        data['temp'] = pd.read_csv(temp_path)

    # bearing_to_read[7] is the digit right after 'Bearing' in the name,
    # passed on as the bearing's `condition`.
    # NOTE(review): restore_results=True presumably reloads previously saved
    # results - confirm against the Bearing class.
    bearing_obj = Bearing(name=bearing_to_read, dataset='femto',
                          condition=bearing_to_read[7], data=data, restore_results=True)
    bearings.append(bearing_obj)


##### Change state point analysis.

###### Derivative.

In [3]:
# Cumulative sum of both vibration channels, its derivative, and the
# change points (cp) found in that derivative.

# Distance between points handed to `derivative`; per the original note it
# comes from the u-sec column of the original data.
point_spacing = 39*10**-6

for bearing in bearings:
    vib = bearing.data['vib']
    results = bearing.results

    # Cumulative sum of the horizontal ('h') and vertical ('v') accelerations.
    results['cumsum'] = {'h' : cumsum(vib['h_acc']),
                         'v' : cumsum(vib['v_acc'])}

    # Derivative of each cumulative sum, stored as a numpy array.
    results['cs_derivative'] = {
        axis : np.asarray(derivative(results['cumsum'][axis].values, point_spacing))
        for axis in ('h', 'v')
    }

    # Change points of each derivative.
    results['cs_deriv_cp'] = {
        axis : step_change_point(results['cs_derivative'][axis])
        for axis in ('h', 'v')
    }

In [6]:
# Plotting the cumulative sum: one figure per bearing, collected in `s_cs`
# (the list is reused by later cells to show the figures side by side).
s_cs = []
colors = itertools.cycle(palette)
sample_step = 2560  # Keep one point out of every `sample_step` samples.
for bearing in bearings:

    s = figure(plot_width = 500, plot_height = 500,
               title = 'Cumulative Sum.',
               x_axis_label = 'Recordings', y_axis_label = 'Cumulative Sum')

    for (label_l, data), color in zip(bearing.results['cumsum'].items(), colors):
        # Get each data point after sample_step. x is sized from the
        # subsampled data: len(data)//sample_step is one short of
        # len(data[::sample_step]) whenever len(data) is not a multiple of
        # sample_step, which gave the glyph columns of different lengths.
        y = data[::sample_step]
        x = np.arange(len(y))
        # Add circle glyph.
        s.circle(x=x, y=y, color=color, size=1, legend_label="%s, %s" %(bearing.name, label_l))

    s.legend.location = "top_left"

    s_cs.append(s)

# Set to output the plot in the notebook and show the plot.
output_notebook()
show(column(s_cs))

In [7]:
# Plotting derivative change points: one step figure per bearing, collected
# in `s_deriv` and shown next to the cumulative-sum figures in `s_cs`.
s_deriv = []
colors = itertools.cycle(palette)
sample_step = 2560  # Keep one point out of every `sample_step` samples.
for bearing in bearings:
    s = figure(plot_width = 500, plot_height = 500,
               title = 'Horizontal and vertical vibration derivative.',
               x_axis_label = 'Change point', y_axis_label = 'Derivative')

    for (label_l, data), color in zip(bearing.results['cs_deriv_cp'].items(), colors):
        # x is sized from the subsampled data: len(data)//sample_step is one
        # short of len(data[::sample_step]) whenever len(data) is not a
        # multiple of sample_step, which gave x and y different lengths.
        y = data[::sample_step]
        x = np.arange(len(y))
        # Add step glyph.
        s.step(x=x, y=y, mode='center', line_color=color, legend_label="%s, %s" %(bearing.name, label_l))

    s.legend.location = "top_left"

    s_deriv.append(s)

# Set to output the plot in the notebook and show the plot.
output_notebook()
show(row(column(s_deriv), column(s_cs)))

###### Mao et al. - Correlation coefficient.
<sub>Mao, W., He, J., Tang, J. and Li, Y., 2018. Predicting remaining useful life of rolling bearings based on deep feature representation and long short-term memory neural network. Advances in Mechanical Engineering, 10(12), p.1687814018817184.</sub>

In [8]:
# For each bearing: singular values of the Hankel matrix of both vibration
# channels, then the correlation coefficients of those singular values.
for bearing in bearings:
    vib = bearing.data['vib']
    results = bearing.results

    # Compute hankel matrix singular values; n_samples is the channel length
    # divided (integer division) by 2560.
    results['hankel_svd'] = {
        axis : hankel_svd(vib[column_name], window_size=9,
                          n_samples=len(vib[column_name])//2560)
        for axis, column_name in (('h', 'h_acc'), ('v', 'v_acc'))
    }

    # Compute correlation coefficients against baseline 0, with the
    # normalisation interval [-1, 1] passed to correlation_coeffs.
    results['hankel_svd_correlation_coeffs'] = {
        axis : correlation_coeffs(results['hankel_svd'][axis],
                                  baseline=0, norm_interval=[-1, 1])
        for axis in ('h', 'v')
    }

In [9]:
# One figure per bearing with the correlation coefficients of the Hankel
# matrix singular values; figures are collected in `s_h_svd`.
s_h_svd = []
colors = itertools.cycle(palette)
for bearing in bearings:
    fig = figure(plot_width = 500, plot_height = 500,
                 title = 'Hankel matrix singular values correlation coefficients.',
                 x_axis_label = 'Samples', y_axis_label = 'Correlation coefficients')

    coeffs_by_axis = bearing.results['hankel_svd_correlation_coeffs']
    for (axis, coeffs), color in zip(coeffs_by_axis.items(), colors):
        # Circle glyph: coefficient value against its position in the sequence.
        fig.circle(x=np.arange(len(coeffs)), y=coeffs, color=color,
                   legend_label="%s, %s" % (bearing.name, axis))

    fig.legend.location = "bottom_left"
    s_h_svd.append(fig)

# Render inside the notebook, next to the cumulative-sum figures.
output_notebook()
show(row(column(s_h_svd), column(s_cs)))

###### PCA
<sub>https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html</sub>

In [11]:
# Two-component PCA of (sample index, zero-mean cumulative sum) for each
# vibration channel of each bearing; stored under results['cs_pca'].
pca = PCA(n_components=2)
for bearing in bearings:
    cs_pca = {}
    for axis in ('h', 'v'):
        # Adjusting shape and zero mean: the scaler needs a 2-D column, and
        # with_std=False only removes the mean.
        cs_column = bearing.results['cumsum'][axis].values.reshape(-1, 1)
        centered = np.squeeze(StandardScaler(with_std=False).fit_transform(cs_column))

        # Adding np.arange column. column_stack builds the (n_samples, 2)
        # matrix directly, instead of materialising one Python tuple per
        # sample with list(zip(...)).
        points = np.column_stack((np.arange(len(centered)), centered))

        # Compute PCA (the shared `pca` object is refitted for every channel).
        cs_pca[axis] = pca.fit_transform(points)

    bearing.results['cs_pca'] = cs_pca

In [16]:
# One scatter figure per bearing with the two PCA components of its
# cumulative sums; figures are collected in `s_pca`.
s_pca = []
colors = itertools.cycle(palette)
for bearing in bearings:
    fig = figure(plot_width = 500, plot_height = 500,
                 title = 'PCA Components.',
                 x_axis_label = 'Component 1', y_axis_label = 'Component 2')

    for (axis, components), color in zip(bearing.results['cs_pca'].items(), colors):
        # Keep one row out of every 2560.
        kept = components[::2560]
        # Circle glyph: first component on x, second component on y.
        fig.circle(x=kept[:, 0], y=kept[:, 1], color=color,
                   legend_label="%s, %s" %(bearing.name, axis))

    fig.legend.location = "top_left"
    s_pca.append(fig)

# Render inside the notebook, next to the cumulative-sum figures.
output_notebook()
show(row(column(s_pca), column(s_cs)))

In [None]:
# Saving results to binary: calls save_r() on every bearing.
# NOTE(review): the storage format and location are decided inside
# Bearing.save_r, which is not visible here - confirm before relying on it.
for bearing in bearings:
    bearing.save_r()

In [15]:
# Plotting all results: cumulative sum, derivative change points, Hankel
# correlation coefficients and PCA, one column of figures per analysis.
all_results = row(column(s_cs), column(s_deriv), column(s_h_svd), column(s_pca))

# Set to output the plot in the notebook and show the plot.
output_notebook()
show(all_results)

# https://docs.bokeh.org/en/latest/docs/user_guide/export.html?highlight=export
# No filename is passed, so Bokeh picks the output path itself; the path is
# returned and displayed as this cell's value.
export.export_png(all_results)

'/tmp/tmpj4pu1anl.png'