In [1]:
import os
import numpy as np
import pandas as pd
import itertools  
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestRegressor

from bokeh.plotting import figure
from bokeh.io import show, output_notebook, export
from bokeh.layouts import row, column, gridplot
from bokeh.palettes import Dark2_5 as palette
from bokeh.models.glyphs import Step
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt

# from utils_cython.data_utils_c import derivative
from utils.data_utils import (csvs_merge, cumsum, step_change_point, get_explosion_index,
                              hankel_svd, correlation_coeffs, Bearing)

# Direct Bokeh output to the notebook so that figures passed to show()
# are rendered inline below the cell.
output_notebook();

# Load IPython's autoreload extension and use mode 2, which re-imports
# modules before each cell is executed so that edits to local modules are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Names of the bearing run folders to load.
bearings_to_read = ['Bearing1_1', 'Bearing1_2', 'Bearing1_3', 'Bearing1_4', 'Bearing1_5',
                    'Bearing1_6', 'Bearing1_7', 'Bearing2_1', 'Bearing2_2', 'Bearing2_3',
                    'Bearing2_4', 'Bearing2_5', 'Bearing2_6', 'Bearing2_7', 'Bearing3_1',
                    'Bearing3_2', 'Bearing3_3']

dataset = 'femto_dataset'
bearings = []
# Module-level constant read by later cells: it sizes the Hankel slices and
# scales the explosion index back to a position in the cumsum series.
# NOTE(review): presumably the number of samples per acquisition - confirm.
step = 2560
for bearing_to_read in bearings_to_read:
    # Build the bearing directory once so that the existence check and the
    # reads below are guaranteed to point at the same location.
    bearing_dir = 'data/processed_data/%s/%s' % (dataset, bearing_to_read)

    data = {'vib' : pd.read_csv('%s/acc_merged.csv' % bearing_dir)}

    # Reads 'temperature' if the data exists. The previous check omitted the
    # dataset folder from the path, so it never matched the file that was
    # subsequently read and the temperature data was silently skipped.
    temp_path = '%s/temp_merged.csv' % bearing_dir
    if os.path.exists(temp_path):
        data['temp'] = pd.read_csv(temp_path)

    # Character 7 of e.g. 'Bearing1_1' is the digit that follows 'Bearing';
    # it is passed on as the bearing's condition.
    bearings.append(Bearing(name=bearing_to_read, dataset=dataset, condition=bearing_to_read[7],
                            data=data, restore_results=False))

In [3]:
# Cumulative-sum analysis: store, for every bearing, the result of cumsum()
# applied to the horizontal and the vertical acceleration columns.
for bearing in bearings:
    vib = bearing.data['vib']
    bearing.results['cumsum'] = {
        axis: cumsum(vib[column])
        for axis, column in (('h', 'h_acc'), ('v', 'v_acc'))
    }

In [4]:
for bearing in bearings:
    vib = bearing.data['vib']

    # Hankel-matrix singular values for each acceleration axis. The slice
    # window is the length of the signal divided by `step`.
    svd_per_axis = {}
    for axis, column in (('h', 'h_acc'), ('v', 'v_acc')):
        signal = vib[column]
        svd_per_axis[axis] = hankel_svd(signal, hankel_window_size=9,
                                        slice_window_size=len(signal)//step)
    bearing.results['hankel_svd'] = svd_per_axis

    # Correlation coefficients of the singular values just stored, computed
    # with a 5% baseline and normalised to the interval [-1, 1].
    coeffs_per_axis = {}
    for axis in ('h', 'v'):
        coeffs_per_axis[axis] = correlation_coeffs(bearing.results['hankel_svd'][axis],
                                                   baseline_percentage=5, norm_interval=[-1, 1])
    bearing.results['hankel_svd_correlation_coeffs'] = coeffs_per_axis

In [33]:
def _windowed_polyfit_features(bearing, qtd, degree, threshold=0.6):
    """Build the regression features and targets for one bearing.

    Steps:
      1. Find the first position where the vertical Hankel-SVD correlation
         coefficient falls below ``threshold``.
      2. Pass it to ``get_explosion_index`` and multiply the result by the
         module-level ``step`` to get the start of the segment of the
         vertical cumsum that is analysed.
         NOTE(review): presumably this converts a slice index into a sample
         offset at the onset of degradation - confirm against utils.
      3. Min-max scale that segment to [0, 1] (the scaler is fitted on this
         bearing's own segment).
      4. Split it into ``qtd`` equal, contiguous windows (trailing samples
         that do not fill a window are ignored) and fit, in every window, a
         polynomial of the given ``degree`` mapping the scaled signal to a
         linear ramp running from 1 to 0 over the whole segment.

    Parameters
    ----------
    bearing : object exposing ``results['hankel_svd_correlation_coeffs']['v']``
        and ``results['cumsum']['v']``.
    qtd : int
        Number of windows (one feature row per window).
    degree : int
        Degree of the polynomial fitted in every window.
    threshold : float
        Correlation value below which the search in step 1 stops.

    Returns
    -------
    (list of ndarray, ndarray)
        ``qtd`` coefficient vectors of length ``degree + 1`` and the matching
        targets, a linear ramp from 1 to 0 with ``qtd`` points.

    Raises
    ------
    ValueError
        If ``qtd`` is not positive, if the correlation never drops below
        ``threshold``, or if the segment is shorter than ``qtd`` samples.
    """
    if qtd < 1:
        raise ValueError('qtd must be a positive number of windows, got %r' % (qtd,))

    hankel_spot = pd.DataFrame(bearing.results['hankel_svd_correlation_coeffs']['v'],
                               columns=['hankel_v'])
    below_threshold = hankel_spot.index[hankel_spot['hankel_v'] < threshold]
    if len(below_threshold) == 0:
        # Previously surfaced as a bare IndexError from `.index[0]`.
        raise ValueError('vertical correlation coefficient never drops below %r' % (threshold,))

    # Computed once and reused (it used to be evaluated twice per bearing).
    onset = step * get_explosion_index(hankel_spot, below_threshold[0])

    expon = bearing.results['cumsum']['v'][onset:]
    expon = MinMaxScaler().fit_transform(expon.values.reshape(-1, 1)).ravel()

    window_len = len(expon) // qtd
    if window_len == 0:
        # Empty windows would otherwise make np.polyfit fail with a TypeError.
        raise ValueError('segment has %d samples, fewer than the %d windows requested'
                         % (len(expon), qtd))

    ramp = np.linspace(1, 0, len(expon))
    coefs = [np.polyfit(expon[start:start + window_len],
                        ramp[start:start + window_len],
                        degree)
             for start in range(0, qtd * window_len, window_len)]

    return coefs, np.linspace(1, 0, len(coefs))


def do_all_stuff(bearing_train, bearing_test, qtd, degree, threshold=0.6):
    """Train on one bearing, evaluate on another, and return the test score.

    A ``RandomForestRegressor`` (100 trees, ``random_state=42``) is fitted on
    the windowed polynomial coefficients of ``bearing_train`` against a linear
    ramp from 1 to 0, then scored on the features extracted in the same way
    from ``bearing_test``.

    Parameters
    ----------
    bearing_train, bearing_test : bearings whose ``results`` already contain
        the ``'cumsum'`` and ``'hankel_svd_correlation_coeffs'`` entries.
    qtd : int
        Number of windows per bearing, i.e. samples given to the regressor.
    degree : int
        Degree of the per-window polynomial fit.
    threshold : float, optional
        Correlation threshold used to locate the analysed segment; the
        default keeps the previously hard-coded value of 0.6.

    Returns
    -------
    float
        Value of ``reg.score`` (coefficient of determination, R^2) on the
        test bearing. It can be negative.

    Raises
    ------
    ValueError
        Propagated from the feature extraction, see
        ``_windowed_polyfit_features``.
    """
    # ---- TRAIN ---- #
    train_coefs, train_target = _windowed_polyfit_features(bearing_train, qtd, degree, threshold)

    reg = RandomForestRegressor(n_estimators=100, random_state=42)
    reg.fit(train_coefs, train_target)

    # ---- TEST ---- #
    test_coefs, test_target = _windowed_polyfit_features(bearing_test, qtd, degree, threshold)

    return reg.score(test_coefs, test_target)

In [34]:
# Score every (train, test) pair of bearings. `scores` ends up with one row
# per training bearing and one entry per test bearing, both in the order of
# `bearings`; `counter` numbers the pairs in the printed log.
scores = []
counter = 0
for train_bearing in bearings:
    row_scores = []
    for test_bearing in bearings:
        counter += 1
        print(counter)
        pair_score = do_all_stuff(train_bearing, test_bearing, qtd=1000, degree=2)
        print(pair_score)
        row_scores.append(pair_score)

    scores.append(row_scores)

1
0.9974195731943731
2
0.9582352373212373
3
0.9371913498117499
4
0.23979760618120638
5
0.7174738533946534
6
0.9133137118761118
7
0.942554384026784
8
0.9595070040878041
9
0.289689953889954
10
0.9241041427857426
11
0.772779723909724
12
0.9733049742689744
13
0.08173316232716221
14
0.8077749444141443
15
0.9673254185514185
16
0.9624109664269664
17
0.9629640869628869
18
0.9977847916227917
19
0.9694931664983665
20
0.5854766470394472
21
0.6551974594618594
22
0.9499339917523918
23
0.912137185057585
24
0.9616316129492128
25
0.42045710304830314
26
0.9549371503007502
27
0.6362037712281711
28
0.9770805531873532
29
0.017439344872545726
30
0.7779935032691031
31
0.9653158256890256
32
0.9761837088017088
33
0.6165615052095053
34
0.6934905935349935
35
0.9984714339522339
36
0.7313815891123889
37
0.6805861323481324
38
0.714401378000978
39
0.7417129213633213
40
0.5951027060087062
41
0.38232014742734743
42
0.3412967816219816
43
-0.1055421294453296
44
0.7699580805152806
45
0.3851428921516921
46
0.768415556041

In [32]:
# Column labels for the score matrix. Every row of `scores` holds one entry
# per loaded bearing, in the order of `bearings_to_read`, so the labels are
# derived from that list. The hand-written copy used previously had only 16
# names ('Bearing1_3' was missing), which does not match the 17 bearings
# that are loaded and makes the DataFrame construction fail.
bearings_names = list(bearings_to_read)

# Rows are the training bearings and columns the test bearings, both in the
# same order.
scores_df = pd.DataFrame(scores, columns=bearings_names)
scores_df.to_csv('test.csv')