In [2]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestRegressor

from utils import data_model
from utils.preprocessing import get_explosion_index

%load_ext autoreload
%autoreload 2

In [2]:
# Location of the processed dataset on disk (relative to the working directory).
dataset_path = 'data/processed_data/femto_dataset'

# Which bearings to load: either the string 'all' or an explicit list of
# bearing names such as ['Bearing1_1', 'Bearing2_5', 'Bearing3_1', ...].
bearings_to_load = [
    'Bearing1_1', 'Bearing1_2', 'Bearing1_4', 'Bearing1_5', 'Bearing1_6', 'Bearing1_7',
    'Bearing2_1', 'Bearing2_2', 'Bearing2_3', 'Bearing2_4', 'Bearing2_5', 'Bearing2_6', 'Bearing2_7',
    'Bearing3_1', 'Bearing3_2', 'Bearing3_3',
]

# Which data series to load for each bearing: either the string 'all' or an
# explicit list of data names such as ['acc', 'temp', 'cumsum', 'fft_spectogram', ...].
# A result produced by a 'data_utils' function is stored under the name of
# that function.
data_to_load = ['cumsum_v', 'correlation_coeffs_v']

bearings = data_model.load(dataset_path, bearings_to_load, data_to_load)

---

In [13]:
def _degradation_features(bearing, threshold, step, qtd, degree):
    """Turn one bearing's 'cumsum_v' signal into per-chunk polynomial features.

    Returns a tuple ``(coeffs, target)`` where ``coeffs`` is a list of `qtd`
    coefficient arrays (one ``np.polyfit`` result per chunk) and ``target`` is
    a linearly decreasing 1 -> 0 ramp with one value per chunk.
    """
    hankel_spot = pd.DataFrame(bearing.data['correlation_coeffs_v'], columns=['hankel_v'])

    # First position at which the correlation coefficient drops below the threshold.
    below = hankel_spot.index[hankel_spot['hankel_v'] < threshold]
    if len(below) == 0:
        raise IndexError(
            "no 'correlation_coeffs_v' value below %s for bearing %s"
            % (threshold, getattr(bearing, 'name', '<unnamed>'))
        )
    explosion_index = get_explosion_index(hankel_spot, below[0])

    # Keep only the part of the signal from step * explosion_index onwards.
    expon = bearing.data['cumsum_v'][step*explosion_index:]

    # The scaler is fitted on this bearing's own segment, so every bearing is
    # mapped to [0, 1] independently of the others.
    expon = MinMaxScaler().fit_transform(expon.values.reshape(-1, 1)).ravel()

    chunk_len = len(expon) // qtd
    if chunk_len == 0:
        raise ValueError(
            "signal segment has %d samples, fewer than the %d chunks requested (qtd)"
            % (len(expon), qtd)
        )

    # Within each chunk, fit the 1 -> 0 ramp as a polynomial of the scaled
    # signal. Samples beyond qtd * chunk_len (the remainder) are not used.
    ramp = np.linspace(1, 0, len(expon))
    coeffs = [
        np.polyfit(expon[i*chunk_len:(i+1)*chunk_len],
                   ramp[i*chunk_len:(i+1)*chunk_len],
                   degree)
        for i in range(qtd)
    ]

    return coeffs, np.linspace(1, 0, len(coeffs))


def train_test_rf(train, test, qtd, degree, step=2560, train_threshold=0.6, test_threshold=0.8):
    """Fit a random forest on one bearing and score it on another.

    For each bearing, the 'cumsum_v' signal is cut at
    ``step * get_explosion_index(...)``, min-max scaled on its own, split into
    `qtd` equal consecutive chunks, and each chunk is summarised by the
    coefficients of a degree-`degree` polynomial. The forest learns to map
    those coefficients to a 1 -> 0 ramp with one value per chunk.

    Parameters
    ----------
    train, test : objects exposing a ``data`` mapping with the keys
        'correlation_coeffs_v' and 'cumsum_v'.
    qtd : int
        Number of chunks (and therefore of samples given to the regressor).
    degree : int
        Degree of the polynomial fitted inside each chunk.
    step : int, default 2560
        Factor applied to the explosion index to obtain the slicing position
        in 'cumsum_v'. Previously read from a module-level ``step`` variable.
    train_threshold : float, default 0.6
        Correlation-coefficient threshold used to seed the explosion-index
        search on the training bearing.
    test_threshold : float, default 0.8
        Same threshold for the test bearing.

    Returns
    -------
    float
        ``RandomForestRegressor.score`` of the fitted model on the test
        bearing's features.

    Raises
    ------
    IndexError
        If a bearing has no correlation coefficient below its threshold.
    ValueError
        If a bearing's signal segment holds fewer than `qtd` samples.
    """
    reg = RandomForestRegressor(n_estimators=100, random_state=42)

    # Train
    train_coeffs, train_target = _degradation_features(train, train_threshold, step, qtd, degree)
    reg.fit(train_coeffs, train_target)

    # Test
    test_coeffs, test_target = _degradation_features(test, test_threshold, step, qtd, degree)
    return reg.score(test_coeffs, test_target)

In [14]:
# Pairwise evaluation: a model is trained on each bearing and scored against
# every bearing (itself included), giving a len(bearings) x len(bearings) grid.
scores = []
counter = 0
total = len(bearings)**2
# Factor by which train_test_rf scales the explosion index when slicing 'cumsum_v'.
step = 2560

for train in bearings:
    # One row per training bearing; one entry per test bearing.
    scores_intern = []

    for test in bearings:
        score_test = train_test_rf(train, test, qtd=1000, degree=2)
        scores_intern.append(score_test)

        # Count the finished pair before reporting so progress runs 1..total
        # instead of 0..total-1.
        counter += 1
        print('Iteration %s/%s' % (counter, total))
        print('Train: %s \nTest: %s \nScore: %s\n' %(train.name, test.name, score_test))

    # Only complete rows are stored; an interrupted row is discarded.
    scores.append(scores_intern)

Iteration 0/289
Train: Bearing3_3 
Test: Bearing3_3 
Score: 0.9973868401088403

Iteration 1/289
Train: Bearing3_3 
Test: Bearing1_1 
Score: 0.7754233465909465

Iteration 2/289
Train: Bearing3_3 
Test: Bearing1_7 
Score: 0.9587718022230022

Iteration 3/289
Train: Bearing3_3 
Test: Bearing1_3 
Score: -0.1938445098469113

Iteration 4/289
Train: Bearing3_3 
Test: Bearing3_2 
Score: 0.8855135499995499

Iteration 5/289
Train: Bearing3_3 
Test: Bearing2_3 
Score: 0.47540250519690536

Iteration 6/289
Train: Bearing3_3 
Test: Bearing3_1 
Score: 0.775909075102675

Iteration 7/289
Train: Bearing3_3 
Test: Bearing2_6 
Score: 0.8610292465468465

Iteration 8/289
Train: Bearing3_3 
Test: Bearing2_4 
Score: 0.8231911072555071

Iteration 9/289
Train: Bearing3_3 
Test: Bearing2_1 
Score: 0.9581234853506854

Iteration 10/289
Train: Bearing3_3 
Test: Bearing2_5 
Score: 0.6291706275646274



KeyboardInterrupt: 

In [None]:
# Column labels must follow the order in which `bearings` was iterated when
# the scores were produced, so they are read from the loaded objects rather
# than hard-coded: a fixed list mislabels columns (or fails on a length
# mismatch) whenever the loaded bearings differ in number or order.
bearings_names = [bearing.name for bearing in bearings]

# Row i holds the scores of the model trained on the i-th bearing of
# `bearings`; column j is the bearing it was tested on.
scores_df = pd.DataFrame(scores, columns=bearings_names)
scores_df.to_csv('regression.csv')