# Bearing Life Prediction Experiment
This is an example of using the Seeq Python module (SPy) to apply machine learning models to cleansed data from Seeq.

# Project Setup
## Install dependencies

In [None]:
import sys
# Run pip through the interpreter that backs this kernel (sys.executable) so the
# packages are installed into the environment the notebook actually imports from.
# NOTE(review): no versions are pinned, so a re-run may pick up newer releases.
!{sys.executable} -m pip install seaborn
!{sys.executable} -m pip install seeq

## Import Libraries

In [None]:
import json
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

from numpy.random import seed
from tensorflow import keras
import tensorflow as tf
from tensorflow.keras.layers import Input
from tensorflow.keras.layers import Dense 
from tensorflow.keras import Sequential
from tensorflow.keras import regularizers

from seeq import spy

In [None]:
from azureml.core import Workspace 
from azureml.core import Experiment
from azureml.mlflow import register_model
from azureml.core import Dataset, Datastore
from azureml.data.datapath import DataPath

# Name of the Azure ML experiment under which this notebook's run is recorded.
experiment_name = 'Bearing_Failure_Prediction'
# Obtain the workspace handle; every later Azure ML call (experiment, keyvault,
# datastore) goes through `ws`.
ws = Workspace.from_config()
# Tab-separated summary of the workspace that was loaded.
print(ws.name, ws.location, ws.resource_group, sep='\t')

In [None]:
from azureml.core import Experiment
# Create/open the experiment in the workspace and start a run. `run` is used
# throughout the notebook for run.log(...), run.upload_file(...) and
# run.log_image(...), and is closed by run.complete() in the last cell.
experiment = Experiment(workspace=ws, name=experiment_name)
run = experiment.start_logging()

# Get the Data

In [None]:
# Get the keyvault associated with this notebook's workspace; the Seeq
# credentials are read from it in the login cell.
# NOTE(review): `Workspace` is already imported and `ws` already created in an
# earlier cell; both are repeated here, presumably so this cell can run alone.
from azureml.core import Workspace
ws = Workspace.from_config()
keyvault = ws.get_default_keyvault()

## Log into Seeq with SPy

In [None]:
# Get the credentials from the secrets store (never hard-code them here).
# NOTE(review): despite its name, `access_key_name` holds the *value* of the
# "accesskey" secret, which is then used as the Seeq username.
access_key_name = keyvault.get_secret(name="accesskey")
access_pwd = keyvault.get_secret(name="pwd")

# Login to Seeq
# NOTE(review): ignore_ssl_errors=True is passed together with an https URL;
# presumably this disables certificate validation while credentials are sent.
# Confirm this is intentional for this server.
spy.login(url='https://explore.seeq.com/', username=access_key_name, password=access_pwd, ignore_ssl_errors=True)

## Search for Bearing Signals

In [None]:
# Look up the C23 compressor's vibration signal. The name is given as an
# anchored regex so only an item called exactly "Vibration" matches, and the
# search is restricted to the scope identified by the 'Scoped To' ID.
vibration_signals = spy.search(
    {
        'Name': '/^Vibration$/',
        'Path': 'Compressors Vibration Probes >> C23',
        'Scoped To': '7CC84C64-A14A-4DA2-AA6F-4C56733FADB2',
    }
)
# workbook = 'Seeq Integration with Python Machine Learning'
vibration_signals

Find the condition giving time periods of normal operation...

In [None]:
# Search the same scope for the condition named exactly "Training Window" and
# keep only the first hit (a single row) for the pull in the next section.
# workbook = 'Seeq Integration with Python Machine Learning'
bearing_norm_operation = spy.search(
    {
        'Name': '/^Training Window$/',
        'Scoped To': '7CC84C64-A14A-4DA2-AA6F-4C56733FADB2',
    }
).iloc[0]

## Pull Bearing Signal Data for the Window

First, pull the Bearing Normal Operation condition to find the time period over which to retrieve the data used to train the model.

In [None]:
# Pull the capsules of the normal-operation condition inside a fixed search
# range.
df_cond = spy.pull(
    bearing_norm_operation,
    start='2020-04-01 12:00:00',
    end='2020-07-15 12:00:00',
    grid=None,
)
# The first capsule (row label 0) defines the training window boundaries.
start_time, end_time = df_cond.loc[0, ['Capsule Start', 'Capsule End']]
df_cond

Next pull the bearing vibration data.

In [None]:
# Pull the vibration signal over the training window found above.
# NOTE(review): grid=None presumably returns the stored samples without
# resampling onto a regular grid — confirm against the SPy documentation.
df = spy.pull(vibration_signals, start=start_time, end=end_time, grid=None)
# Handle to the workspace's default datastore; it is the target used when the
# DataFrames are registered as Azure ML datasets in later cells.
datastore = ws.get_default_datastore()
df.head()

In [None]:
# Quick visual check of the raw training data before any scaling.
df.plot(figsize=(24,6), rot=60)

## Register the dataset

In [None]:
# Register the raw training frame as a named tabular dataset in the default
# datastore; the returned handle is attached to the model when it is registered.
input_dataset = Dataset.Tabular.register_pandas_dataframe(
    df,
    datastore,
    name="bearing_life_training_data_set",
    description="Vibration data for training",
    show_progress=True,
)

# Develop the Machine Learning Model

## Standardize data

In [None]:
import pickle

# Standardization: fit the scaler on the training data only. The fitted
# `scaler` is re-used later to transform the validation data with the same
# parameters.
names = df.columns
index = df.index
scaler = StandardScaler().fit(df)
scaled_df = pd.DataFrame(scaler.transform(df), index=index, columns=names)

# Persist the fitted scaler as a run artifact. The context manager guarantees
# the pickle is flushed and the handle closed *before* the file is uploaded;
# passing a bare open(...) to pickle.dump left the handle open.
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)
run.upload_file(name='bearing_failure_exp_scaler', path_or_stream='scaler.pkl')

In [None]:
# Same data after standardization, for visual comparison with the raw plot.
scaled_df.plot(figsize=(12,6), rot=60)

## Build the Artificial Neural Network

In [None]:
# Fix the random seeds before any layer is created so that weight
# initialization is repeatable from a fresh kernel.
seed(9) # global seed for numpy
tf.random.set_seed(10) # global seed for tensorflow
activation = 'elu'
run.log("activation", activation)

# Autoencoder with layer widths: n_features -> 10 -> 2 -> 10 -> n_features,
# where n_features = scaled_df.shape[1]. It is compiled with an MSE loss below.
# Empty Sequential container; layers are appended in order.
model=Sequential()
# First hidden layer, connected to input vector X (input_shape fixes the
# feature count). The L2 regularizer coefficient is 0.0, i.e. it adds no penalty.
model.add(Dense(10,activation=activation,
                kernel_initializer='glorot_uniform',
                kernel_regularizer=regularizers.l2(0.0),
                input_shape=(scaled_df.shape[1],)
               )
         )

# Bottleneck layer: 2 units.
model.add(Dense(2,activation=activation,
                kernel_initializer='glorot_uniform'))

# Expanding layer, mirroring the first hidden layer.
model.add(Dense(10,activation=activation,
                kernel_initializer='glorot_uniform'))

# Output layer: one unit per input feature; no activation argument is given.
model.add(Dense(scaled_df.shape[1],
                kernel_initializer='glorot_uniform'))

model.compile(loss='mse',optimizer='adam')
# Record the hyper-parameters on the run.
run.log("optimizer", 'adam')
# NOTE(review): the metric key is spelled "kernel_initalizer" (missing an "i");
# left unchanged because renaming it changes the key logged on the run.
run.log("kernel_initalizer", 'glorot_uniform')


## Train Model

In [None]:
# Train the autoencoder for 100 epochs with a batch size of 10. Input and
# target are the same array: the network learns to reconstruct its input.
NUM_EPOCHS = 100
BATCH_SIZE = 10
X_train = scaled_df  # .sample(frac=1)
history = model.fit(
    np.array(X_train),
    np.array(X_train),
    batch_size=BATCH_SIZE,
    epochs=NUM_EPOCHS,
    validation_split=0.05,
    verbose=1,
)

# Save the trained network locally, then attach the file to the run under the
# name that the model-registration cell refers to.
model.save('bearing_model.h5')
run.upload_file(name='bearing_failure_exp_model', path_or_stream='bearing_model.h5')

In [None]:
# Training vs. validation loss per epoch.
plt.figure()
plt.plot(history.history['loss'], 'b', label='Training loss')
plt.plot(history.history['val_loss'], 'r', label='Validation loss')
plt.legend(loc='upper right')
plt.xlabel('Epochs')
plt.ylabel('Loss, [mse]')
# Log the figure BEFORE plt.show(): with the inline backend, show() renders and
# closes the current figure, so logging afterwards would save an empty canvas.
run.log_image('Loss', plot=plt)
plt.show()

Determine the mean absolute errors and look at their distribution in the training set

In [None]:
# Reconstruct the training data with the trained autoencoder, keeping the
# original index and column labels so the frames line up element-wise.
X_pred = pd.DataFrame(
    model.predict(np.array(scaled_df)),
    index=scaled_df.index,
    columns=scaled_df.columns,
)

# Per-row mean absolute reconstruction error.
scored = pd.DataFrame(index=scaled_df.index)
scored['Loss_mae'] = (X_pred - scaled_df).abs().mean(axis=1)

# Distribution of the training error; it is used to choose the anomaly threshold.
plt.figure()
sns.histplot(scored['Loss_mae'], bins=10, kde=True, color='blue');
run.log_image('Loss MAE', plot=plt)

In [None]:
# Reconstruction error over time for the training window.
scored.plot(figsize=(12,6), rot=60)

Pick a threshold above the noise level from the above distribution

In [None]:
# Pick a threshold above the noise level from the above distribution.
# This value was chosen by inspecting the MAE histogram and must be revisited
# whenever the model or the training window changes.
thrs = 1.6
# Record the chosen threshold with the run so the result can be traced.
run.log("threshold", thrs)

# Apply ML Trained Model to New Data

As a data scientist, you may want to validate your model with test data.

In [None]:
# Helper that tabulates the reconstruction MAE next to the chosen threshold
def loss_threshold_visualization(new_df, threshold, predictor):
    """Score each row of ``new_df`` by reconstruction error against a threshold.

    Parameters
    ----------
    new_df : pandas.DataFrame
        Data to score; it is passed to ``predictor.predict`` as an array.
    threshold : scalar
        Value the per-row mean absolute error is compared with.
    predictor : object
        Anything with a ``predict(array)`` method returning an array with the
        same shape as its input.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``new_df`` with columns ``Loss_mae`` (per-row mean
        absolute error), ``Threshold`` (the constant ``threshold``) and
        ``Anomaly`` (True where ``Loss_mae`` exceeds ``Threshold``).
    """
    reconstruction = pd.DataFrame(
        predictor.predict(np.array(new_df)),
        index=new_df.index,
        columns=new_df.columns,
    )
    row_mae = (reconstruction - new_df).abs().mean(axis=1)

    return row_mae.to_frame(name='Loss_mae').assign(
        Threshold=threshold,
        Anomaly=lambda frame: frame['Loss_mae'] > frame['Threshold'],
    )

Create a function to apply the trained model to new data

In [None]:
def anomaly_detection(new_df, threshold, predictor):
    """Flag rows whose reconstruction error exceeds ``threshold``.

    Parameters
    ----------
    new_df : pandas.DataFrame
        Data to score; it is passed to ``predictor.predict`` as an array.
    threshold : scalar
        Limit on the per-row mean absolute reconstruction error.
    predictor : object
        Anything with a ``predict(array)`` method returning an array with the
        same shape as its input.

    Returns
    -------
    pandas.DataFrame
        Single column ``BearingStatus`` indexed like ``new_df``: 1 where the
        row's mean absolute error is greater than ``threshold``, otherwise 0.
    """
    reconstructed = pd.DataFrame(
        predictor.predict(np.array(new_df)),
        index=new_df.index,
        columns=new_df.columns,
    )
    mae_per_row = (reconstructed - new_df).abs().mean(axis=1)
    exceeds_limit = mae_per_row.gt(threshold)
    # Multiplying by 1 turns the boolean flags into integer 0/1 values.
    return pd.DataFrame(exceeds_limit * 1, index=new_df.index, columns=['BearingStatus'])


## Validate the model
We can validate with data outside of the training period. Make another call to get the data for a larger time period

In [None]:
# Make a new call to Seeq for the validation data: the same vibration
# signal(s), pulled over a fixed date range instead of the training capsule.
new_df = spy.pull(vibration_signals, start='2020-04-15 12:00:00', end='2020-07-05 12:00:00', grid=None)
new_df.head()

Scale the new df but make sure it is scaled using the previous parameters from the scaler used for the training set

In [None]:
# It is important to use the previously fitted standardization scaler: the new
# data must be transformed with the parameters learned from the training set,
# not re-fitted on itself.
new_scaled_df = pd.DataFrame(
    scaler.transform(new_df),
    index=new_df.index,
    columns=new_df.columns,
)

In [None]:
# Score the scaled validation data: per-row MAE, the chosen threshold, and a
# boolean anomaly flag, ready to be plotted together.
loss_level = loss_threshold_visualization(new_scaled_df, thrs, model)

In [None]:
# Plot the validation MAE together with the threshold and anomaly columns.
loss_level.plot(figsize=(12,6), rot=60)

# Create and push a new signal with the status of the bearing health


Status 0 means "normal"; status 1 means "abnormal".

In [None]:
# 0/1 bearing status for every validation sample.
status = anomaly_detection(new_scaled_df, thrs, model)

# Register the status frame as a dataset so it can be attached to the
# registered model as its sample output.
output_dataset = Dataset.Tabular.register_pandas_dataframe(
    status,
    datastore,
    name="bearing_life_output_set",
    description="Output after training",
    show_progress=True,
)
status.head()

In [None]:
# Bearing status over time (0 = normal, 1 = abnormal).
status.plot(figsize=(12,6), rot=60)

## Register the Model

In [None]:
from azureml.core import Model
from azureml.core.resource_configuration import ResourceConfiguration
# NOTE(review): `details` and `runId` are not used anywhere in the visible
# notebook, and neither is ResourceConfiguration.
details = run.get_details()
runId = details['runId']

# Register the artifact that the training cell uploaded to the run under the
# name 'bearing_failure_exp_model', linking the input and output datasets.
# NOTE(review): this rebinds `model`. Until here it was the Keras network used
# for model.predict(...); from here on it is the object returned by
# run.register_model, so re-running earlier scoring cells without re-running
# the build/train cells would presumably fail.
# NOTE(review): the framework is declared as TENSORFLOW while the version
# reported is keras.__version__ — confirm this is the intended version string.
model = run.register_model(model_name='bearing-life-tensorflow-model',
                          model_path='bearing_failure_exp_model', 
                          model_framework=Model.Framework.TENSORFLOW, 
                          datasets=[('training_data_set', input_dataset),('output_data_set', output_dataset)],
                          model_framework_version=keras.__version__, 
                          description='Bearing life model',
                          tags={'area': 'Bearing Life', 'type': 'ANN'})


In [None]:
# Attach the registered datasets to the registered model as its sample input
# and sample output (`model` is the registered model here, not the Keras net).
model.update(sample_input_dataset=input_dataset, sample_output_dataset=output_dataset)

## Push signal back to Seeq

In [None]:
# Create a status signal in Seeq from the 0/1 `status` frame.
# The metadata frame has a single row labelled 'BearingStatus' (the column of
# `status`); the scalar entries are broadcast onto that row. 'Model Version'
# records the version of the registered model that produced the signal.

spy.push(status, metadata=pd.DataFrame({
    'Interpolation Method': {
        'BearingStatus': 'step'
    },
    'Type': 'Signal',
    'Name': 'Bearing Status',
    'Model Version': model.version
}), workbook='7CC84C64-A14A-4DA2-AA6F-4C56733FADB2', worksheet = '7. Bearing Status Prediction Output')

In [None]:
# Close the run that was opened with experiment.start_logging().
run.complete()