# 設備の残存耐用時間(RUL)を予測する時系列モデリング
本Notebookでは、豊富な計算環境が用意されているAzure Machine Learning service の Machine Learning Compute のコンピューティング環境を用いて、高速に深層学習(LSTM)を行います。設備の残存耐用時間を予測する時系列モデルを構築します。

## 故障予測のアプローチ方法

故障予測のアプローチ方法は色々ありますが、代表的なアプローチを下記に記載しました。本Notebookでは、設備の残存耐用時間(RUL)を予測する深層学習モデルを構築するアプローチを採用しています。いずれのアプローチにも言えることですが、故障を予測するのではなく、故障する予兆を予測することが大事です。

<img src="../../docs/images/RUL.png" align="left" width=550>

## 使用するデータ

<img src="../../docs/images/PowerBI-RUL.png" align="left" width=550>

## Azure ML Workspaceへ接続
Azure Machine Learning service ワークスペースへ接続します。

In [1]:
from azureml.core import Workspace, Experiment

# Coordinates of the Azure ML workspace this notebook connects to.
# NOTE(review): the subscription ID is hard-coded here; consider loading it
# from a config file or an environment variable instead of the notebook.
subscription_id = '9c0f91b8-eb2f-484c-979c-15848c098a6b'
resource_group = 'mlservice'
workspace_name = 'azureml'

# Workspace handle used by every later cell (experiment, datastore, compute).
workspace = Workspace(
    subscription_id=subscription_id,
    resource_group=resource_group,
    workspace_name=workspace_name,
)

## 実験名の設定

In [2]:
# Experiment handle named "lstm-rul-aml"; the later cells submit their runs to it.
experiment = Experiment(workspace = workspace, name = "lstm-rul-aml")

## クラウドにデータをアップロード
学習で使用するデータをオンプレミスからクラウドにアップロードします。

In [3]:
# Use the workspace's default datastore as the upload target.
ds = workspace.get_default_datastore()
# Upload the local ./data directory to the "data" path on the datastore.
# With overwrite=False, files that already exist on the target are skipped.
ds.upload(src_dir='./data', target_path='data', overwrite=False, show_progress=True)

Uploading an estimated of 2 files
Target already exists. Skipping upload for data/test.csv
Target already exists. Skipping upload for data/train.csv
Uploaded 0 files


$AZUREML_DATAREFERENCE_170552d02f6e432b85c2287a0c508596

## 学習コード準備

In [4]:
import os
# Local folder that holds the training script; it is later passed to the
# estimator as its source directory.
project_folder = "./keras-lstm"
os.makedirs(project_folder, exist_ok=True)

In [23]:
%%writefile {project_folder}/keras_lstm.py

import tensorflow as tf

from tensorflow.python.keras.layers import Conv2D, MaxPooling2D
from tensorflow.python.keras.layers import Activation, Dropout, Flatten, Input, Dense, Dropout, LSTM
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.utils import plot_model
from tensorflow.keras.callbacks import Callback


import os
import pandas as pd
import numpy as np

from azureml.core import Run
from azureml.core import Workspace, Dataset
from keras.utils import plot_model
import argparse


#from keras import initializers, regularizers, constraints, optimizers, layers, callbacks


# Seed NumPy's global RNG so NumPy-based randomness is repeatable.
np.random.seed(1234)
# NOTE(review): this only binds a Python variable. It does not set the
# PYTHONHASHSEED environment variable, so hash randomization is unaffected.
PYTHONHASHSEED = 0

from azureml.core import Run
# Handle to the current Azure ML run; used below to log parameters and metrics.
run = Run.get_context()

# Command-line options supplied by the estimator / HyperDrive.
parser = argparse.ArgumentParser(description='keras lstm example:')
parser.add_argument('--epochs', '-e', type=int, default=10, help='Number of sweeps over the dataset to train')
parser.add_argument('--batchsize', '-b', type=int, default=32, help='Number of sequences in each mini-batch')
# The data folder is mandatory: without it the CSV paths below cannot be built,
# so fail early with a clear argparse error instead of a TypeError later.
parser.add_argument('--dataset', '-d', dest='data_folder', required=True, help='The datastore')
args = parser.parse_args()

# Load the training data and make sure the RUL column is floating point.
train_df = pd.read_csv(os.path.join(args.data_folder, "data", "train.csv"), sep=",", header=0)
train_df['RUL'] = train_df['RUL'].astype(float)

# Load the test data and apply the same cast to its RUL column. The cast is
# guarded because nothing in this script guarantees test.csv has that column.
test_df = pd.read_csv(os.path.join(args.data_folder, "data", "test.csv"), sep=",", header=0)
if 'RUL' in test_df.columns:
    test_df['RUL'] = test_df['RUL'].astype(float)

# Number of consecutive rows (time steps) in each input window.
sequence_length = 50

def gen_sequence(id_df, seq_length, seq_cols):
    """Yield sliding windows of ``seq_length`` rows over the selected columns.

    Args:
        id_df: DataFrame holding the rows of a single id.
        seq_length: Number of rows in each window.
        seq_cols: List of column names to include in every window.

    Yields:
        2-D arrays of shape ``(seq_length, len(seq_cols))``. Window ``k``
        covers rows ``k`` .. ``k + seq_length - 1``. The window that would end
        on the very last row is not produced, so a frame with ``n`` rows
        yields ``n - seq_length`` windows (none when ``n <= seq_length``).
    """
    # Values of the requested columns as a 2-D array.
    values = id_df[seq_cols].values
    # e.g. 192 rows with seq_length 50 -> 142 windows.
    window_count = values.shape[0] - seq_length
    for first_row in range(window_count):
        yield values[first_row:first_row + seq_length, :]
        
        
# Feature columns: setting1-3, cycle_norm and the sensor columns s1..s21.
sensor_cols = ['s{}'.format(n) for n in range(1, 22)]
sequence_cols = ['setting1', 'setting2', 'setting3', 'cycle_norm'] + sensor_cols

# Build the training windows id by id, then stack them into one float32 array.
seq_gen = [
    list(gen_sequence(train_df[train_df['id'] == unit_id], sequence_length, sequence_cols))
    for unit_id in train_df['id'].unique()
]
seq_array = np.concatenate(seq_gen).astype(np.float32)

def gen_labels(id_df, seq_length, label):
    """Return the label rows that line up with the windows of ``gen_sequence``.

    Args:
        id_df: DataFrame holding the rows of a single id.
        seq_length: Number of leading rows to drop (the window length).
        label: List of label column names; a list keeps the result 2-D.

    Returns:
        2-D array with the label values from row ``seq_length`` through the
        last row.
    """
    label_values = id_df[label].values
    return label_values[seq_length:, :]

# Collect the 'label1' targets of every id and stack them into one float32 array.
label_gen = []
for unit_id in train_df['id'].unique():
    unit_rows = train_df[train_df['id'] == unit_id]
    label_gen.append(gen_labels(unit_rows, sequence_length, ['label1']))
label_array = np.concatenate(label_gen).astype(np.float32)



# Training hyper-parameters: two come from the command line, one is fixed.
epochs = args.epochs
batch_size = args.batchsize
validation_split = 0.05

# Record the hyper-parameters on the Azure ML run.
for param_name, param_value in (
    ("エポック数", epochs),
    ("バッチサイズ", batch_size),
    ("検証データ分割", validation_split),
):
    run.log(param_name, param_value)


class RunCallback(tf.keras.callbacks.Callback):
    """Keras callback that forwards per-epoch metrics to an Azure ML run.

    At the end of every epoch the loss and accuracy values found in the Keras
    ``logs`` dict are logged to the run as ``training_loss``,
    ``validation_loss``, ``training_acc`` and ``validation_acc``. A metric
    that is absent from ``logs`` (for example when training without a
    validation split) is skipped instead of raising.

    Args:
        run: Object exposing ``log(name=..., value=...)``, such as the run
            returned by ``Run.get_context()``.
    """

    # (name logged to the run, candidate keys in the Keras ``logs`` dict).
    # Both the 'acc' and the 'accuracy' spellings are accepted.
    _METRICS = (
        ("training_loss", ("loss",)),
        ("validation_loss", ("val_loss",)),
        ("training_acc", ("acc", "accuracy")),
        ("validation_acc", ("val_acc", "val_accuracy")),
    )

    def __init__(self, run):
        # Let the Keras base class initialise its own state first.
        super().__init__()
        self.run = run

    def on_epoch_end(self, batch, logs=None):
        """Log the metrics of the epoch that just finished.

        Args:
            batch: Index passed by Keras for this hook (the epoch index);
                unused here. The parameter name is kept for compatibility.
            logs: Dict of metric values for the epoch, or ``None``.
        """
        logs = logs or {}
        for run_name, candidate_keys in self._METRICS:
            # Take the first candidate key that carries a value.
            value = next(
                (logs[key] for key in candidate_keys if logs.get(key) is not None),
                None,
            )
            if value is not None:
                self.run.log(name=run_name, value=float(value))

# Callback that forwards per-epoch metrics to the Azure ML run.
callbacks = [RunCallback(run)]

# Network dimensions derived from the prepared arrays.
nb_features = seq_array.shape[2]
nb_out = label_array.shape[1]
print("nb_features:", nb_features)
print("nb_out:", nb_out)

# Two stacked LSTM layers, each followed by dropout, then a sigmoid output.
# NOTE(review): this is trained as a binary classifier on 'label1'
# (sigmoid + binary_crossentropy), not as a regression on the RUL column.
model = Sequential()

model.add(LSTM(
         input_shape=(sequence_length, nb_features),
         units=100,
         return_sequences=True))
model.add(Dropout(0.2))

model.add(LSTM(
          units=50,
          return_sequences=False))
model.add(Dropout(0.2))

model.add(Dense(units=nb_out, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line to the log.
model.summary()

# Train; the last `validation_split` fraction of the samples is held out
# for validation and the callback reports each epoch to the run.
model.fit(x = seq_array, y = label_array, epochs=epochs, batch_size=batch_size, validation_split=validation_split, verbose=1,
          callbacks = callbacks)



# Evaluate on the training arrays and record loss / accuracy on the run.
scores = model.evaluate(seq_array, label_array, verbose=1, batch_size=200)
train_loss, train_accuracy = scores[0], scores[1]
run.log("損失", train_loss)
run.log("モデル精度", train_accuracy)

# Save the trained weights under ./outputs so they appear in the run's files.
# NOTE(review): the 'mnist_mlp' file name looks like a leftover from another
# sample; the notebook's model-registration cell refers to this exact path,
# so rename both together if it is changed.
os.makedirs('./outputs/model', exist_ok=True)
model.save_weights('./outputs/mnist_mlp_weights.h5')

Overwriting ./keras-lstm/keras_lstm.py


## Machine Learning Compute設定

Machine Learning Computeの設定を行います。GPUの場合は**gpucluster**、CPUの場合は**cpucluster**を指定します。

In [10]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
# Attach to the compute target named "gpucluster" in the workspace.
compute_target = ComputeTarget(workspace,"gpucluster")
# CPU alternative:
#compute_target = ComputeTarget(workspace,"cpucluster")

## モデル学習設定

TensorFlowのEstimatorの設定を行います。GPUでモデル学習する際は、use_gpu = True に設定します。CPUしか利用できない場合は、このパラメーターを削除するか、use_gpu = False に設定しなおします。

In [11]:
from azureml.train.dnn import TensorFlow
from azureml.train.estimator import Estimator

# Script arguments: mount the datastore on the compute node and pass the
# mount path to the training script as --dataset.
script_params = {
    '--dataset': ds.as_mount()
}

# TensorFlow estimator describing how keras_lstm.py is run on the cluster.
# The compute target is the GPU cluster, so the GPU-enabled environment is
# requested explicitly; set use_gpu=False (or drop it) for a CPU cluster.
estimator = TensorFlow(source_directory=project_folder,
                       compute_target=compute_target,
                       entry_script='keras_lstm.py',
                       script_params=script_params,
                       framework_version = '1.13',
                       pip_packages = ['keras'],
                       use_gpu=True,
                      )

# Alternative: generic Estimator with explicitly pinned packages.
# estimator = Estimator(source_directory=project_folder,
#                        compute_target=compute_target,
#                        entry_script='keras_lstm.py',
#                        script_params=script_params,
#                        pip_packages = ['pandas','tensorflow==2.0.0','keras'],
#                       )



### 実行開始

上記で定義した TensorFlow Estimator の設定に従って、トレーニング環境を構築し、モデル学習を始めます。

In [12]:
# Submit the estimator to the experiment; this starts the remote training run.
run = experiment.submit(estimator)
# Show the run's experiment, id, type and current status.
print(run)



Run(Experiment: lstm-rul-aml,
Id: lstm-rul-aml_1570551179_4e858781,
Type: azureml.scriptrun,
Status: Preparing)


In [16]:
from azureml.widgets import RunDetails
# Display the notebook widget that tracks the submitted training run.
RunDetails(run).show()

_UserRunWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', '…

モデル学習が無事完了したことを確認して、次に進みます。

## モデル登録

In [17]:
# List the files stored with the run (logs and everything under outputs/).
run.get_file_names()

['azureml-logs/20_image_build_log.txt',
 'azureml-logs/55_azureml-execution-tvmps_f9943600cc420db1ea2d23af1032def313e315926abc3a41d9e0d58404fe485b_p.txt',
 'azureml-logs/65_job_prep-tvmps_f9943600cc420db1ea2d23af1032def313e315926abc3a41d9e0d58404fe485b_p.txt',
 'azureml-logs/70_driver_log.txt',
 'azureml-logs/75_job_post-tvmps_f9943600cc420db1ea2d23af1032def313e315926abc3a41d9e0d58404fe485b_p.txt',
 'logs/azureml/152_azureml.log',
 'logs/azureml/azureml.log',
 'outputs/mnist_mlp_weights.h5']

In [18]:
# Register the weights file produced by the run as model 'RUL-lstm-keras'.
# NOTE(review): the training script saves this file with save_weights(), so it
# holds the weights only; the network definition is needed again to load it.
model = run.register_model(model_name = 'RUL-lstm-keras', model_path = 'outputs/mnist_mlp_weights.h5',tags = {'area': "turbine predictive maintenance", 'type': "lstm"})
# Print the registered model's name, id and version, tab-separated.
print(model.name, model.id, model.version, sep = '\t')

RUL-lstm-keras	RUL-lstm-keras:7	7


In [19]:
# run.get_details()

# ハイパーパラメータチューニング  Hyperdrive

Machine Learning Compute を用いて複数サーバでパラメータチューニングを分散で実行します。今回は Random Search を用います。

In [24]:
from azureml.train.hyperdrive.runconfig import HyperDriveConfig
from azureml.train.hyperdrive.sampling import RandomParameterSampling
from azureml.train.hyperdrive.policy import BanditPolicy
from azureml.train.hyperdrive.run import PrimaryMetricGoal
from azureml.train.hyperdrive.parameter_expressions import choice
    

# Search space: each child run draws one batch size and one epoch count at
# random from these choices and receives them as script arguments.
search_space = {
    "--batchsize": choice(32, 64, 128, 256),
    "--epochs": choice(5, 10, 20, 40, 80),
}
param_sampling = RandomParameterSampling(search_space)

# Maximise the 'validation_acc' metric that the training script logs per
# epoch; run four trials in total, all of them concurrently.
hyperdrive_run_config = HyperDriveConfig(
    estimator=estimator,
    hyperparameter_sampling=param_sampling,
    primary_metric_name='validation_acc',
    primary_metric_goal=PrimaryMetricGoal.MAXIMIZE,
    max_total_runs=4,
    max_concurrent_runs=4,
)

## 実行開始

In [25]:
# Submit the HyperDrive configuration; this starts the tuning run.
hyperdrive_run = experiment.submit(hyperdrive_run_config)

In [27]:
from azureml.widgets import RunDetails
# Display the notebook widget that tracks the HyperDrive run.
RunDetails(hyperdrive_run).show()

_HyperDriveWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO'…