# Predictive Maintenance using AutoML

In [5]:
from azureml.core import Workspace, Experiment
from azureml.train.automl import AutoMLConfig

In [6]:
print("Azure ML SDK Version: ", azureml.core.VERSION)

Azure ML SDK Version:  1.0.62


In [7]:
ws = Workspace.from_config()
print('Workspace name: ' + ws.name, 
      'Azure region: ' + ws.location, 
      'Subscription id: ' + ws.subscription_id, 
      'Resource group: ' + ws.resource_group, sep = '\n')

Workspace name: aml
Azure region: japaneast
Subscription id: 89e493dd-8ada-47cd-a173-44914b224841
Resource group: aml


## Experiment

In [8]:
experiment = Experiment(workspace = ws, name = "pdm-automl")

## Training Data

In [9]:
import pandas as pd 

train_df = pd.read_csv('./train.csv')
test_df = pd.read_csv('./test.csv')

In [10]:
train_df.head(10)

Unnamed: 0,id,cycle,setting1,setting2,setting3,s1,s2,s3,s4,s5,...,s16,s17,s18,s19,s20,s21,RUL,label1,label2,cycle_norm
0,1,1,0.46,0.17,0.0,0.0,0.18,0.41,0.31,0.0,...,0.0,0.33,0.0,0.0,0.71,0.72,191,0,0,0.0
1,1,2,0.61,0.25,0.0,0.0,0.28,0.45,0.35,0.0,...,0.0,0.33,0.0,0.0,0.67,0.73,190,0,0,0.0
2,1,3,0.25,0.75,0.0,0.0,0.34,0.37,0.37,0.0,...,0.0,0.17,0.0,0.0,0.63,0.62,189,0,0,0.01
3,1,4,0.54,0.5,0.0,0.0,0.34,0.26,0.33,0.0,...,0.0,0.33,0.0,0.0,0.57,0.66,188,0,0,0.01
4,1,5,0.39,0.33,0.0,0.0,0.35,0.26,0.4,0.0,...,0.0,0.42,0.0,0.0,0.59,0.7,187,0,0,0.01
5,1,6,0.25,0.42,0.0,0.0,0.27,0.29,0.27,0.0,...,0.0,0.25,0.0,0.0,0.65,0.65,186,0,0,0.01
6,1,7,0.56,0.58,0.0,0.0,0.38,0.46,0.26,0.0,...,0.0,0.33,0.0,0.0,0.74,0.67,185,0,0,0.02
7,1,8,0.3,0.75,0.0,0.0,0.41,0.26,0.32,0.0,...,0.0,0.25,0.0,0.0,0.64,0.57,184,0,0,0.02
8,1,9,0.55,0.58,0.0,0.0,0.27,0.43,0.21,0.0,...,0.0,0.33,0.0,0.0,0.71,0.71,183,0,0,0.02
9,1,10,0.31,0.58,0.0,0.0,0.15,0.44,0.31,0.0,...,0.0,0.42,0.0,0.0,0.63,0.79,182,0,0,0.02


In [12]:
#　Feature columns
sensor_cols = ['s' + str(i) for i in range(1,22)]
sequence_cols = ['setting1', 'setting2', 'setting3', 'cycle']
sequence_cols.extend(sensor_cols)
print(sequence_cols)

['setting1', 'setting2', 'setting3', 'cycle', 's1', 's2', 's3', 's4', 's5', 's6', 's7', 's8', 's9', 's10', 's11', 's12', 's13', 's14', 's15', 's16', 's17', 's18', 's19', 's20', 's21']


In [13]:
X = train_df[sequence_cols]
y = train_df['label1'].values

In [14]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1,random_state=100,stratify=y)

## AutoML

In [15]:
automl_config = AutoMLConfig(task = 'classification',
                             iteration_timeout_minutes = 10,
                             iterations = 20,
                             primary_metric = 'AUC_weighted',
                             n_cross_validations = 5,
                             X = train_df[sequence_cols], 
                             y = train_df['label1'].values
                             )

## Training

In [16]:
local_run = experiment.submit(automl_config, show_output=True)

Running on local machine
Parent Run ID: AutoML_29aaf6ea-3b1d-43bb-964e-8f9e8b442755
Current status: DatasetCrossValidationSplit. Generating CV splits.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
ITERATION: The iteration being evaluated.
PIPELINE: A summary description of the pipeline being evaluated.
DURATION: Time taken for the current iteration.
METRIC: The result of computing score on the fitted pipeline.
BEST: The best observed score thus far.
****************************************************************************************************

 ITERATION   PIPELINE                                       DURATION      METRIC      BEST
         0   StandardScalerWrapper SGD                      0:00:14       0.9872    0.9872
         1   StandardScalerWrapper SGD                      0:00:10       0.9895    0.9895
         2   MinMaxScaler LightGBM                      

In [17]:
from azureml.widgets import RunDetails
RunDetails(local_run).show()

A Jupyter Widget

In [18]:
best_run, fitted_model = local_run.get_output()
best_run

Experiment,Id,Type,Status,Details Page,Docs Page
pdm-automl,AutoML_29aaf6ea-3b1d-43bb-964e-8f9e8b442755_18,,Completed,Link to Azure Portal,Link to Documentation


In [19]:
fitted_model

Pipeline(memory=None,
     steps=[('prefittedsoftvotingclassifier', PreFittedSoftVotingClassifier(classification_labels=None,
               estimators=[('6', Pipeline(memory=None,
     steps=[('StandardScalerWrapper', <automl.client.core.common.model_wrappers.StandardScalerWrapper object at 0x7ff983a75780>), ('LightGBMClassi...6666667, 0.06666666666666667, 0.06666666666666667, 0.13333333333333333, 0.2, 0.13333333333333333]))])

## Interpretability SDK

In [20]:
from azureml.explain.model.tabular_explainer import TabularExplainer

classes = ["false","true"]

tabular_explainer = TabularExplainer(fitted_model, X_train, features=X_train.columns, classes=classes)

In [21]:
global_explanation = tabular_explainer.explain_global(X_test[:100])

A Jupyter Widget




In [22]:
from azureml.contrib.explain.model.visualize import ExplanationDashboard

ExplanationDashboard(global_explanation, fitted_model, X_test[:100])

A Jupyter Widget

<azureml.contrib.explain.model.visualize.ExplanationDashboard.ExplanationDashboard at 0x7ff8b6badbe0>