# Predict Remaining Useful Lifetime - NASA Turbofan

## Pre-work
First, we prepare the training data for automated machine learning, using [tsfresh](https://tsfresh.readthedocs.io/en/latest/) for time-series feature engineering.

In [None]:
#!pip install --upgrade pip

In [None]:
#!pip install tsfresh

In [None]:
%matplotlib inline

import pandas as pd 

from tsfresh import extract_features
from tsfresh.utilities.dataframe_functions import make_forecasting_frame
from tsfresh.utilities.dataframe_functions import impute
from tsfresh.feature_extraction.settings import ComprehensiveFCParameters, MinimalFCParameters, EfficientFCParameters
import datetime
# Let pandas display up to 60 columns before truncating wide DataFrames.
pd.options.display.max_columns = 60

In [None]:
# Load the training and test CSV files from the local ./data folder.
train_df, test_df = map(pd.read_csv, ('./data/train.csv', './data/test.csv'))

In [None]:
train_df.head(10)

In [None]:
import matplotlib.pyplot as plt

# Plot every column of the rows with id == 1, one subplot per column on a
# shared x-axis. DataFrame.plot(subplots=True) opens its own figure, so no
# explicit plt.clf()/plt.figure() is needed; those calls only produced extra
# empty figures in the notebook output.
train_df[train_df["id"] == 1].plot(subplots=True, sharex=True, figsize=(20, 20))
#plt.savefig(img_path+"turbine1-line.png")
plt.show()

In [None]:
# Feature calculators requested from tsfresh: just mean, maximum and minimum,
# chosen to keep the demo quick.
# (MinimalFCParameters() from tsfresh is an alternative with only a few basic
# features.)
settings_minimal = dict.fromkeys(('mean', 'maximum', 'minimum'))

In [None]:
# Collect the distinct values of the "id" column in the train and test data.
unit_list_train, unit_list_test = (
    frame["id"].unique() for frame in (train_df, test_df)
)

In [None]:
# Names of the columns used for feature extraction: setting1..setting3,
# cycle, and the sensor columns s1..s21.
sensor_cols = ['s%d' % i for i in range(1, 22)]
sequence_cols = ['setting1', 'setting2', 'setting3', 'cycle'] + sensor_cols

In [None]:
def create_feature_extract(unit_list, df):
    """Build rolling-window tsfresh features for each unit in ``unit_list``.

    For every unit id, each column named in the module-level ``sequence_cols``
    is turned into a forecasting frame (``max_timeshift=5``,
    ``rolling_direction=2``) and passed to ``extract_features`` with the
    calculators in the module-level ``settings_minimal``. The per-column
    feature tables are joined side by side, tagged with the unit ``id`` and
    the unit's ``RUL`` values, and the per-unit tables are stacked row-wise.

    Args:
        unit_list: Iterable of unit ids to process.
        df: DataFrame with an ``id`` column, a ``RUL`` column and every
            column listed in ``sequence_cols``.

    Returns:
        A single DataFrame holding the extracted features plus ``id`` and
        ``RUL`` for all requested units; an empty DataFrame when
        ``unit_list`` is empty.
    """
    # Collect the per-unit frames and concatenate once at the end:
    # DataFrame.append was removed in pandas 2.0, and growing a frame inside
    # a loop re-copies all accumulated rows on every iteration.
    unit_frames = []
    for unit_id_num in unit_list:
        print("unit id is", unit_id_num)
        tmp = df[df['id'] == unit_id_num]

        feature_frames = []
        for col in sequence_cols:
            # The target series returned alongside the frame is not used here.
            df_shift, _ = make_forecasting_frame(
                tmp[col], kind=col, max_timeshift=5, rolling_direction=2
            )
            X = extract_features(
                df_shift,
                column_id="id",
                column_sort="time",
                column_kind="kind",
                column_value="value",
                impute_function=impute,
                show_warnings=False,
                default_fc_parameters=settings_minimal,
            )
            #X = X.loc[:, X.apply(pd.Series.nunique) != 1] # remove unique columns
            feature_frames.append(X)

        base_df = pd.concat(feature_frames, axis=1) if feature_frames else pd.DataFrame()
        base_df["id"] = unit_id_num
        # Column assignment aligns tmp["RUL"] to base_df by index label.
        # NOTE(review): this relies on the index produced by extract_features
        # matching df's row labels -- confirm for the installed tsfresh
        # version, otherwise RUL ends up NaN.
        base_df["RUL"] = tmp["RUL"]
        unit_frames.append(base_df)

    if not unit_frames:
        return pd.DataFrame()
    return pd.concat(unit_frames)

In [None]:
# Build the training features from the first 10 training units only
# (widen the slice, e.g. unit_list_train[:20], to include more units).
train = create_feature_extract(unit_list=unit_list_train[:10], df=train_df)

In [None]:
# Build the test features from the first 10 test units only.
test = create_feature_extract(unit_list=unit_list_test[:10], df=test_df)

In [None]:
train.head()

In [None]:
test.head()

## Automated Machine Learning

In [None]:
from azureml.core import Workspace, Experiment
from azureml.train.automl import AutoMLConfig

In [None]:
# `from azureml.core import ...` does not bind the name `azureml`, so import
# the package explicitly before reading its version (avoids a NameError).
import azureml.core

print("Azure ML SDK Version: ", azureml.core.VERSION)

In [None]:
# Obtain the workspace via Workspace.from_config() and print its key
# identifiers, one per line.
ws = Workspace.from_config()
print('\n'.join([
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]))

In [None]:
# Experiment object bound to workspace `ws` under the name "automl-with-tsfresh";
# the AutoML run is submitted through it.
experiment = Experiment(workspace = ws, name = "automl-with-tsfresh")

In [None]:
# Split the training table: every column except "RUL" and "id" is a feature,
# and the "RUL" values are the target array.
X_train = train.drop(columns=["RUL", "id"])
y_train = train['RUL'].values

In [None]:
# Split the test table the same way: features without "RUL"/"id", and the
# "RUL" values as the target array.
X_test = test.drop(columns=["RUL", "id"])
y_test = test['RUL'].values

In [None]:
# AutoML regression configuration on the tsfresh features: 10 iterations,
# 3 cross-validations, and an iteration timeout of 100 minutes.
automl_config = AutoMLConfig(
    task='regression',
    X=X_train,
    y=y_train,
    n_cross_validations=3,
    iterations=10,
    iteration_timeout_minutes=100,
)

In [None]:
# Submit the AutoML configuration to the experiment (show_output=True) and
# keep the returned run for the cells below.
local_run = experiment.submit(automl_config, show_output=True)

In [None]:
from azureml.widgets import RunDetails
# Show the RunDetails widget for the submitted run.
RunDetails(local_run).show()

In [None]:
# get_output() yields a pair, unpacked here as the best run and its fitted
# model (naming per this notebook -- confirm details against the SDK docs).
best_run, fitted_model = local_run.get_output()
# Bare expression so the notebook displays the run as the cell output.
best_run

## Model interpretability 

In [None]:
from azureml.explain.model.tabular_explainer import TabularExplainer
# NOTE(review): `classes` is not passed to the explainer below, and the AutoML
# task in this notebook is 'regression' -- this looks like a leftover from a
# classification example; confirm before removing.
classes = ["false","true"]
# Explainer built from the fitted model and the training features, labelled
# with the training column names.
tabular_explainer = TabularExplainer(fitted_model, X_train, features=X_train.columns)

In [None]:
# Compute a global explanation using the first 100 rows of the test features.
global_explanation = tabular_explainer.explain_global(X_test[:100])

In [None]:
from azureml.contrib.explain.model.visualize import ExplanationDashboard
# Dashboard for the global explanation, given the fitted model and the first
# 100 rows of the test features.
ExplanationDashboard(global_explanation, fitted_model, X_test[:100])