# エネルギーの需要予測モデリング & モデル解釈

過去のエネルギー需要実績データから、将来必要なエネルギー需要を予測します。

## 1. 事前準備

### Python SDK のインポート
Azure Machine Learning の Python SDK をインポートします。

In [1]:
import logging

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from matplotlib import pyplot as plt
import pandas as pd
import numpy as np
import warnings
import os

# Suppress warning output by replacing the display hook with a no-op.
def _discard_warning(*_args, **_kwargs):
    return None


warnings.showwarning = _discard_warning

import azureml.core
from azureml.core import Experiment, Workspace, Dataset
from azureml.train.automl import AutoMLConfig
from datetime import datetime

In [2]:
# Check which version of the Azure ML Python SDK is installed.
print(azureml.core.VERSION)

1.0.72


### Azure Machine Learning Workspace との接続

In [3]:
# Connect to the workspace; no arguments are passed, so the SDK's default
# configuration lookup is used.
ws = Workspace.from_config()

# Name of the experiment that will hold the AutoML runs.
experiment_name = 'automl-forecasting-energydemand'

experiment = Experiment(ws, experiment_name)

# Collect the connection details so they can be displayed as a single table.
output = {
    'SDK version': azureml.core.VERSION,
    'Subscription ID': ws.subscription_id,
    'Workspace': ws.name,
    'Resource Group': ws.resource_group,
    'Location': ws.location,
    'Run History Name': experiment_name,
}

# Show full cell contents without truncation.
# pandas >= 1.0 expects None for "no limit" (-1 is deprecated there and
# rejected by newer releases); older pandas only accepts an integer and uses
# -1 for the same purpose, so fall back to it when None is refused.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

outputDf = pd.DataFrame(data=output, index=[''])
# Transpose so that each setting is shown on its own row.
outputDf.T

Unnamed: 0,Unnamed: 1
SDK version,1.0.72
Subscription ID,9c0f91b8-eb2f-484c-979c-15848c098a6b
Workspace,azureml
Resource Group,uksouth
Location,uksouth
Run History Name,automl-forecasting-energydemand


### データの準備

In [4]:
# Names of the prediction target column and of the timestamp column.
target_column_name = 'demand' 
time_column_name = 'timeStamp'

In [5]:
# Read the NYC energy CSV as a tabular dataset and register
# `time_column_name` as its timestamp column.
dataset = (
    Dataset.Tabular
    .from_delimited_files(
        path="https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/nyc_energy.csv"
    )
    .with_timestamp_columns(fine_grain_timestamp=time_column_name)
)

# Preview the first five rows.
dataset.take(5).to_pandas_dataframe()

Unnamed: 0,timeStamp,demand,precip,temp
0,2012-01-01 00:00:00,4937.5,0.0,46.13
1,2012-01-01 01:00:00,4752.1,0.0,45.89
2,2012-01-01 02:00:00,4542.6,0.0,45.04
3,2012-01-01 03:00:00,4357.7,0.0,45.03
4,2012-01-01 04:00:00,4275.5,0.0,42.61


In [6]:
# Cut off the end of the dataset due to large number of nan values:
# only rows before 2017-10-10 05:00 are kept.
dataset = dataset.time_before(datetime(2017, 10, 10, 5))

In [7]:
# Training data: everything up to and including 2017-08-08 05:00.
train_end = datetime(2017, 8, 8, 5)
train = dataset.time_before(train_end, include_boundary=True)

# Preview the last five training rows in chronological order.
(
    train.to_pandas_dataframe()
    .sort_values(time_column_name)
    .tail(5)
)

Unnamed: 0,timeStamp,demand,precip,temp
49071,2017-08-08 01:00:00,5106.35,0.0,68.3
49072,2017-08-08 02:00:00,4947.73,0.0,68.44
49073,2017-08-08 03:00:00,4867.02,0.0,68.78
49074,2017-08-08 04:00:00,4888.2,0.0,68.7
49075,2017-08-08 05:00:00,5120.31,0.0,67.53


In [8]:
# Test data: the window between 2017-08-08 05:00 and 2017-08-10 05:00.
# NOTE(review): the recorded preview starts at 06:00, so the start boundary
# appears to be excluded with the SDK version used here -- confirm the default
# of `include_boundary`, because the training split includes the 05:00 row.
test_start = datetime(2017, 8, 8, 5)
test_end = datetime(2017, 8, 10, 5)
test = dataset.time_between(test_start, test_end)

# Preview the first five test rows.
test.to_pandas_dataframe().head(5)

Unnamed: 0,timeStamp,demand,precip,temp
0,2017-08-08 06:00:00,5590.99,0.0,66.17
1,2017-08-08 07:00:00,6147.03,0.0,66.29
2,2017-08-08 08:00:00,6592.43,0.0,66.72
3,2017-08-08 09:00:00,6874.53,0.0,67.37
4,2017-08-08 10:00:00,7010.54,0.0,68.3


### 計算環境 Machine Learning Compute の設定

In [9]:
from azureml.core.compute import ComputeTarget
# Get a handle to the compute target named "cpucluster" in the workspace;
# it is passed to AutoMLConfig below as the place where training runs.
compute_target = ComputeTarget(ws, name = "cpucluster")

## 2. 自動機械学習 Automated Machine Learning
### 学習事前設定
設定項目の詳細は [Configure your experiment settings](https://docs.microsoft.com/en-US/azure/machine-learning/service/how-to-configure-auto-train#configure-your-experiment-settings) を参照してください。

In [10]:
# Forecast horizon handed to AutoML through the 'max_horizon' setting.
# Presumably counted in rows of the hourly data (48 = two days) -- confirm.
max_horizon = 48

In [11]:
# Forecasting-specific settings, forwarded to AutoMLConfig as keyword arguments.
automl_settings = dict(
    time_column_name=time_column_name,
    # max_concurrent_iterations=6,
    max_horizon=max_horizon,
)

# Configuration of the AutoML forecasting experiment.
# To run locally instead of on the remote cluster, comment out
# `compute_target` below and `max_concurrent_iterations` in the settings above.
automl_config = AutoMLConfig(
    task='forecasting',
    primary_metric='normalized_root_mean_squared_error',
    blacklist_models=['ExtremeRandomTrees', 'AutoArima', 'Prophet'],
    experiment_timeout_minutes=5,
    iterations=5,
    training_data=train,
    label_column_name=target_column_name,
    compute_target=compute_target,
    enable_early_stopping=True,
    n_cross_validations=3,
    verbosity=logging.INFO,
    **automl_settings
)

### 実行と結果確認

In [12]:
# Submit the AutoML configuration to the experiment; show_output=True prints
# the run's progress in the cell output.
remote_run = experiment.submit(automl_config, show_output=True)

Running on remote compute: cpucluster
Parent Run ID: AutoML_e9fa1bd2-8663-436e-b5b1-55744c8ed8a9

Current status: DatasetFeaturization. Beginning to featurize the dataset.
Current status: ModelSelection. Beginning model selection.

****************************************************************************************************
ITERATION: The iteration being evaluated.
PIPELINE: A summary description of the pipeline being evaluated.
DURATION: Time taken for the current iteration.
METRIC: The result of computing score on the fitted pipeline.
BEST: The best observed score thus far.
****************************************************************************************************

 ITERATION   PIPELINE                                       DURATION      METRIC      BEST
         0   StandardScalerWrapper ElasticNet               0:01:19       0.1324    0.1324
         1   RobustScaler ElasticNet                        0:01:12       0.1347    0.1324
         2   StandardScalerWrapper 

In [14]:
from azureml.widgets import RunDetails
# Display the run-details widget for the submitted run.
RunDetails(remote_run).show()

_AutoMLWidget(widget_settings={'childWidgetDisplay': 'popup', 'send_telemetry': False, 'log_level': 'INFO', 's…

In [15]:
# get_output() returns a run together with its fitted model
# (presumably the best-scoring iteration -- confirm against the SDK docs).
automl_run, fitted_model = remote_run.get_output()
# Show the steps of the fitted pipeline.
fitted_model.steps

[('timeseriestransformer', TimeSeriesTransformer(logger=None,
             pipeline_type=<TimeSeriesPipelineType.FULL: 1>)),
 ('stackensembleregressor',
  StackEnsembleRegressor(base_learners=[('2', Pipeline(memory=None,
       steps=[('standardscalerwrapper', <automl.client.core.runtime.model_wrappers.StandardScalerWrapper object at 0x7f447c769e80>), ('elasticnet', ElasticNet(alpha=0.8948421052631579, copy_X=True, fit_intercept=True,
        l1_ratio=0.4268421052631579, max_iter=1000, normalize=False,
        positive=False, precompute=False, random_state=None,
        selection='cyclic', tol=0.0001, warm_start=False))]))],
              meta_learner=ElasticNetCV(alphas=None, copy_X=True, cv='warn', eps=0.001,
         fit_intercept=True, l1_ratio=0.5, max_iter=1000, n_alphas=100,
         n_jobs=None, normalize=False, positive=False, precompute='auto',
         random_state=None, selection='cyclic', tol=0.0001, verbose=0),
              training_cv_folds=5))]

### モデルの理解

In [16]:
# List the feature names generated by the 'timeseriestransformer' pipeline step.
fitted_model.named_steps['timeseriestransformer'].get_engineered_feature_names()

['precip',
 'temp',
 'precip_WASNULL',
 'temp_WASNULL',
 'year',
 'half',
 'quarter',
 'month',
 'day',
 'hour',
 'am_pm',
 'hour12',
 'wday',
 'qday',
 'week']

In [17]:
# Fetch the featurization summary from the time-series transformer step.
ts_transformer = fitted_model.named_steps['timeseriestransformer']
featurization_summary = ts_transformer.get_featurization_summary()

# Render the summary records as a pandas DataFrame.
pd.DataFrame.from_records(featurization_summary)

Unnamed: 0,Dropped,EngineeredFeatureCount,RawFeatureName,Transformations,TypeDetected
0,No,2,precip,"[MeanImputer, ImputationMarker]",Numeric
1,No,2,temp,"[MeanImputer, ImputationMarker]",Numeric
2,No,11,timeStamp,"[DateTimeTransformer, DateTimeTransformer, DateTimeTransformer, DateTimeTransformer, DateTimeTransformer, DateTimeTransformer, DateTimeTransformer, DateTimeTransformer, DateTimeTransformer, DateTimeTransformer, DateTimeTransformer]",DateTime


## 3. モデル解釈

本ノートブックでは、ローカル環境でモデル解釈を実行します。<br>
JupyterLab を利用する場合には、下記コマンドをターミナルから実行し、ウィジェット用の拡張機能をインストールします。<br>

```bash
jupyter labextension install @jupyter-widgets/jupyterlab-manager
jupyter labextension install microsoft-mli-widget
```

In [18]:
from azureml.train.automl.automl_explain_utilities import AutoMLExplainerSetupClass, automl_setup_model_explanations
from azureml.explain.model.mimic.models.lightgbm_model import LGBMExplainableModel
from azureml.explain.model.mimic_wrapper import MimicWrapper
from azureml.contrib.interpret.visualize import ExplanationDashboard

In [19]:
# Prepare the data used for model explanation.
# Convert the training dataset to pandas only once: this avoids running the
# conversion twice and guarantees that the features and the labels come from
# the very same frame (same rows, same order).
train_df = train.to_pandas_dataframe()
X_train = train_df.drop([target_column_name], axis=1)
y_train = train_df[target_column_name].values

# Test features only; the target column is dropped.
X_test = test.to_pandas_dataframe().drop([target_column_name], axis=1)

In [20]:
# Build the explanation setup object for the fitted forecasting model from the
# raw training features/labels and the raw test features. The attributes of the
# returned object (estimator, transformed data, feature names, feature map)
# are used by the explainer cells that follow.
automl_explainer_setup_obj = automl_setup_model_explanations(fitted_model, 'forecasting',
                                                             X=X_train, X_test=X_test,
                                                             y=y_train)

Current status: Setting up data for AutoML explanations
Current status: Setting up the AutoML featurizer
Current status: Setting up the AutoML featurization for explanations
Current status: Setting up the AutoML estimator
Current status: Generating a feature map for raw feature importance
Current status: Data for AutoML explanations successfully setup


In [21]:
# Global surrogate model:
# wrap the AutoML estimator in a MimicWrapper that uses LGBMExplainableModel,
# initialised on the transformed training data and attached to the AutoML run.
explainer_setup = automl_explainer_setup_obj
explainer = MimicWrapper(
    ws,
    explainer_setup.automl_estimator,
    LGBMExplainableModel,
    init_dataset=explainer_setup.X_transform,
    run=automl_run,
    features=explainer_setup.engineered_feature_names,
    feature_maps=[explainer_setup.feature_map],
    classes=explainer_setup.classes,
)

### Engineered Explanation

In [22]:
# Compute the engineered explanations: local and global explanations on the
# transformed test data, expressed in terms of engineered features
# (get_raw=False).
engineered_explanations = explainer.explain(['local', 'global'],get_raw=False,
                                            eval_dataset=automl_explainer_setup_obj.X_test_transform)

In [23]:
# Show the dashboard for the engineered-feature explanations, using the
# estimator and the transformed test data.
ExplanationDashboard(engineered_explanations, automl_explainer_setup_obj.automl_estimator, automl_explainer_setup_obj.X_test_transform)

ExplanationWidget(value={'predictedY': [5679.164063646202, 5763.187619524015, 5859.079583665122, 5963.39428915…

<azureml.contrib.interpret.visualize.ExplanationDashboard.ExplanationDashboard at 0x7f44305a2160>

### Raw Explanation

In [24]:
# Compute the raw explanations: same evaluation data as the engineered
# explanations, but with get_raw=True and the raw feature names supplied so
# the result is expressed in terms of the original input columns.
raw_explanations = explainer.explain(['local', 'global'], get_raw=True,
                                     raw_feature_names=automl_explainer_setup_obj.raw_feature_names,
                                     eval_dataset=automl_explainer_setup_obj.X_test_transform)

In [25]:
# Show the dashboard for the raw-feature explanations, using the full AutoML
# pipeline and the raw (untransformed) test data.
ExplanationDashboard(raw_explanations, automl_explainer_setup_obj.automl_pipeline, automl_explainer_setup_obj.X_test_raw)

ExplanationWidget(value={'predictedY': [5679.164063646202, 5763.187619524015, 5859.079583665122, 5963.39428915…

<azureml.contrib.interpret.visualize.ExplanationDashboard.ExplanationDashboard at 0x7f4430582668>