# 熱中症患者数予測
#### 気象データから熱中症患者数を予測する回帰モデルを作成する

## Azure ML Service用の準備

In [None]:
import azureml
from azureml.core import Workspace, Run

# Report the installed Azure ML core SDK version.
print(f"Azure ML SDK Version:  {azureml.core.VERSION}")

In [None]:
# Load the workspace configuration from the config.json file in the current folder.
ws = Workspace.from_config()
# Summarize the workspace: name, location, resource group and subscription ID
# (each property is printed once, tab-separated).
print(ws.name, ws.location, ws.resource_group, ws.subscription_id, sep='\t')

In [None]:
from azureml.core import Experiment
import pandas as pd

# Experiment under which all runs of this notebook are grouped.
experiment_name = 'Heatstroke_patient_prediction'
exp = Experiment(workspace=ws, name=experiment_name)

# Local folder that holds the scripts submitted with the AutoML run.
project_folder = './sample_projects/Heatstroke_patient_prediction'

# Settings shown in the summary table below.
output = {
    'SDK version': azureml.core.VERSION,
    'Subscription ID': ws.subscription_id,
    'Workspace Name': ws.name,
    'Resource Group': ws.resource_group,
    'Location': ws.location,
    'Project Directory': project_folder,
    'Experiment Name': exp.name,
}

# Do not truncate long cell values in the table. None is the documented
# "no limit" value; pandas < 1.0 only accepts integers for this option and
# uses -1 for the same purpose, so fall back to it when None is rejected.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

outputDf = pd.DataFrame(data=output, index=[''])
# Transpose so that each setting is shown on its own row.
outputDf.T

## トレーニング用データをストレージにアップロード

In [None]:
import pandas as pd
# Load the training CSV from the local data folder into a DataFrame.
df = pd.read_csv('./data/Heatstroke_patient_prediction_train_data.csv')

In [None]:
# Preview the first 10 rows of the training data.
df.head(10)

In [None]:
# Get the workspace's default datastore.
ds = ws.get_default_datastore()
# Show the datastore's name, type, storage account name and container name.
print(ds.name, ds.datastore_type, ds.account_name, ds.container_name)

In [None]:
# Upload the training CSV to the datastore; overwrite=True replaces an existing copy.
ds.upload_files(['./data/Heatstroke_patient_prediction_train_data.csv'],overwrite=True)

## トレーニング実行用の ML Compute 作成

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name of the AmlCompute cluster to reuse or create.
cluster_name = "cpucluster"

# Reuse the cluster if the workspace already has one with this name;
# otherwise provision a new one.
try:
    cluster = ComputeTarget(workspace=ws, name=cluster_name)
except ComputeTargetException:
    # The lookup failed, so create a cluster that scales between 0 and 8 nodes.
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D4_V2',  # alternative size noted by the author: 'Standard_NC6'
        min_nodes=0,
        max_nodes=8,
    )
    cluster = ComputeTarget.create(ws, cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Wait until the cluster is ready, showing progress output while waiting.
cluster.wait_for_completion(show_output=True)

## Automated ML 機能を使って予測モデル作成

In [None]:
import os
# Create the local project folder; exist_ok=True makes this a no-op when it already exists.
os.makedirs(project_folder, exist_ok=True)

### モデル学習用スクリプト作成

In [None]:
%%writefile $project_folder/get_data.py

import numpy as np
from azure.storage.blob import BlockBlobService
import pandas as pd
import os.path

def get_data():
    """Load the training CSV and split it into features and target.

    The CSV is read from the current working directory. When no local copy
    exists, it is first downloaded from Azure Blob Storage using the account
    name, account key and container name set in the placeholders below.

    Returns:
        dict: ``{"X": features, "y": target}``. ``X`` is the DataFrame
        without the "年月日", "年", "月" and "搬送人員（計）" columns; ``y`` holds
        the values of the "搬送人員（計）" column as an array.
    """
    file_name = "Heatstroke_patient_prediction_train_data.csv"
    local_path = './' + file_name

    # Download only when there is no local copy yet.
    if not os.path.exists(local_path):
        # Replace the placeholders with the storage account name, its key,
        # and the container that holds the uploaded CSV.
        account_name = '<ストレージアカウント名>'
        account_key = '<ストレージアカウントキー>'
        container_name = '<コンテナ名>'

        blob_client = BlockBlobService(account_name=account_name, account_key=account_key)
        # The blob carries the same name as the local file.
        blob_client.get_blob_to_path(container_name, file_name, file_name)

    frame = pd.read_csv(local_path)

    target_column = "搬送人員（計）"
    # Everything except the date-related columns and the target is a feature.
    features = frame.drop(columns=["年月日", "年", "月", target_column])
    target = frame[target_column].values

    return {"X": features, "y": target}

### モデル学習実行環境定義

In [None]:
from azureml.core.runconfig import RunConfiguration
from azureml.core.conda_dependencies import CondaDependencies

# Run configuration describing the environment of the remote training job.
run_config = RunConfiguration(framework="python")

# Python environment: conda packages plus the pip packages for the AutoML SDK
# and Azure Storage (get_data.py imports azure.storage.blob).
run_config.environment.python.conda_dependencies = CondaDependencies.create(
    conda_packages=['numpy', 'pandas'],
    pip_packages=['azureml-sdk[automl]', 'azure-storage'],
)

# Run inside Docker, based on the SDK's default CPU image.
run_config.environment.docker.base_image = azureml.core.runconfig.DEFAULT_CPU_IMAGE
run_config.environment.docker.enabled = True

# Execute on the AmlCompute cluster prepared in the earlier step.
run_config.target = cluster.name

### Automated ML 実行

In [None]:
from azureml.train.automl import AutoMLConfig
import logging

automated_ml_config = AutoMLConfig(
    # Task, scoring metric and validation scheme.
    task='regression',
    primary_metric='normalized_mean_absolute_error',
    n_cross_validations=3,
    preprocess=True,
    # Search budget and early-exit threshold.
    iterations=30,
    iteration_timeout_minutes=10,
    max_concurrent_iterations=4,
    experiment_exit_score=0,
    # Project folder, data-loading script and run environment.
    path=project_folder,
    data_script=project_folder + "/get_data.py",
    run_configuration=run_config,
    # Logging.
    verbosity=logging.INFO,
    debug_log='automated_ml_errors.log',
)

In [None]:
# Submit the AutoML configuration to the experiment; show_output=False, so
# run output is not requested in this cell.
run_automl = exp.submit(automated_ml_config, show_output = False)
# Display the run object as the cell result.
run_automl

In [None]:
from azureml.widgets import RunDetails
# Show the run-details widget for the submitted AutoML run.
RunDetails(run_automl).show()

In [None]:
%%time
# Wait for the AutoML run to complete, showing its output on stdout.
run_automl.wait_for_completion(show_output=True)

In [None]:
# Collect the float-valued metrics of every child run, keyed by iteration number.
children = list(run_automl.get_children())
metricslist = {}
for run in children:
    properties = run.get_properties()
    # Keep scalar float metrics only, so every table cell is a single number.
    metrics = {k: v for k, v in run.get_metrics().items() if isinstance(v, float)}
    metricslist[int(properties['iteration'])] = metrics

# One column per iteration, ordered by iteration number. The axis is passed by
# keyword because positional arguments to sort_index are rejected by pandas >= 2.0.
rundata = pd.DataFrame(metricslist).sort_index(axis=1)
rundata

## モデル登録

In [None]:
# Retrieve the selected run and its fitted model from the AutoML run,
# then print each on its own line.
best_run, fitted_model = run_automl.get_output()
print(best_run, fitted_model, sep='\n')

In [None]:
# Register the model produced by the AutoML run, with a description and no tags.
description = 'Heatstroke patient prediction Model'
tags = None
model = run_automl.register_model(description=description, tags=tags)
print(run_automl.model_id) # Use this id to deploy the model as a web service in Azure