### 実験
- データセット：Google Cloud Storage
- 計算環境：Azure ML Computing Cluster
- モデル登録：Azure ML

In [None]:
import pandas as pd
import sklearn
import sys
from google.cloud import storage
import azureml.core

# Report the interpreter and SDK versions in use, one line per component.
python_version = sys.version
print('python version: ', python_version)

azureml_version = azureml.core.VERSION
print("azureml version", azureml_version)

storage_version = storage.__version__
print("storage version", storage_version)

### azuremlの設定

In [None]:
import os
import urllib
import shutil
import azureml

from azureml.core import Experiment
from azureml.core import Workspace, Run

from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

In [None]:
# Tenant configuration: authenticate interactively against an explicit tenant.
from azureml.core.authentication import InteractiveLoginAuthentication

# force=True is passed so the login prompt is requested on every run of this cell.
auth = InteractiveLoginAuthentication(
    tenant_id="tenant_idを指定します",
    force=True,
)

In [None]:
from azureml.core import Experiment, Workspace, Dataset

# Connect to an existing Azure ML workspace with the credentials in `auth`
# and write its configuration to disk; a later cell reloads the workspace
# with Workspace.from_config().
try:
    ws = Workspace(
        subscription_id="サブスクリプションIdを指定します",
        resource_group="リソースグループを指定します",
        workspace_name="Azure MLのリソース名を指定します",
        auth=auth,
    )
    ws.write_config()
except Exception as exc:
    # Best-effort: keep the notebook running, but surface the actual cause
    # instead of hiding it. `except Exception` (rather than a bare `except`)
    # lets KeyboardInterrupt / SystemExit propagate.
    print("Workspace not accessible. Change your parameters or create a new workspace below")
    print("Reason:", repr(exc))
else:
    print("Workspace configuration succeeded.")

In [None]:
from azureml.core.workspace import Workspace

# Reload the workspace from the saved configuration and summarise it,
# one property per line.
ws = Workspace.from_config()

workspace_summary = (
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription id: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
)
print(*workspace_summary, sep='\n')

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Look up the compute target named "StandardD4v2L" in the workspace.
compute_target = ComputeTarget(workspace=ws, name="StandardD4v2L")

### プロジェクトのフォルダー作成

In [None]:
# Local directory later passed to the estimator as its source directory.
project_folder = './project_folder'

# exist_ok=True makes re-running this cell a no-op when the folder exists.
os.makedirs(name=project_folder, exist_ok=True)

In [None]:
from azureml.core import Experiment

# Name under which runs submitted from this notebook are grouped.
experiment_name = 'azure_ml_gcs'

experiment = Experiment(workspace=ws, name=experiment_name)

In [None]:
# Google Cloud settings forwarded to the training script as arguments.
# The bucket name is also used as the prefix of the dataset path.
mybucketname = "mybucket-amlgcp202007"

# Placeholder: replace with the actual GCP project name.
project_name = "gcpのプロジェクト名を指定します"

In [None]:
from azureml.train.sklearn import SKLearn

# Path of the dataset, prefixed with the bucket name.
file_path = mybucketname + '/data/breast_cancer.csv'

# Arguments handed to the entry script. Model options come first, then the
# Google Cloud Storage options, preserving the original argument order.
script_params = {
    '--kernel': 'random_forest',
    '--penalty': 1.0,
}
# NOTE(review): the flag below is spelled 'credentail'; it presumably has to
# match the argument name parsed by train_breast_cancer_gcs.py - confirm
# against that script before renaming it.
script_params.update({
    '--credentail_path_arg': 'credential-gbqtoaml-1c3df6d8f54e.json',
    '--project_name_arg': project_name,
    '--bucket_name_arg': mybucketname,
    '--file_path_arg': file_path,
})

# Estimator settings: where the code lives, which script to run, where to
# run it, and the extra pip packages requested for the run.
estimator_settings = {
    'source_directory': project_folder,
    'script_params': script_params,
    'compute_target': compute_target,
    'entry_script': 'train_breast_cancer_gcs.py',
    'pip_packages': [
        'joblib==0.13.2',
        'google-cloud-storage==1.29.0',
        'gcsfs==0.6.2',
    ],
}
estimator = SKLearn(**estimator_settings)

In [None]:
# Submit the estimator to the experiment and keep the returned run handle.
run = experiment.submit(config=estimator)

In [None]:
from azureml.widgets import RunDetails

# Show the notebook widget for the submitted run.
run_details = RunDetails(run)
run_details.show()

In [None]:
# Wait for the submitted run to finish. show_output=True is passed so the
# run's output is shown in this cell while waiting.
run.wait_for_completion(show_output=True)

In [None]:
# Evaluate the run object as the cell's final expression so the notebook
# renders it.
run

#### モデル登録

In [None]:
# List the files stored with the run, to check for the model file before
# registering it.
run_file_names = run.get_file_names()
print(run_file_names)

In [None]:
# Register the model file stored with the run under a model name in the
# workspace.
model = run.register_model(
    model_path='outputs/model.joblib',
    model_name='sklearn-breast_cancer',
)