In [None]:
import azureml.core
from azureml.core import Workspace, Experiment, Environment, ScriptRunConfig
from datetime import datetime
import os
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException
from azureml.core import Dataset
from azureml.data import OutputFileDatasetConfig
from azureml.core import Workspace, Environment
from IPython.display import Image

# Toggle read by the compute-target cell further down to choose between the
# GPU cluster name and 'cpu-cluster'.
use_gpu = True

# Last expression of the cell: shows the timestamp at which this cell was run.
datetime.now()

In [None]:
# Connect to the workspace described by the local config and take its default datastore.
ws = Workspace.from_config()
datastore = ws.get_default_datastore()

# Input: the files under 'credit_risk_data/' on the datastore, configured as a
# mounted input named 'data'.
# NOTE(review): neither `datafldr_path` nor `output` is referenced by any later
# cell visible in this notebook -- confirm whether the script should receive them.
datafldr_path = (
    Dataset.File
    .from_files(datastore.path('credit_risk_data/'))
    .as_named_input('data')
    .as_mount()
)

# Output: configured to upload to 'outputs/profiles/' on the same datastore with overwrite=True.
output = OutputFileDatasetConfig(
    destination=(datastore, 'outputs/profiles/'),
).as_upload(overwrite=True)

# years to use for training, joined into one comma-separated string
# (passed to the script as its only argument when the run is configured)
years = ",".join(str(year) for year in [2007, 2008, 2009])

In [None]:
# Load the workspace handle and print its identifying details, one per line.
ws = Workspace.from_config()
for workspace_detail in (ws.name, ws.resource_group, ws.location, ws.subscription_id):
    print(workspace_detail)

# Report the name of the workspace's default datastore.
datastore = ws.get_default_datastore()
datastore_message = "Default datastore's name: {}".format(datastore.name)
print(datastore_message)

In [None]:
# Experiment handle in workspace `ws` under which the training run is
# submitted later (via experiment.submit).
experiment_name = 'credit-default-risk-sample'
experiment = Experiment(workspace=ws, name=experiment_name)


In [None]:
# Select the compute target for the training run: reuse the cluster already
# registered in the workspace under `cluster_name`, or provision it if absent.
# The result is always stored in `gpu_cluster` (the name later cells use),
# regardless of whether the GPU or CPU option was chosen.
if use_gpu:
    cluster_name = 'Standard-NC12s-v3' #'gpu-cluster'
    vm_size = 'Standard_NC12s_v3'
else:
    cluster_name = 'cpu-cluster'
    # Previously the GPU SKU was provisioned even under the CPU cluster name.
    # NOTE(review): 'Standard_M128s' is the alternative SKU mentioned in the
    # original inline comment -- confirm it is the intended CPU size and that
    # the subscription has quota for it.
    vm_size = 'Standard_M128s'

# Fetch the workspace's compute targets once instead of once per lookup.
existing_targets = ws.compute_targets

if cluster_name in existing_targets:
    gpu_cluster = existing_targets[cluster_name]
    if isinstance(gpu_cluster, AmlCompute):
        print('Found compute target. Will use {0} '.format(cluster_name))
    else:
        # Surface the mismatch instead of silently continuing with a target
        # that is not an AmlCompute cluster.
        print('Compute target {0} exists but is not an AmlCompute cluster (found {1})'.format(
            cluster_name, type(gpu_cluster).__name__))
else:
    print('creating new cluster')
    provisioning_config = AmlCompute.provisioning_configuration(vm_size = vm_size,
                                                                max_nodes = 1,
                                                                idle_seconds_before_scaledown = 600,
                                                                vm_priority = "lowpriority")

    gpu_cluster = ComputeTarget.create(ws, cluster_name, provisioning_config)
    # Block until provisioning finishes or the 20-minute timeout elapses.
    gpu_cluster.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)

# use get_status() to get a detailed status for the current cluster
print(gpu_cluster.get_status().serialize())

In [None]:
# Load the workspace, list its environments (the list itself is not used
# further in the visible cells), and fetch the one named "credit-risk".
ws = Workspace.from_config()
env_list = Environment.list(ws)
env = Environment.get(workspace=ws, name="credit-risk")

# Docker section: enable Docker and clear the base image.
# NOTE(review): clearing base_image on a fetched environment looks deliberate
# but its effect is not visible from this notebook -- confirm.
docker_settings = env.docker
docker_settings.enabled = True
docker_settings.base_image = None

# Python section: mark dependencies as user-managed and point at the
# interpreter of the 'rapids' conda env inside the image.
python_settings = env.python
python_settings.user_managed_dependencies = True
python_settings.interpreter_path = "/opt/conda/envs/rapids/bin/python"

# Drop any environment variables carried by the fetched environment.
env.environment_variables = None


In [None]:
# Describe the training job: run ./src/crs_main.py on the selected compute
# target inside `env`, with the comma-separated years as the only argument.
config = ScriptRunConfig(
    source_directory="./src",
    script="crs_main.py",
    arguments=[years],
    compute_target=gpu_cluster,
    environment=env,
)

# Submit the job to the experiment and print the run handle.
run = experiment.submit(config)
print(run)

# Wait for the run to complete (show_output=True), then print whatever
# wait_for_completion returned.
completion_result = run.wait_for_completion(show_output=True)
print(completion_result)

In [None]:
# Fetch the metrics of the submitted run and show them as the cell output.
metrics = run.get_metrics()
metrics

In [None]:
# Show the file names reported by the run (the download cell below filters this list).
run.get_file_names()

In [None]:
# Download every run file whose path starts with 'assets' into the local
# 'files' folder, keeping only the last path component as the local file name.
download_dir = 'files'
os.makedirs(download_dir, exist_ok=True)

asset_names = (name for name in run.get_file_names() if name.startswith('assets'))
for remote_name in asset_names:
    # Text after the final '/' (the whole name when there is no '/').
    local_path = os.path.join(download_dir, remote_name.rpartition('/')[2])
    print('Downloading file {} to {}...'.format(remote_name, local_path))
    run.download_file(remote_name, local_path)


In [None]:
# Display files/shap.png inline. The file is expected to come from the download
# cell above, which only fetches run files whose path starts with 'assets'.
Image(filename='files/shap.png') 