In [3]:
import azureml.core
# Check core SDK version number
print("SDK version:", azureml.core.VERSION)

SDK version: 1.0.17


In [4]:
from azureml.core import Workspace, Datastore

ws = Workspace.from_config("../config2.json")
print("Workspace {} is loaded".format(ws.name))

Found the config file in: /Users/sleebapaul/AML_MSFT/config2.json
Workspace Auria is loaded


In [5]:
datastores = ws.datastores
for name, ds in datastores.items():
    print(name, ds.datastore_type, ds.container_name, ds.account_name)

workspaceblobstore AzureBlob azureml-blobstore-963d160e-9bdd-40a8-852c-1fc06dfe7c7a auria5226806917
workspacefilestore AzureFile azureml-filestore-963d160e-9bdd-40a8-852c-1fc06dfe7c7a auria5226806917
videos AzureBlob videos happypathspublic
models AzureBlob styletransfer pipelinedata
breeds AzureBlob azureml-blobstore-315a742f-5c1f-465a-9486-7fd50bbb0012 danielscstoragebhmgfqha
images_datastore AzureBlob sampledata pipelinedata


In [6]:
cts = ws.compute_targets
for ct in cts:
    print(ct)

testing6854
demo-dogbreeds
gpuclusterNCv2
gpuclusterNCv3
cpucluster
v100cluster1
gpucluster
cpu-cluster
gpu-cluster


In [7]:
from azureml.core.compute import AmlCompute

# CPU Cluster
cpu_compute_target = "cpucluster"

try:
    cpu_compute = AmlCompute(ws, cpu_compute_target)
    print("Found existing compute target: {}".format(cpu_compute.name))
except:
    provisioning_config = AmlCompute.provisioning_configuration(vm_size = "STANDARD_D2_V2",
                                                                min_nodes = 1,
                                                                max_nodes = 4)
    cpu_compute = AmlCompute.create(ws, cpu_compute_target, provisioning_config)
    cpu_compute.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)
    print("Created new compute target: {}".format(cpu_compute.name))

# GPU Cluster
gpu_compute_target_1 = "gpuclusterNCv2"

try:
    gpu_compute_1 = AmlCompute(ws, gpu_compute_target_1)
    print("Found existing compute target: {}".format(gpu_compute_1.name))
except:
    provisioning_config = AmlCompute.provisioning_configuration(vm_size = "STANDARD_NC6S_V2",
                                                                min_nodes = 1,
                                                                max_nodes = 4)
    gpu_compute_1 = AmlCompute.create(ws, gpu_compute_target_1, provisioning_config)
    gpu_compute_1.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)
    print("Created new compute target: {}".format(gpu_compute_1.name))
    
    
gpu_compute_target_2 = "gpuclusterNCv3"

try:
    gpu_compute_2 = AmlCompute(ws, gpu_compute_target_2)
    print("Found existing compute target: {}".format(gpu_compute.name))
except:
    provisioning_config = AmlCompute.provisioning_configuration(vm_size = "STANDARD_NC6S_V3",
                                                                min_nodes = 1,
                                                                max_nodes = 4)
    gpu_compute_2 = AmlCompute.create(ws, gpu_compute_target_2, provisioning_config)
    gpu_compute_2.wait_for_completion(show_output=True, min_node_count=None, timeout_in_minutes=20)
    print("Created new compute target: {}".format(gpu_compute_2.name))

Found existing compute target: cpucluster
Found existing compute target: gpuclusterNCv2
Succeeded
AmlCompute wait for completion finished
Minimum number of nodes requested have been provisioned
Created new compute target: gpuclusterNCv3


In [8]:
from azureml.data.data_reference import DataReference

fs = Datastore.get(ws, datastore_name='workspacefilestore')
trainedModel = DataReference(
    datastore=fs,
    data_reference_name="finetunedGPTModel",
    path_on_datastore="finetunedGPTModel/run1")

print("DataReference object created")

DataReference object created


In [24]:
from azureml.train.estimator import Estimator
from azureml.pipeline.core import PipelineData
from azureml.pipeline.core.graph import PipelineParameter
from azureml.core.runconfig import CondaDependencies, RunConfiguration

project_folder = "../gpt-2-finetuning/"

output_model_location = PipelineData("GPT2TrainedModel", datastore=fs)


script_params = {
    '--run_name': trainedModel,
}

genHaikuStep = Estimator(source_directory=project_folder,
                         compute_target=gpu_compute_target_1,
                         entry_script='generateFromGPT.py',
                         script_params=script_params,
                         node_count=1,
                         process_count_per_node=1,
                         pip_packages=[ "gpt-2-simple", "numpy", "tensorflow-gpu==1.13.1"],
                         custom_docker_image = "sleebapaul/cuda_10_0_cudnn_7_conda:ubuntu",
                         use_gpu=True)
genHaikuStep._estimator_config.environment.python.user_managed_dependencies=False

print("genHaikuStep step created")

genHaikuStep step created


In [25]:
from azureml.core import Experiment

experiment_name = 'GPT2_testing_Experiment'
experiment = Experiment(ws, name=experiment_name)

In [26]:
gen_haiku_run = experiment.submit(genHaikuStep)

print("Pipeline is submitted for execution")

Pipeline is submitted for execution
