In [1]:
import os

import azureml.core

# Workspace coordinates. Each value is read from the environment when the
# variable is set; otherwise the literal fallback below is used.
# NOTE(review): the fallback subscription ID looks account-specific — set
# SUBSCRIPTION_ID in the environment when running against another account.
subscription_id = os.environ.get("SUBSCRIPTION_ID", "f97fb87f-32d7-4d7c-9bc5-ea43b4fea7ac")
resource_group = os.environ.get("RESOURCE_GROUP", "ManyModelsRG")
workspace_name = os.environ.get("WORKSPACE_NAME", "ManyModelsAccelerator")
workspace_region = os.environ.get("WORKSPACE_REGION", "westus2")

In [2]:
from azureml.core import Workspace

# Try to attach to an existing workspace identified by the parameters
# defined in the first cell.
try:
    ws = Workspace(subscription_id=subscription_id,
                   resource_group=resource_group,
                   workspace_name=workspace_name)
    # Write the details of the workspace to a local configuration file
    ws.write_config()
    print("Workspace configuration succeeded. Skip the workspace creation steps below")
except Exception as err:
    # `except Exception` (instead of a bare `except:`) lets KeyboardInterrupt
    # and SystemExit propagate, and the cause is printed so the failure can
    # be diagnosed rather than silently swallowed.
    print("Workspace not accessible. Change your parameters or create a new workspace below")
    print("Reason:", repr(err))

Workspace configuration succeeded. Skip the workspace creation steps below


In [3]:

# Create the workspace from the parameters defined in the first cell.
# The keyword arguments are gathered in a dict and unpacked into the call.
workspace_create_args = dict(
    name=workspace_name,
    subscription_id=subscription_id,
    resource_group=resource_group,
    location=workspace_region,
    create_resource_group=True,
    sku='basic',
    exist_ok=True,
)
ws = Workspace.create(**workspace_create_args)

# Last expression of the cell, so the workspace details are displayed.
ws.get_details()

# The workspace details are written to a configuration file by the
# ws.write_config() call in the following cell.

{'id': '/subscriptions/f97fb87f-32d7-4d7c-9bc5-ea43b4fea7ac/resourceGroups/ManyModelsRG/providers/Microsoft.MachineLearningServices/workspaces/ManyModelsAccelerator',
 'name': 'ManyModelsAccelerator',
 'location': 'westus2',
 'type': 'Microsoft.MachineLearningServices/workspaces',
 'tags': {},
 'sku': 'Enterprise',
 'workspaceid': 'a5ef5f7f-43af-4a79-9304-367ebf3609b0',
 'description': '',
 'friendlyName': '',
 'creationTime': '2019-11-26T21:10:54.5723355+00:00',
 'containerRegistry': '/subscriptions/f97fb87f-32d7-4d7c-9bc5-ea43b4fea7ac/resourceGroups/ManyModelsRG/providers/Microsoft.ContainerRegistry/registries/manymodelsac5120b58a',
 'keyVault': '/subscriptions/f97fb87f-32d7-4d7c-9bc5-ea43b4fea7ac/resourcegroups/manymodelsrg/providers/microsoft.keyvault/vaults/manymodelsacce2295477689',
 'applicationInsights': '/subscriptions/f97fb87f-32d7-4d7c-9bc5-ea43b4fea7ac/resourcegroups/manymodelsrg/providers/microsoft.insights/components/manymodelsacce9409214618',
 'identityPrincipalId': '250

In [4]:
# Write the details of the workspace to a configuration file. No path is
# passed, so the SDK's default location is used.
ws.write_config()

# Create compute

In [5]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name of the CPU cluster to look up (or create if the lookup fails)
cpu_cluster_name = "hichandonew"

# Look the cluster up by name first. A ComputeTargetException from the
# lookup is treated as "not found" and a new cluster is provisioned;
# otherwise the existing cluster is reused.
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D13_V2',
        min_nodes=5,
        max_nodes=20,
    )
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Wait for the cluster to finish, printing progress while waiting.
cpu_cluster.wait_for_completion(show_output=True)

Found existing cluster, use it.
Succeeded
AmlCompute wait for completion finished

Minimum number of nodes requested have been provisioned


# Create dataset

In [6]:
# Use the %pip magic rather than `!pip`: `!pip` runs whichever pip is first on
# the shell PATH, while %pip installs into the environment of the running kernel.
%pip install azureml-opendatasets



Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.
To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.


In [9]:
from azureml.core.dataset import Dataset
from azureml.opendatasets import OjSalesSimulated

# Handle to the full OJ sales simulated file dataset
oj_sales_files = OjSalesSimulated.get_file_dataset()

# Keep only the first 10 files. The dataset obtained above is reused instead
# of calling get_file_dataset() a second time.
oj_sales_files_small = oj_sales_files.take(10)

# The same relative path names both the local download folder and the
# destination folder on the datastore.
target_path = 'oj_sales_data'

# Download the 10-file subset locally, then upload that folder to the
# workspace's default datastore.
oj_sales_files_small.download(target_path, overwrite=True)
datastore = ws.get_default_datastore()
datastore.upload(src_dir=target_path,
                 target_path=target_path,
                 overwrite=True)

# Build a file dataset that points at the uploaded folder
path_on_datastore = datastore.path(target_path)
ds = Dataset.File.from_files(path=path_on_datastore, validate=False)

# Register the dataset in the workspace under a fixed name. This is the last
# expression of the cell, so the registration result is displayed.
dataset_name = 'oj_data_small'
ds.register(ws, dataset_name, create_new_version=True)

Uploading an estimated of 10 files
Uploading oj_sales_data\Store1000_dominicks.csv
Uploading oj_sales_data\Store1000_minute.maid.csv
Uploading oj_sales_data\Store1000_tropicana.csv
Uploading oj_sales_data\Store1001_dominicks.csv
Uploading oj_sales_data\Store1001_minute.maid.csv
Uploading oj_sales_data\Store1001_tropicana.csv
Uploading oj_sales_data\Store1002_dominicks.csv
Uploading oj_sales_data\Store1002_minute.maid.csv
Uploading oj_sales_data\Store1002_tropicana.csv
Uploading oj_sales_data\Store1003_dominicks.csv
Uploaded oj_sales_data\Store1000_minute.maid.csv, 1 files out of an estimated total of 10
Uploaded oj_sales_data\Store1002_minute.maid.csv, 2 files out of an estimated total of 10
Uploaded oj_sales_data\Store1002_dominicks.csv, 3 files out of an estimated total of 10
Uploaded oj_sales_data\Store1001_dominicks.csv, 4 files out of an estimated total of 10
Uploaded oj_sales_data\Store1001_minute.maid.csv, 5 files out of an estimated total of 10
Uploaded oj_sales_data\Store1000_

{
  "source": [
    "('workspaceblobstore', 'oj_sales_data')"
  ],
  "definition": [
    "GetDatastoreFiles"
  ],
  "registration": {
    "id": "7fcaaa6b-9024-4031-a4d6-9983c19ddf1c",
    "name": "oj_data_small",
    "version": 3,
    "workspace": "Workspace.create(name='ManyModelsAccelerator', subscription_id='f97fb87f-32d7-4d7c-9bc5-ea43b4fea7ac', resource_group='ManyModelsRG')"
  }
}