In [None]:
import os

import azureml.core

# Workspace connection settings. Each value is read from the named environment
# variable when it is set; otherwise the literal fallback on the same line is used.
subscription_id = os.environ.get("SUBSCRIPTION_ID", "f97fb87f-32d7-4d7c-9bc5-ea43b4fea7ac")
resource_group = os.environ.get("RESOURCE_GROUP", "ManyModelsRG")
workspace_name = os.environ.get("WORKSPACE_NAME", "ManyModelsAccelerator")
workspace_region = os.environ.get("WORKSPACE_REGION", "westus2")

In [None]:
from azureml.core import Workspace

# Try to connect to an existing workspace using subscription_id,
# resource_group and workspace_name.
try:
    ws = Workspace(subscription_id=subscription_id,
                   resource_group=resource_group,
                   workspace_name=workspace_name)
    # Write the details of the workspace to a configuration file
    ws.write_config()
    print("Workspace configuration succeeded. Skip the workspace creation steps below")
except Exception as err:
    # `except Exception` (instead of a bare `except:`) lets KeyboardInterrupt
    # and SystemExit propagate, so the cell can still be interrupted.
    print("Workspace not accessible. Change your parameters or create a new workspace below")
    # Show what actually went wrong so the failure can be diagnosed
    # (wrong subscription, missing permissions, authentication error, ...).
    print("Reason: {}: {}".format(type(err).__name__, err))

In [None]:

# Create the workspace from the settings chosen earlier in the notebook.
# The arguments are collected in one mapping so they can be read at a glance.
workspace_create_args = {
    "name": workspace_name,
    "subscription_id": subscription_id,
    "resource_group": resource_group,
    "location": workspace_region,
    "create_resource_group": True,
    "sku": 'basic',
    "exist_ok": True,
}
ws = Workspace.create(**workspace_create_args)

# Last expression of the cell: the notebook displays the returned details.
ws.get_details()

# Write the details of the workspace to a configuration file (by default under `.azureml/` in the current working directory)

In [None]:
# Write the details of the workspace `ws` to a configuration file.
ws.write_config()

# Create compute

In [None]:
from azureml.core.compute import ComputeTarget, AmlCompute
from azureml.core.compute_target import ComputeTargetException

# Name under which the CPU cluster is looked up (and created, if missing)
cpu_cluster_name = "cpucluster"

# Look the cluster up first; only provision a new one when the lookup
# raises ComputeTargetException.
try:
    cpu_cluster = ComputeTarget(workspace=ws, name=cpu_cluster_name)
except ComputeTargetException:
    compute_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D13_V2',
        min_nodes=0,
        max_nodes=5,
    )
    cpu_cluster = ComputeTarget.create(ws, cpu_cluster_name, compute_config)
else:
    print('Found existing cluster, use it.')

# Block until the cluster operation finishes, streaming its output.
cpu_cluster.wait_for_completion(show_output=True)

# Create dataset

In [None]:
!pip install azureml-opendatasets

In [None]:
from azureml.core.dataset import Dataset
from azureml.opendatasets import OjSalesSimulated

# Handle to the complete simulated OJ sales file dataset
oj_sales_files = OjSalesSimulated.get_file_dataset()

# Keep only the first 10 files. Reuse the handle obtained above instead of
# calling get_file_dataset() a second time.
oj_sales_files_small = oj_sales_files.take(10)

# The same relative path is used for the local download folder and for the
# destination folder on the datastore.
target_path = 'oj_sales_data'

# Download the subset locally, then upload it to the workspace's default datastore
oj_sales_files_small.download(target_path, overwrite=True)
datastore = ws.get_default_datastore()
datastore.upload(src_dir=target_path,
                 target_path=target_path,
                 overwrite=True)

# Build a file dataset from the uploaded folder
path_on_datastore = datastore.path(target_path)
ds = Dataset.File.from_files(path=path_on_datastore, validate=False)

# Register the dataset in the workspace under this name; the call is the last
# expression of the cell, so its return value is displayed by the notebook.
dataset_name = 'oj_data_small'
ds.register(ws, dataset_name, create_new_version=True)