In [1]:
!pip install azureml-sdk[notebooks]



In [2]:
from azureml.core import Workspace
# Connect to the Azure ML workspace; every later cell reaches Azure through `ws`.
# NOTE(review): the subscription id is hardcoded below — consider loading it
# from a config file or environment variable before sharing this notebook.
ws = Workspace.get(
    "myworkspace",
    subscription_id='ff711122-6294-4fad-9d1f-bf505a51fc42',
    resource_group='mlproject',
    location='westus2',
)


# 2. Create a reference to the Azure ML Datastore
A datastore is an AML-specific component that abstracts away the Azure resource the data is stored on. It allows for cross-subscription or cross-resource-group data access on Azure (useful in an enterprise context). The main advantage is that it takes care of authentication for you (after the initial setup, minimal key management is required).
You don’t necessarily need one. If you don’t use one, be ready to pollute your code with authentication/authorization snippets. More code = more liability!

In [4]:
from azureml.core import Dataset
# Grab the workspace's default datastore; `dataupload` further down uploads
# files through this handle and builds the tabular dataset from it.
default_ds = ws.get_default_datastore()
# Print the datastore's summary (name, container, account, protocol, endpoint).
print(default_ds)

{
  "name": "workspaceblobstore",
  "container_name": "azureml-blobstore-4251d2dc-9aa6-4571-a78d-2baff407acbf",
  "account_name": "myworkspstorage32cc8b3bb",
  "protocol": "https",
  "endpoint": "core.windows.net"
}


In [5]:
from azureml.core.datastore import Datastore
# Look the same datastore up explicitly by its registered name.
datastore = Datastore.get(workspace=ws, datastore_name="workspaceblobstore")

In [6]:
# Show the datastore fetched by name; the recorded output matches the
# default datastore printed earlier in the notebook.
print(datastore)

{
  "name": "workspaceblobstore",
  "container_name": "azureml-blobstore-4251d2dc-9aa6-4571-a78d-2baff407acbf",
  "account_name": "myworkspstorage32cc8b3bb",
  "protocol": "https",
  "endpoint": "core.windows.net"
}


In [9]:
def dataupload(filename, force=False):
    """Upload a local CSV to the default datastore and register 'diabetes dataset'.

    Relies on the notebook-level names ``ws`` (the workspace), ``default_ds``
    (its default datastore) and ``Dataset`` being defined by earlier cells.

    By default nothing is uploaded when a dataset named 'diabetes dataset' is
    already present in ``ws.datasets``; the function only prints a notice and
    ``filename`` is left untouched. Pass ``force=True`` to upload ``filename``
    anyway and register the result as a new version of the dataset.

    Args:
        filename: Path of the local file to upload.
        force: When True, upload and register even if the dataset name is
            already registered in the workspace. Defaults to False, which
            keeps the original skip-if-registered behaviour.

    Returns:
        None. Progress and outcome are reported via ``print``.

    Raises:
        FileNotFoundError: If an upload is about to be attempted and
            ``filename`` is not an existing local file.
    """
    import os  # local import so the cell does not depend on an earlier import cell

    # Guard clause: skip all work when the dataset exists and no refresh was requested.
    if 'diabetes dataset' in ws.datasets and not force:
        print('Dataset already registered.')
        return

    # Fail early with a clear error instead of handing a bad path to the uploader.
    if not os.path.isfile(filename):
        raise FileNotFoundError(f'Local file not found: {filename}')

    default_ds.upload_files(files=[filename],  # Upload the given local csv file
                            target_path='diabetes-data/',  # Put it in a folder path in the datastore
                            overwrite=True,  # Replace existing files of the same name
                            show_progress=True)

    # Create a tabular dataset from the path on the datastore (this may take a
    # short while). The glob picks up every csv in the folder, not only the
    # file uploaded by this call.
    tab_data_set = Dataset.Tabular.from_delimited_files(path=(default_ds, 'diabetes-data/*.csv'))

    # Register the tabular dataset. Failures are reported but not re-raised,
    # so a registration problem does not abort the notebook run.
    try:
        tab_data_set = tab_data_set.register(workspace=ws,
                                             name='diabetes dataset',
                                             description='diabetes data',
                                             tags={'format': 'CSV'},
                                             create_new_version=True)
        print('Dataset registered.')
    except Exception as ex:
        print(ex)

In [10]:
# Try to upload diabetes2.csv. The recorded output shows the dataset name was
# already registered, so this call only printed the notice and uploaded nothing.
# NOTE(review): absolute local path — will not resolve on other machines unless
# the same folder layout exists; consider a configurable data directory.
dataupload('C:/Users/User/Desktop/Data/diabetes2.csv')

Dataset already registered.


In [11]:
# Try to upload diabetes.csv. As with the previous call, the recorded output
# shows the dataset name was already registered, so nothing was uploaded.
# NOTE(review): absolute local path — will not resolve on other machines unless
# the same folder layout exists; consider a configurable data directory.
dataupload('C:/Users/User/Desktop/Data/diabetes.csv')

Dataset already registered.
