In [15]:
import azureml.core
from azureml.core import Workspace

#Load workspace from the saved config file
try:
    ws = Workspace.from_config()
    print("Ready to use Azure ML SDK {} to work with {}".format(azureml.core.VERSION, ws.name))
except Exception as ex:
    # Catch Exception instead of using a bare except so that
    # KeyboardInterrupt/SystemExit still propagate, and print the underlying
    # error so the reason for the failure is visible.
    # Note: `ws` stays undefined on failure, so the following cells cannot run.
    print("Could not load AML workspace")
    print(ex)

Ready to use Azure ML SDK 1.26.0 to work with mlw_sdk


In [16]:
# Look up the workspace's default datastore once; later cells reuse `default_ds`.
default_ds = ws.get_default_datastore()
default_ds_name = default_ds.name

# Walk every datastore name known to the workspace and flag the default one.
for ds_name in ws.datastores:
    is_default = (ds_name == default_ds_name)
    print(ds_name, "- Default =", is_default)

azureml_globaldatasets - Default = False
workspaceblobstore - Default = True
workspacefilestore - Default = False


## Upload all data files to the default datastore (blob), then create and register tabular datasets

In [17]:
import os
from azureml.core import Dataset

# Local folder holding the data files. Built with os.path.join so the path
# contains no backslash escape sequences and is not tied to Windows.
directory = os.path.join('..', '..', 'data', 'OG_data')

# Folder inside the datastore. The same value is used as the upload target and
# as the prefix of the dataset path, so the two always agree.
datastore_dir = 'data/OG_data'

for filename in os.listdir(directory):
    # Only .txt and .csv files are uploaded and registered.
    if not filename.endswith((".txt", ".csv")):
        continue

    # Step 1: upload the file to the default datastore.
    try:
        default_ds.upload_files(files=[os.path.join(directory, filename)], target_path=datastore_dir, overwrite=True, show_progress=True)
        print("{} has been succesfully uploaded to datastore.\n".format(filename))
    except Exception as ex:
        print("Failed to upload {}.\n".format(filename))
        print(ex)
        # Skip this file: without a fresh upload there is nothing reliable to register.
        continue

    # Step 2: create a tabular dataset pointing at the uploaded file.
    try:
        tabular_data = Dataset.Tabular.from_delimited_files(path=(default_ds, '{}/{}'.format(datastore_dir, filename)))
    except Exception as ex:
        print("Failed to create a Tabular Dataset")
        print(ex)
        # Skip registration; otherwise `tabular_data` would be undefined or
        # still refer to the previous file and be registered under this name.
        continue

    # Step 3: register the dataset under the file name (new version if it already exists).
    try:
        registered_dataset = tabular_data.register(workspace=ws, name=filename, description='Optional', tags={'format': 'CSV'}, create_new_version=True)
    except Exception as ex:
        print(ex)
    else:
        # Report success only when registration actually succeeded.
        print('Datasets registered\n')

Uploading an estimated of 1 files
Uploading ..\..\data\OG_data\RUL_FD001.csv
Uploaded ..\..\data\OG_data\RUL_FD001.csv, 1 files out of an estimated total of 1
Uploaded 1 files
RUL_FD001.csv has been succesfully uploaded to datastore.

Datasets registered

Uploading an estimated of 1 files
Uploading ..\..\data\OG_data\test_FD001.csv
Uploaded ..\..\data\OG_data\test_FD001.csv, 1 files out of an estimated total of 1
Uploaded 1 files
test_FD001.csv has been succesfully uploaded to datastore.

Datasets registered

Uploading an estimated of 1 files
Uploading ..\..\data\OG_data\train_FD001.csv
Uploaded ..\..\data\OG_data\train_FD001.csv, 1 files out of an estimated total of 1
Uploaded 1 files
train_FD001.csv has been succesfully uploaded to datastore.

Datasets registered



In [18]:
# Print every dataset registered in the workspace together with its version.
print("Datasets:")
registered_names = list(ws.datasets.keys())
for dataset_name in registered_names:
    # Fetch the dataset by name to read its name and version.
    dataset = Dataset.get_by_name(ws, dataset_name)
    name, version = dataset.name, dataset.version
    print("\t", name, 'version', version)

Datasets:
	 train_FD001.csv version 1
	 test_FD001.csv version 1
	 RUL_FD001.csv version 1
	 train_FD001.txt version 1
	 test_FD001.txt version 1
	 RUL_FD001.txt version 1


## Manual upload and dataset registration

In [3]:
#Upload data to datastore
#Train and test data are both stored in the "data" folder inside the datastore.
import os  # imported here so this cell does not rely on another cell having run first

name_train_data = 'train_data.csv' #Enter name of local train data
name_test_data = 'test_data.csv' #Enter name of local test data

# Build the local paths with os.path.join instead of backslash string literals,
# which contain invalid escape sequences and only work on Windows.
# NOTE(review): the train file is read from data/OG_data while the test file is
# read from data/; the recorded cell output shows both under data/ - confirm
# which local folder is intended.
local_train_path = os.path.join('..', '..', 'data', 'OG_data', name_train_data)
local_test_path = os.path.join('..', '..', 'data', name_test_data)

try:
    default_ds.upload_files(files=[local_train_path, local_test_path], target_path='data', overwrite=True, show_progress=True)
    print("Train and test data succesfully uploaded to datastore.")
except Exception as ex:
    # Catch Exception (not a bare except) and print the cause of the failure.
    print("Failed to uploaded train and test data.")
    print(ex)
    


Uploading an estimated of 2 files
Uploading ..\..\data\test_data.csv
Uploaded ..\..\data\test_data.csv, 1 files out of an estimated total of 2
Uploading ..\..\data\train_data.csv
Uploaded ..\..\data\train_data.csv, 2 files out of an estimated total of 2
Uploaded 2 files
Train and test data succesfully uploaded to datastore.


In [4]:
#Create a tabular dataset

from azureml.core import Dataset
# Create tabular datasets from the uploaded train/test files, then register
# them in the workspace under their file names. Registration is attempted only
# if both datasets were created, and success is reported only if registration
# did not raise.
try:
    tab_dataset_train = Dataset.Tabular.from_delimited_files(path=(default_ds, 'data/{}'.format(name_train_data)))
    tab_dataset_test = Dataset.Tabular.from_delimited_files(path=(default_ds, 'data/{}'.format(name_test_data)))
except Exception as ex:
    print("Failed to create a Tabular Dataset")
    print(ex)
else:
    try:
        # register() returns the registered dataset; the names are rebound to it.
        tab_dataset_train = tab_dataset_train.register(workspace=ws, name=name_train_data, description='Optional', tags={'format': 'CSV'}, create_new_version=True)
        tab_dataset_test = tab_dataset_test.register(workspace=ws, name=name_test_data, description='Optional', tags={'format': 'CSV'}, create_new_version=True)
    except Exception as ex:
        print(ex)
    else:
        print('Datasets registered')

Datasets registered
