MIT License

Copyright (c) Microsoft Corporation. All rights reserved.

This notebook is adapted from Microsoft Learning mslearn-dp100 

Copyright (c) 2021 PyLadies Amsterdam, Alyona Galyeva

# Generate batch data

In [None]:
import os
import pandas as pd
import numpy as np
from azureml.core import Workspace, Dataset

In [None]:
# Current working directory of the kernel at the moment this cell runs.
# NOTE(review): WORKDIR is not referenced by any other cell visible here — confirm it is still needed.
WORKDIR: str = os.getcwd()

In [None]:
# Local staging directory for the generated batch files.
BATCH_FOLDER = 'batch-data'

# exist_ok=True keeps this cell re-runnable when the directory already exists.
os.makedirs(name=BATCH_FOLDER, exist_ok=True)
print('Folder created!')

In [None]:
# Load the Azure ML workspace via Workspace.from_config() (reads config.json, per the original note).
ws = Workspace.from_config()
# Default datastore of that workspace; the generated batch files are uploaded to it in a later cell.
datastore = ws.get_default_datastore()

In [None]:
# Look up the registered "test_nyc_demand_data" dataset in the workspace and
# materialise it as a pandas DataFrame.
test_ds = Dataset.get_by_name(workspace=ws, name="test_nyc_demand_data")
test = test_ds.to_pandas_dataframe()

# Show the first rows as the cell output.
test.head()

In [None]:
# Pick one week of rows (Monday 31-07-2017 to Sunday 06-08-2017 according to the
# original note), drop the target and timestamp columns, and convert to a NumPy array.
# The week is selected by row position, so the offsets are only valid for the
# dataset version/ordering they were chosen for.
WEEK_START_ROW = 9480
WEEK_STOP_ROW = 9648  # exclusive; 168 rows = 7 x 24 (presumably hourly records — confirm)

week_rows = test.iloc[WEEK_START_ROW:WEEK_STOP_ROW]

# A positional slice that runs past the end of the frame is silently truncated,
# so fail loudly here instead of generating an incomplete batch.
expected_rows = WEEK_STOP_ROW - WEEK_START_ROW
assert len(week_rows) == expected_rows, (
    "expected {} rows for the selected week, got {}".format(expected_rows, len(week_rows))
)

# Feature matrix only: 'demand' is the target and 'timeStamp' is not a model input.
X = week_rows.drop(columns=['demand', 'timeStamp']).to_numpy()

In [None]:
# Write every row of X to its own comma-separated file: 1.csv, 2.csv, ...
print("Saving files...")
for file_no, sample in enumerate(X, start=1):
    sample_path = os.path.join(BATCH_FOLDER, '{}.csv'.format(file_no))
    sample.tofile(sample_path, sep=",")
print("files saved!")

In [None]:
# Upload the locally generated files to the "batch-data" path of the datastore,
# overwriting any files already there.
# The source directory comes from BATCH_FOLDER so the upload always reads from the
# folder the files were actually written to.
datastore.upload(src_dir=BATCH_FOLDER, target_path="batch-data", overwrite=True, show_progress=True)

In [None]:
# Register a dataset for the input data.
# The file dataset points at the 'batch-data/' path in the datastore (the upload target).
batch_data_set = Dataset.File.from_files(path=(datastore, 'batch-data/'), validate=False)
try:
    # create_new_version=True is passed so re-running the cell registers a new
    # version under the same name (per the flag name — confirm against the SDK docs).
    batch_data_set = batch_data_set.register(workspace=ws,
                                             name='batch-data',
                                             description='batch data for nyc demand energy forecast',
                                             create_new_version=True)
except Exception as ex:
    # Keep the notebook running as before, but label the failure clearly and
    # do not report success afterwards.
    print("Dataset registration failed:", ex)
else:
    print("Done!")