Copyright (c) Microsoft Corporation.

Licensed under the MIT License.

# Generate Inventory Predictions
---

**This notebook is designed to be run from an AzureML Compute instance as outlined in the 
[Many Models](https://github.com/microsoft/solution-accelerator-many-models/blob/master/EnvironmentSetup.md) documentation**.
It allows for the easy submission of an inferencing run on the existing many models pipeline. 

In [None]:
import azureml.core
from azureml.core import Workspace, Datastore, Dataset
import pandas as pd

# Connect to the AzureML workspace using the saved workspace configuration.
# `ws` is reused by every later cell in this notebook.
ws = Workspace.from_config()

# Take a look at Workspace
ws.get_details()

In [None]:
# Name under which the blob container is registered as an AzureML datastore.
blob_datastore_name = "automl_many_models2"
# Blob container that receives the generated prediction input files.
container_name = "mldata"
# Placeholders: replace with the storage account's name and access key before
# running the cells below. Avoid committing a real key with the notebook.
account_name = "<Storage Account Name>"
account_key= "<Storage Account Key>"


In [None]:
# Folder (blob name prefix) inside the container that holds the prediction inputs.
ds_predict_path = 'ds-predict'

# Register the blob container with the workspace as a datastore, creating the
# container first when it does not exist yet.
datastore = Datastore.register_azure_blob_container(
    workspace=ws,
    datastore_name=blob_datastore_name,
    container_name=container_name,
    account_name=account_name,
    account_key=account_key,
    create_if_not_exists=True,
)

### Generate input file for prediction based on sample data
**Note**: one CSV file is generated per cooler/SKU combination. The timestamps in these files represent the three weeks after 
the end of the simulated data. Also note that all item quantities are set to 0.  When finished, the resulting data set will 
contain a new column with predictions for each row in the time series data.

In [None]:
from azure.storage.blob import BlobServiceClient

# One input series is generated for every (cooler, SKU) combination below.
skus = ['cdc_00', 'crc_90']
coolers = [0, 1, 2, 3, 4]

# 21 days x 24 hours = 504 hourly rows between start_date and end_date (inclusive).
hours_rows = 504
start_date = '2022-01-01 00:00:00'
end_date = '2022-01-21 23:00:00'

# Client for the storage account that backs the registered datastore.
blob_service_client = BlobServiceClient(
    f"https://{account_name}.blob.core.windows.net",
    credential=account_key,
)

# The timestamp column is identical for every series, so build it once instead
# of once per loop iteration.
timestamps = pd.date_range(start=start_date, end=end_date, periods=hours_rows)

for cooler in coolers:
    for sku in skus:
        list_sku = [sku] * hours_rows
        list_cooler = [cooler] * hours_rows
        # Quantities are placeholders; the inference run adds the predictions.
        list_quantity = [0] * hours_rows
        row_def = {
            'PickListFulfilledTimestamp': timestamps,
            'CoolerId': list_cooler,
            'ItemSku': list_sku,
            'ItemQuantity': list_quantity,
        }
        df = pd.DataFrame(data=row_def)
        # The row index is written out as the PickListId column.
        df.index.name = 'PickListId'
        output = df.to_csv(index=True, encoding="utf-8")
        blob_client = blob_service_client.get_blob_client(
            container=container_name,
            blob=f"{ds_predict_path}/{cooler}-{sku}.csv",
        )
        # overwrite=True keeps this cell re-runnable: without it the upload
        # fails as soon as the blob already exists from a previous run.
        blob_client.upload_blob(output, blob_type="BlockBlob", overwrite=True)

In [None]:

# The prediction dataset is registered as "<base dataset name>_predict".
dataset_name = 'coolerdataset'
predict_dataset_name = f"{dataset_name}_predict"

# Build a file dataset over the prediction folder of the registered datastore.
ds_predict = Dataset.File.from_files(
    path=datastore.path(ds_predict_path),
    validate=True,
)

# Register it in the workspace; an already registered name gets a new version.
ds_predict.register(
    workspace=ws,
    name=predict_dataset_name,
    create_new_version=True,
)

In [None]:


# Get a named datastore from the current workspace
# NOTE(review): this looks up 'automl_many_models', while the datastore registered
# earlier in this notebook is named "automl_many_models2" - confirm that the two
# names are intentionally different. `dstore` is later passed to the inference
# steps as `output_datastore`.
dstore = Datastore.get(ws, datastore_name='automl_many_models')

In [None]:
from azureml.core.compute import AmlCompute, ComputeTarget

# Name of the AmlCompute cluster to reuse or create.
amlcompute_cluster_name = "cpucluster"

# The cluster counts as found only when a compute target with this name exists
# in the workspace AND it is an AmlCompute target.
cts = ws.compute_targets
found = (
    amlcompute_cluster_name in cts
    and cts[amlcompute_cluster_name].type == 'AmlCompute'
)

if found:
    print('Found existing compute target.')
    compute = cts[amlcompute_cluster_name]
else:
    print('Creating a new compute target...')
    provisioning_config = AmlCompute.provisioning_configuration(
        vm_size='STANDARD_D16S_V3',
        min_nodes=2,
        max_nodes=20,
    )
    # Create the cluster.
    compute = ComputeTarget.create(ws, amlcompute_cluster_name, provisioning_config)

print('Checking cluster status...')
# wait_for_completion can poll for a minimum node count and up to a timeout.
# With min_node_count=None the cluster's own scale settings are used.
compute.wait_for_completion(
    show_output=True,
    min_node_count=None,
    timeout_in_minutes=20,
)

# For a more detailed view of current AmlCompute status, use get_status().

In [None]:
from azureml.core import Experiment
from azureml.core.dataset import Dataset

# Experiment under which the inference pipeline run is submitted.
experiment = Experiment(ws, 'manymodels-prediction-pipeline')

# Latest registered version of the prediction dataset (5 coolers x 2 SKUs =
# 10 series), exposed to the pipeline under the input name 'forecast_10_models'.
filedst_10_models = Dataset.get_by_name(ws, name='coolerdataset_predict', version='latest')
filedst_10_models_input = filedst_10_models.as_named_input('forecast_10_models')

# Training experiment and pipeline run to use for inference.
# Replace the placeholder below with the run id printed by the training notebook.
training_experiment_name = "connectedcooler-training-pipeline"
training_pipeline_run_id = "<Training Pipeline Run ID from Notebook Output>"

In [None]:
from azureml.contrib.automl.pipeline.steps import AutoMLPipelineBuilder

# Columns that identify one time series: one model per (cooler, SKU) pair.
partition_column_names = ['CoolerId', 'ItemSku']

# Build the batch-inference pipeline steps. They read the prediction dataset,
# use the given training experiment/run, and write results to `dstore`.
inference_steps = AutoMLPipelineBuilder.get_many_models_batch_inference_steps(
    experiment=experiment,
    inference_data=filedst_10_models_input,
    compute_target=compute,
    node_count=2,
    process_count_per_node=8,
    run_invocation_timeout=300,
    output_datastore=dstore,
    train_experiment_name=training_experiment_name,
    train_run_id=training_pipeline_run_id,
    partition_column_names=partition_column_names,
    time_column_name="PickListFulfilledTimestamp",
    target_column_name="ItemQuantity",
)

### Submit the job to the inferencing pipeline

In [None]:
from azureml.pipeline.core import Pipeline

# Wrap the inference steps in a pipeline and submit it under the prediction
# experiment. `run` is used afterwards to retrieve the forecast output.
pipeline = Pipeline(workspace = ws, steps=inference_steps)
run = experiment.submit(pipeline)

### Retrieve the prediction

In [None]:
import pandas as pd
import shutil
import os
import sys 
from scripts.helper import get_forecasting_output

# Names handed to the helper that locates the forecast output of the run.
forecasting_results_name = "forecasting_results"
forecasting_output_name = "many_models_inference_output"

# The helper's return value is passed straight to pandas for parsing.
forecast_file = get_forecasting_output(
    run,
    forecasting_results_name,
    forecasting_output_name,
)

# The output is read as space-delimited with no header row, so column names
# are assigned explicitly: the input columns plus the appended prediction.
df = pd.read_csv(forecast_file, sep=" ", header=None)
df.columns = [
    "PickListId",
    "PickListFulfilledTimestamp",
    "CoolerId",
    "ItemSku",
    "ItemQuantity",
    "prediction",
]

print("Prediction has ", len(df), " rows. Here the first 10 rows are being displayed.")
df.head(10)

### From here, create the Inventory Projected table using the data from the forecast file.