# The Course Code for Azure Machine Learning

### Set up this notebook with the existing experiment

In [None]:
import azureml.core
import pandas as pd
import numpy as np
import logging

from azureml.core import Workspace, Experiment, Dataset
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget
from azureml.train.automl.run import AutoMLRun
from datetime import datetime

# Re-declare the forecasting settings that the cells below rely on
target_column_name = 'cnt'
time_column_name = 'date'
max_horizon = 14

# Reconnect to the workspace, then to the experiment inside it
ws = Workspace(
    'INSERT_YOUR_SUBSCRIPTION_ID',
    'INSERT_YOUR_RESOURCE_GROUP_NAME',
    'INSERT_YOUR_RESOURCE_NAME',
)
experiment_name = 'INSERT_YOUR_EXPERIMENT_NAME'
experiment = Experiment(ws, experiment_name)

# Look the compute cluster up by name among the workspace's compute targets
amlcompute_cluster_name = "INSERT_YOUR_CLUSTER_NAME"
cts = ws.compute_targets
compute_target = cts[amlcompute_cluster_name]

# Rebuild the tabular dataset from the CSV held in the default datastore and
# mark the date column as its timestamp column
datastore = ws.get_default_datastore()
dataset = Dataset.Tabular.from_delimited_files(
    path=[(datastore, 'dataset/bike-no.csv')]
).with_timestamp_columns(
    fine_grain_timestamp=time_column_name
)
dataset.to_pandas_dataframe().reset_index(drop=True)

# Reattach to the existing AutoML run by its ID
run_id = 'INSERT_YOUR_RUN_ID'
remote_run = AutoMLRun(experiment, run_id)
remote_run

# Rebuild the test split: every row dated 2012-09-01 or later
test = dataset.time_after(datetime(2012, 9, 1), include_boundary=True)
test.to_pandas_dataframe().head(5).reset_index(drop=True)

### Get the information from the run and model with the best results

In [None]:
# Request the details of the AutoML run reconnected in the setup cell; as the
# last expression of the cell, the result is shown as the cell output.
remote_run.get_details()

In [None]:
# get_output() yields a pair, unpacked here into the best run and its fitted
# model; both names are reused by later cells.
best_run, fitted_model = remote_run.get_output()
# Show the steps of the fitted model as the cell output.
fitted_model.steps

### Get the features from the fitted run

In [None]:
# Ask the 'timeseriestransformer' step of the fitted model for the names of
# the features it engineered.
fitted_model.named_steps['timeseriestransformer'].get_engineered_feature_names()

In [None]:
# Fetch the featurization summary from the same 'timeseriestransformer' step
# and render its records as a DataFrame for easier reading.
featurization_summary = fitted_model.named_steps['timeseriestransformer'].get_featurization_summary()
pd.DataFrame.from_records(featurization_summary)

Before continuing, make sure you have uploaded the following files from the forecast folder:
- `forecasting_helper.py`
- `forecasting_script.py`
- `metrics_helper.py`
- `run_forecast.py`

You can find these files on the [course repo in the project folder](https://github.com/microsoft/c9-dev-intro-data-science/tree/main/regression-with-bikes/forecast/); they should be uploaded to the Azure Machine Learning Studio notebook folder where this notebook is running.

### Test the model with a rolling forecast

In [None]:
from run_forecast import run_rolling_forecast

# Run the forecast under a separate "<experiment>_test" experiment.
# NOTE: this rebinds remote_run from the AutoML run to the value returned by
# run_rolling_forecast.
test_experiment = Experiment(ws, experiment_name + '_test')
remote_run = run_rolling_forecast(
    test_experiment,
    compute_target,
    best_run,
    test,
    max_horizon,
    target_column_name,
    time_column_name,
)
remote_run

In [None]:
# Wait for the rolling-forecast run (remote_run was rebound to it in the
# previous cell) to complete; show_output=False presumably suppresses log
# streaming - confirm against the SDK docs.
remote_run.wait_for_completion(show_output=False)

### Evaluate the output of the test run predictions

In [None]:
# Copy the run's 'outputs/predictions.csv' to a local 'predictions.csv', then
# load it; df_all is the frame every evaluation cell below works on.
remote_run.download_file('outputs/predictions.csv', 'predictions.csv')
df_all = pd.read_csv('predictions.csv')

In [None]:
from azureml.automl.core._vendor.automl.client.core.common import metrics
from sklearn.metrics import mean_absolute_error, mean_squared_error
from matplotlib import pyplot as plt
from automl.client.core.common import constants
# MAPE is called below, so import it in this cell; previously it was only
# imported by a later cell, which made this cell fail with a NameError when
# the notebook was run from top to bottom.
from metrics_helper import MAPE

# Score the predictions over the whole test set: the actual values are in the
# target column of df_all and the model output is in its 'predicted' column.
print("Simple forecasting model")
rmse = np.sqrt(mean_squared_error(df_all[target_column_name], df_all['predicted']))
print("[Test Data] \nRoot Mean Squared Error: %.2f" % rmse)

mae = mean_absolute_error(df_all[target_column_name], df_all['predicted'])
print('mean_absolute_error score: %.2f' % mae)

mape = MAPE(df_all[target_column_name], df_all['predicted'])
print('MAPE: %.2f' % mape)

### Visualize the accuracy with mean absolute percentage error

In [None]:
# Plot outputs
%matplotlib inline
test_pred = plt.scatter(df_all[target_column_name], df_all['predicted'], color='b')
test_test = plt.scatter(df_all[target_column_name], df_all[target_column_name], color='g')
plt.legend((test_pred, test_test), ('prediction', 'truth'), loc='upper left', fontsize=8)
plt.show()

### Modify the statistical method for a more accurate representation

In [None]:
from metrics_helper import MAPE, APE


def _horizon_metrics(group):
    """Return the MAPE, RMSE and MAE of one group's predictions as a Series."""
    actual = group[target_column_name]
    predicted = group['predicted']
    return pd.Series({
        'MAPE': MAPE(actual, predicted),
        'RMSE': np.sqrt(mean_squared_error(actual, predicted)),
        'MAE': mean_absolute_error(actual, predicted),
    })


# One row of metrics per distinct 'horizon_origin' value
df_all.groupby('horizon_origin').apply(_horizon_metrics)

### Visualize the accurate scoring of the model

In [None]:
df_all_APE = df_all.assign(APE=APE(df_all[target_column_name], df_all['predicted']))
APEs = [df_all_APE[df_all['horizon_origin'] == h].APE.values for h in range(1, max_horizon + 1)]

%matplotlib inline
plt.boxplot(APEs)
plt.yscale('log')
plt.xlabel('horizon')
plt.ylabel('APE (%)')
plt.title('Absolute Percentage Errors by Forecast Horizon')

plt.show()

### Deploy the best model as a web service

In [None]:
# remote_run now refers to the rolling-forecast test run, so reattach to the
# AutoML training run under a separate name before registering its model.
# Use the ID of your own AutoML run here (the same one as in the setup cell);
# a run ID copied from another workspace cannot be resolved in yours.
run_id = 'INSERT_YOUR_RUN_ID'
training_run = AutoMLRun(experiment, run_id)
training_run

In [None]:
# Display the best run obtained earlier from remote_run.get_output().
best_run

In [None]:
# Read the model name recorded in the best run's properties; it is passed to
# register_model() in a later cell.
model_name = best_run.properties['model_name']
model_name

In [None]:
# Local path for the scoring script; the same path is later used as the
# entry script of the inference configuration
script_file_name = 'inference/score.py'
best_run.download_file('outputs/scoring_file_v_1_0_0.py', script_file_name)

In [None]:
# Register the model from the training run under the name read from the best
# run's properties, then print the ID the run reports for it
description = "Bike Share AutoML Model"
model = training_run.register_model(
    model_name=model_name,
    description=description,
    tags=None,
)
print(training_run.model_id)

In [None]:
from azureml.core.model import InferenceConfig
from azureml.core.webservice import AciWebservice
from azureml.core.webservice import Webservice
from azureml.core.model import Model
from azureml.core.environment import Environment

# Score with the downloaded script, inside the best run's own environment
inference_config = InferenceConfig(
    entry_script=script_file_name,
    environment=best_run.get_environment(),
)

# Container instance configuration: 1 CPU core and 1 GB of memory
aciconfig = AciWebservice.deploy_configuration(
    cpu_cores=1,
    memory_gb=1,
    tags={'type': "automl-forecasting"},
    description='Sample service for AutoML Forecasting',
)

# Deploy the registered model as a web service, wait for the deployment to
# finish, and report the state the service ended up in
aci_service_name = 'automl-bike-sharing'
print(aci_service_name)
aci_service = Model.deploy(
    ws,
    aci_service_name,
    [model],
    inference_config,
    aciconfig,
)
aci_service.wait_for_deployment(True)
print(aci_service.state)

### Test the web service

In [None]:
# Requires that bike-no-horizon.csv has been uploaded to the Azure Machine
# Learning notebook folder, next to this notebook.
test_df = pd.read_csv('./bike-no-horizon.csv')
test_df

In [None]:
import json
# Serialize the test rows as a JSON object with a single 'data' key holding
# one record (dict) per row, and send it to the deployed service.
test_sample = json.dumps({'data': test_df.to_dict(orient='records')})
response = aci_service.run(input_data = test_sample)
response

In [None]:
# Decode the service response; the code below reads its 'index' and
# 'forecast' entries.
res_dict = json.loads(response)
y_fcst_all = pd.DataFrame(res_dict['index'])
# The time column is converted from epoch milliseconds (unit='ms') to datetimes.
y_fcst_all[time_column_name] = pd.to_datetime(y_fcst_all[time_column_name], unit = 'ms')
# Attach the forecast values as a new column next to the index columns.
y_fcst_all['forecast'] = res_dict['forecast']

### Visualizing the predictions using the web service

In [None]:
# Put the actual counts next to the service's forecasts, then add the
# absolute percentage error of each row
webservice_df = pd.DataFrame(
    {
        'Truth': test_df['cnt'],
        'Forecast': y_fcst_all['forecast'],
    }
)
webservice_df['APE'] = APE(
    webservice_df['Truth'],
    webservice_df['Forecast'],
)
webservice_df