# The Course Code for Visual Studio Code

### Import packages and libraries

In [None]:
# Prerequisites: Python with the Azure ML SDK (the `azureml` package), pandas,
# and NumPy must already be installed in the active environment.
import azureml.core
import pandas as pd
import numpy as np
import logging  # supplies the logging.INFO level passed to AutoMLConfig below

# Print the installed SDK version as a quick check that the import succeeded.
print("AzureML SDK Version: ", azureml.core.VERSION)

### Create a new Azure Machine Learning workspace and experiment

In [None]:
from azureml.core import Workspace, Experiment

# The experiment name is arbitrary; pick anything you like.
experiment_name = "automl_bikeshare_forecast"

# Load the workspace described by config.json. Download that file from
# Azure Machine Learning and place it in this folder before running the cell.
# Running this cell may print output asking you to authenticate with Azure
# in the browser.
ws = Workspace.from_config()

# Bind the experiment name to the workspace.
experiment = Experiment(workspace=ws, name=experiment_name)

### Create the compute cluster on Azure Machine Learning

In [None]:
from azureml.core.compute import AmlCompute
from azureml.core.compute import ComputeTarget

# Name given to the compute cluster inside the workspace.
amlcompute_cluster_name = "cpu-cluster"

# Cluster specification: STANDARD_D2_V2 virtual machines, at most 4 nodes.
provisioning_config = AmlCompute.provisioning_configuration(
    vm_size="STANDARD_D2_V2",
    max_nodes=4,
)

# Ask Azure Machine Learning to create the cluster in workspace `ws`.
compute_target = ComputeTarget.create(
    workspace=ws,
    name=amlcompute_cluster_name,
    provisioning_configuration=provisioning_config,
)

# Wait for the cluster operation to complete (timeout: 20 minutes),
# echoing progress to the cell output.
compute_target.wait_for_completion(
    show_output=True,
    min_node_count=None,
    timeout_in_minutes=20,
)

### Create a datastore on Azure Machine Learning and upload data

In [None]:
# Use the workspace's default datastore as the upload destination.
datastore = ws.get_default_datastore()

# bike-no.csv must be present in this folder. It is uploaded under dataset/
# with overwrite enabled, and upload progress is shown.
datastore.upload_files(
    files=["bike-no.csv"],
    target_path="dataset/",
    overwrite=True,
    show_progress=True,
)


### Clean and prepare data

In [None]:
from azureml.core import Dataset
from datetime import datetime

# `date` is the timestamp column; `cnt` is the column used as the label
# when the AutoML run is configured.
time_column_name = "date"
target_column_name = "cnt"

# Build a tabular dataset from the uploaded CSV and register `date` as its
# timestamp column (the time-based train/test split below relies on it).
dataset = (
    Dataset.Tabular
    .from_delimited_files(path=[(datastore, "dataset/bike-no.csv")])
    .with_timestamp_columns(fine_grain_timestamp=time_column_name)
)

# Preview the first five rows as a pandas DataFrame.
dataset.take(5).to_pandas_dataframe().reset_index(drop=True)

### Split data between training and testing data

In [None]:
# Training split: rows timestamped up to and including 31 August 2012.
train = dataset.time_before(
    datetime(year=2012, month=8, day=31),
    include_boundary=True,
)
# Show the last five training rows with a fresh 0-based index.
train.to_pandas_dataframe().tail(n=5).reset_index(drop=True)

In [None]:
# Test split: rows timestamped from 1 September 2012 onwards (boundary included).
test = dataset.time_after(
    datetime(year=2012, month=9, day=1),
    include_boundary=True,
)
# Show the first five test rows with a fresh 0-based index.
test.to_pandas_dataframe().head(n=5).reset_index(drop=True)

### Create the AutoML configuration and run the experiment on Azure

In [None]:
from azureml.train.automl import AutoMLConfig

# Forecasting-specific settings, forwarded to AutoMLConfig as keyword arguments.
# NOTE(review): `casual` and `registered` are presumably dropped because they
# are components of the `cnt` target — confirm against the dataset.
time_series_settings = dict(
    time_column_name=time_column_name,
    max_horizon=14,
    country_or_region="US",
    target_lags="auto",
    drop_column_names=["casual", "registered"],
)

# Full AutoML run configuration: a forecasting task trained on `train`,
# predicting `cnt`, executed on the remote compute cluster.
# NOTE(review): newer SDK releases may have renamed some of these arguments
# (e.g. blacklist_models, max_horizon) — confirm against the installed version.
automl_config = AutoMLConfig(
    task="forecasting",
    primary_metric="normalized_root_mean_squared_error",
    blacklist_models=["ExtremeRandomTrees"],
    experiment_timeout_minutes=30,
    training_data=train,
    label_column_name=target_column_name,
    compute_target=compute_target,
    enable_early_stopping=True,
    n_cross_validations=3,
    max_concurrent_iterations=4,
    max_cores_per_iteration=-1,
    verbosity=logging.INFO,
    **time_series_settings
)

In [None]:
# Submit the AutoML configuration to the experiment; show_output=False keeps
# the run's output out of this cell.
remote_run = experiment.submit(automl_config, show_output=False)
# Leave the run object as the cell's last expression so the notebook displays it.
remote_run

In [None]:
# Wait here until the submitted run has completed.
remote_run.wait_for_completion()