In [2]:
from azureml.core.runconfig import RunConfiguration
from azureml.core import Workspace, Experiment, ScriptRunConfig
import json
from azureml.core.authentication import AzureCliAuthentication
from sklearn.externals import joblib

In [15]:
# Read the workspace coordinates from the local JSON configuration file.
with open("./configuration/config.json") as config_file:
    config = json.load(config_file)

# Unpack the four required settings in one step; a missing key raises KeyError.
# `location` is read from the config but is not passed to Workspace.get below.
workspace_name, resource_group, subscription_id, location = (
    config[key]
    for key in ("workspace_name", "resource_group", "subscription_id", "location")
)

# Fetch a handle to the existing Azure ML workspace identified by those settings.
ws = Workspace.get(
    name=workspace_name,
    subscription_id=subscription_id,
    resource_group=resource_group,
)

In [6]:
# Upload the local robberies CSV into the workspace's default datastore,
# replacing any copy already stored under the target path.
datastore = ws.get_default_datastore()
upload_options = dict(
    files=['./data/robberies.csv'],
    target_path='timeseries-dataset/tabular/',
    overwrite=True,
    show_progress=True,
)
# Kept as the cell's final expression so the value returned by upload_files is displayed.
datastore.upload_files(**upload_options)

Uploading an estimated of 1 files
Uploading ./data/robberies.csv
Uploaded ./data/robberies.csv, 1 files out of an estimated total of 1
Uploaded 1 files


$AZUREML_DATAREFERENCE_3ce0eef337c74c51a17092bf0fd6d60e

In [7]:
from azureml.core import Dataset

# Point at the uploaded CSV as a (datastore, relative path) pair and
# build a tabular dataset from that single delimited file.
robberies_csv = (datastore, 'timeseries-dataset/tabular/robberies.csv')
dataset = Dataset.Tabular.from_delimited_files(path=[robberies_csv])


In [9]:
# Show only the first three records, converted to a pandas DataFrame for rich display.
preview = dataset.take(3)
preview.to_pandas_dataframe()

Unnamed: 0,Month,Monthly Boston armed robberies Jan.1966-Oct.1975 Deutsch and Alt (1977)
0,1966-01,41
1,1966-02,39
2,1966-03,50


In [14]:
# Attach to the experiment in the workspace, then echo the experiment name
# and the workspace name on separate lines as a sanity check.
experiment_name = "arima-localrun"
exp = Experiment(name=experiment_name, workspace=ws)
print("\n".join([exp.name, exp.workspace.name]))

arima-localrun
ShivaMLservice


In [16]:
# Start from a default run configuration and flip one property on the fly:
# mark the Python dependencies as user-managed.
run_config_user_managed = RunConfiguration()
python_section = run_config_user_managed.environment.python
python_section.user_managed_dependencies = True

In [18]:
# Display the source of the training script that will be submitted.
with open('./scripts/training/arima.py', 'r') as script_file:
    script_source = script_file.read()
print(script_source)



import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import json
from sklearn.externals import joblib

from pandas import Grouper
#from pandas.plotting import lag_plot
#from pandas.plotting import autocorrelation_plot
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
#from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.stattools import adfuller
#from sklearn.model_selection import TimeSeriesSplit
#from statsmodels.graphics.gofplots import qqplot
#from statsmodels.tsa.ar_model import AR
from statsmodels.tsa.arima_model import ARIMA

from azureml.core import Dataset, Run

run = Run.get_context()
# get input dataset by name
dataset = run.input_datasets['robberies']

df = dataset.to_pandas_dataframe()
df.index = df['Month']
df = df.drop('Month', axis=1)
df.columns = ['Robberies']

In [52]:
# Look the dataset up by its registered name and preview the first three rows.
#
# The workspace is taken from `exp` (created in the "Attach Experiment" cell)
# rather than from `run`: `run` is only assigned by the submission cell further
# down, so depending on it here fails with NameError on Restart & Run All.
# NOTE(review): this assumes a dataset named 'robberies' is already registered
# in the workspace; no registration step is visible in this notebook - confirm.
ws = exp.workspace
ds = Dataset.get_by_name(workspace=ws, name='robberies')
ds.take(3).to_pandas_dataframe()

Unnamed: 0,Month,Monthly Boston armed robberies Jan.1966-Oct.1975 Deutsch and Alt (1977)
0,1966-01,41
1,1966-02,39
2,1966-03,50


In [56]:
# Inspect the run context visible from this notebook kernel.
# get_context is called on the Run class itself rather than on a `run` instance:
# `run` is not assigned until the submission cell further down, so the original
# instance call raises NameError when the notebook is executed top to bottom.
from azureml.core import Run

Run.get_context()

<azureml.core.run._OfflineRun at 0x7fc209dd3128>

In [78]:
print("Submitting an experiment.")

# NOTE(review): the training script printed earlier reads
# run.input_datasets['robberies'], yet the line that passed
# dataset.as_named_input('robberies') as a script argument was commented out
# here, so no arguments are sent - confirm whether that wiring is still needed.

# Describe what to run (script inside the source directory) and how to run it
# (the user-managed run configuration), then submit it to the experiment.
src = ScriptRunConfig(
    script="training/arima.py",
    source_directory="./scripts",
    run_config=run_config_user_managed,
)
run = exp.submit(src)



Submitting an experiment.


In [79]:
# Block until the submitted run finishes (including post-processing),
# streaming its output to stdout while waiting.
run.wait_for_completion(wait_post_processing=True, show_output=True)

RunId: arima-localrun_1587013719_b24659b9
Web View: https://ml.azure.com/experiments/arima-localrun/runs/arima-localrun_1587013719_b24659b9?wsid=/subscriptions/46668180-b0ad-4a49-bed9-88f16f315dce/resourcegroups/MLGroup/workspaces/ShivaMLservice

Streaming azureml-logs/70_driver_log.txt

Starting the daemon thread to refresh tokens in background for process with pid = 12862
Entering Run History Context Manager.
Preparing to call script [ training/arima.py ] with arguments: []
After variable expansion, calling script [ training/arima.py ] with arguments: []

mean1=89.135593, mean2=303.440678
variance1=2116.727377, variance2=7432.382074
ADF Statistic: 1.001102
p-value: 0.994278
Critical Values:
	1%: -3.494
	5%: -2.889
	10%: -2.582
  return rho, np.sqrt(sigmasq)
                             ARIMA Model Results                              
Dep. Variable:                   D2.y   No. Observations:                   75
Model:                 ARIMA(4, 2, 1)   Log Likelihood                -3



predicted=287.848038, expected=312.000000
predicted=318.398549, expected=249.000000
predicted=272.799487, expected=286.000000
predicted=304.379549, expected=279.000000
predicted=282.078519, expected=309.000000
predicted=309.660504, expected=401.000000
predicted=362.169825, expected=309.000000
predicted=313.765581, expected=328.000000
predicted=347.434314, expected=353.000000
predicted=346.562508, expected=354.000000
predicted=371.604039, expected=327.000000
predicted=341.851400, expected=324.000000
predicted=343.971526, expected=285.000000
predicted=316.628713, expected=243.000000
predicted=283.798999, expected=241.000000
predicted=265.354565, expected=287.000000
predicted=287.194254, expected=355.000000
predicted=328.890021, expected=460.000000
predicted=417.232433, expected=364.000000
predicted=341.824399, expected=487.000000
predicted=462.048320, expected=452.000000
predicted=446.117337, expected=391.000000
predicted=440.849572, expected=500.000000
predicted=469.239731, expected=451

{'runId': 'arima-localrun_1587013719_b24659b9',
 'target': 'local',
 'status': 'Completed',
 'startTimeUtc': '2020-04-16T05:08:42.004154Z',
 'endTimeUtc': '2020-04-16T05:09:15.9285Z',
 'properties': {'_azureml.ComputeTargetType': 'local',
  'ContentSnapshotId': '1f4c86ba-209d-46e6-b4af-d63e7abc66e0',
  'azureml.git.repository_uri': 'git@github.com:us-ocp-ai/TimeSeriesForecastingInPython.git',
  'mlflow.source.git.repoURL': 'git@github.com:us-ocp-ai/TimeSeriesForecastingInPython.git',
  'azureml.git.branch': 'master',
  'mlflow.source.git.branch': 'master',
  'azureml.git.commit': 'ffed695d24a1276ddc3995b2808b27eabf4d1382',
  'mlflow.source.git.commit': 'ffed695d24a1276ddc3995b2808b27eabf4d1382',
  'azureml.git.dirty': 'True'},
 'inputDatasets': [{'dataset': {'id': 'fd9573e1-df9e-42b7-bb7f-0b8dbd09c6e3'}, 'consumptionDetails': {'type': 'Reference'}}],
 'runDefinition': {'script': 'training/arima.py',
  'useAbsolutePath': False,
  'arguments': [],
  'sourceDirectoryDataStore': None,
  'f

In [80]:
# Stop the notebook unless the run finished successfully.
# The status is read once and compared against "Completed", so any other
# outcome (for example a cancelled run) is reported as well, not only "Failed".
# Raises RuntimeError (a subclass of Exception) carrying the status and the
# run details with logs.
final_status = run.get_status()
if final_status != "Completed":
    raise RuntimeError(
        "Training on local failed with following run status: {} and logs: \n {}".format(
            final_status, run.get_details_with_logs()
        )
    )