# Experiment with parameters for a Ridge Regression Model on the Diabetes Dataset

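This notebook experiments with different training parameters (such as the regularization strength `alpha`) for a ridge regression model on the Diabetes dataset, running each trial as an Azure Machine Learning experiment.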

In [1]:
# Change out of the experimentation directory into its parent so that the
# relative paths used by later cells ('data/...', 'diabetes_regression/...')
# resolve. The move is relative to the current directory, so re-running this
# cell without restarting the kernel moves up one more level.
%cd ..

C:\Users\brysmith\Source\Repos\MLOpsPython


In [2]:
import azureml.core
from azureml.core import Workspace

In [3]:
# Load the workspace from the saved config file. No path is passed, so the
# SDK's default config lookup is used. `ws` is needed later to create the
# experiment.
ws = Workspace.from_config()

Performing interactive authentication. Please follow the instructions on the terminal.




Interactive authentication successfully completed.


In [4]:
import os
import shutil

# Folder that holds everything the experiment run needs
training_folder = 'diabetes-training'
os.makedirs(training_folder, exist_ok=True)

# Stage the dataset next to the training script
data_target = os.path.join(training_folder, "diabetes.csv")
shutil.copy('data/diabetes.csv', data_target)

# Stage the shared training functions; as the cell's last expression, the
# destination path returned by shutil.copy is displayed
script_target = os.path.join(training_folder, "train.py")
shutil.copy('diabetes_regression/training/train.py', script_target)

'diabetes-training\\train.py'

In [5]:
%%writefile $training_folder/parameters.json
{
    "training": {
        "alpha": 0.3
    },
    "evaluation": {},
    "scoring": {}
}


Writing diabetes-training/parameters.json


In [6]:
%%writefile $training_folder/diabetes_training.py
# Import libraries
from azureml.core import Run
import pandas as pd
import shutil

from train import split_data, train_model

# Get the experiment run context
run = Run.get_context()

# load the diabetes dataset
print("Loading Data...")
train_df = pd.read_csv('diabetes.csv')

data = split_data(train_df)

# Specify the parameters to test
with open("parameters.json") as f:
    pars = json.load(f)
    train_args = pars["training"]

# Log parameters
for k, v in train_args.items():
    run.log(k, v)

model, metrics = train_model(data, train_args)

# Log metrics
for k, v in metrics.items():
    run.log(k, v)

# Save the parameters file to the outputs folder
os.makedirs('outputs', exist_ok=True)
shutil.copy('parameters.json', os.path.join('outputs', 'parameters.json'))
    
run.complete()

Writing diabetes-training/diabetes_training.py


In [7]:
from azureml.core import Experiment
from azureml.train.estimator import Estimator

# Describe how the training script is executed: the staged folder is the
# source directory, the run happens on the local compute target, and
# scikit-learn is requested as a conda package
estimator = Estimator(
    source_directory=training_folder,
    entry_script='diabetes_training.py',
    compute_target='local',
    conda_packages=['scikit-learn'],
)

# Experiment in the workspace that the run is submitted to
experiment_name = 'diabetes-training'
experiment = Experiment(workspace=ws, name=experiment_name)

# Submit the estimator and wait for the run to finish, streaming its output
run = experiment.submit(config=estimator)
run.wait_for_completion(show_output=True)

RunId: diabetes-training_1583266166_a12fa3dc
Web View: https://mlworkspace.azure.ai/portal/subscriptions/48d404f6-3a69-4552-a210-b1afe5537cc1/resourceGroups/mlopsohrg/providers/Microsoft.MachineLearningServices/workspaces/mlopsoh-ws/experiments/diabetes-training/runs/diabetes-training_1583266166_a12fa3dc

Streaming azureml-logs/60_control_log.txt

Streaming log file azureml-logs/60_control_log.txt
Starting the daemon thread to refresh tokens in background for process with pid = 14668
Running: ['cmd.exe', '/c', 'C:\\Users\\brysmith\\AppData\\Local\\Temp\\azureml_runs\\diabetes-training_1583266166_a12fa3dc\\azureml-environment-setup/docker_env_checker.bat']

Materialized image not found on target: azureml/azureml_942e102c8fd48681f49452a15f3fb0f4


Logging experiment preparation status in history service.
Running: ['cmd.exe', '/c', 'C:\\Users\\brysmith\\AppData\\Local\\Temp\\azureml_runs\\diabetes-training_1583266166_a12fa3dc\\azureml-environment-setup/docker_env_builder.bat']
Running: ['

Collecting msrest>=0.5.1
  Downloading msrest-0.6.11-py2.py3-none-any.whl (83 kB)
Collecting pytz
  Downloading pytz-2019.3-py2.py3-none-any.whl (509 kB)
Collecting azure-graphrbac>=0.40.0
  Downloading azure_graphrbac-0.61.1-py2.py3-none-any.whl (141 kB)
Collecting six>=1.11.0
  Downloading six-1.14.0-py2.py3-none-any.whl (10 kB)
Collecting SecretStorage
  Downloading SecretStorage-3.1.2-py3-none-any.whl (14 kB)
Collecting azure-mgmt-containerregistry>=2.0.0
  Downloading azure_mgmt_containerregistry-2.8.0-py2.py3-none-any.whl (718 kB)
Collecting azure-mgmt-storage>=1.5.0
  Downloading azure_mgmt_storage-8.0.0-py2.py3-none-any.whl (524 kB)
Collecting azure-common>=1.1.12
  Downloading azure_common-1.1.24-py2.py3-none-any.whl (12 kB)
Collecting cryptography!=1.9,!=2.0.*,!=2.1.*,!=2.2.*
  Downloading cryptography-2.8-cp34-abi3-manylinux2010_x86_64.whl (2.3 MB)
Collecting azure-mgmt-keyvault>=0.40.0
  Downloading azure_mgmt_keyvault-2.1.1-py2.py3-none-any.whl (117 kB)
Collecting contextl


Streaming azureml-logs/70_driver_log.txt

Starting the daemon thread to refresh tokens in background for process with pid = 8
Entering Run History Context Manager.
Preparing to call script [ diabetes_training.py ] with arguments: []
After variable expansion, calling script [ diabetes_training.py ] with arguments: []

Loading Data...


The experiment completed successfully. Finalizing run...
Logging experiment finalizing status in history service.
Starting the daemon thread to refresh tokens in background for process with pid = 8
Cleaning up all outstanding Run operations, waiting 300.0 seconds
2 items cleaning up...
Cleanup took 0.01858043670654297 seconds

Execution Summary
RunId: diabetes-training_1583266166_a12fa3dc
Web View: https://mlworkspace.azure.ai/portal/subscriptions/48d404f6-3a69-4552-a210-b1afe5537cc1/resourceGroups/mlopsohrg/providers/Microsoft.MachineLearningServices/workspaces/mlopsoh-ws/experiments/diabetes-training/runs/diabetes-training_1583266166_a12fa3dc



{'runId': 'diabetes-training_1583266166_a12fa3dc',
 'target': 'local',
 'status': 'Completed',
 'startTimeUtc': '2020-03-03T20:12:24.785499Z',
 'endTimeUtc': '2020-03-03T20:12:33.184738Z',
 'properties': {'_azureml.ComputeTargetType': 'local',
  'ContentSnapshotId': '506b34b4-2bdd-42b8-b717-9c0618480bfb',
  'azureml.git.repository_uri': 'https://github.com/microsoft/MLOpsPython.git',
  'mlflow.source.git.repoURL': 'https://github.com/microsoft/MLOpsPython.git',
  'azureml.git.branch': 'jotaylo/split_train_script',
  'mlflow.source.git.branch': 'jotaylo/split_train_script',
  'azureml.git.commit': '3df51833d143b722e578d0ae84181cb63bf78747',
  'mlflow.source.git.commit': '3df51833d143b722e578d0ae84181cb63bf78747',
  'azureml.git.dirty': 'True'},
 'inputDatasets': [],
 'runDefinition': {'script': 'diabetes_training.py',
  'useAbsolutePath': False,
  'arguments': [],
  'sourceDirectoryDataStore': None,
  'framework': 'Python',
  'communicator': 'None',
  'target': 'local',
  'dataReference

In [8]:
# Fetch everything the run logged and print one "name value" pair per line
metrics = run.get_metrics()
for metric_name, metric_value in metrics.items():
    print(metric_name, metric_value)

alpha 0.3
mse 3302.673633401725


In [9]:
# Print the name of each file associated with the run, one per line
for file_name in run.get_file_names():
    print(file_name)

azureml-logs/60_control_log.txt
azureml-logs/70_driver_log.txt
logs/azureml/8_azureml.log
outputs/parameters.json
