# Experiment with parameters for a Ridge Regression Model on the Diabetes Dataset

This notebook is for experimenting with different parameters to train a ridge regression model on the Diabetes dataset.

In [None]:
# Change out of the experimentation directory.
# Later cells use paths relative to the new working directory
# ('data/diabetes.csv' and 'diabetes_regression/training/train.py').
# NOTE(review): this cell is not idempotent — every re-run moves the working
# directory up one more level, so run it only once per kernel session.
%cd ..

In [None]:
import azureml.core
from azureml.core import Workspace

In [None]:
# Load the workspace from the saved config file.
# NOTE(review): from_config() is called without arguments, so where the config
# file is looked up is left to the SDK default — presumably relative to the
# current working directory; confirm the file is reachable from there.
ws = Workspace.from_config()

In [None]:
import os, shutil

# Folder that collects every file the experiment run needs
training_folder = 'diabetes-training'
os.makedirs(training_folder, exist_ok=True)

# Stage the data file and the train functions inside the experiment folder,
# keeping each file's original name
for source_path in ('data/diabetes.csv', 'diabetes_regression/training/train.py'):
    target_path = os.path.join(training_folder, os.path.basename(source_path))
    shutil.copy(source_path, target_path)

In [None]:
%%writefile $training_folder/parameters.json
{
    "training":
    {
        "alpha": 0.3
    },
    "evaluation":
    {

    },
    "scoring":
    {
        
    }
}


In [None]:
%%writefile $training_folder/diabetes_training.py
# Import libraries
from azureml.core import Run
import json
import os
import pandas as pd
import shutil

from train import split_data, train_model

# Handle on the run this script is executing in; used for all logging below
run = Run.get_context()


def log_values(values):
    """Log every name/value pair in ``values`` to the current run."""
    for name, value in values.items():
        run.log(name, value)


# Read the diabetes dataset and split it for training
print("Loading Data...")
train_df = pd.read_csv('diabetes.csv')
data = split_data(train_df)

# Only the "training" section of the parameters file is used here
with open("parameters.json") as params_file:
    train_args = json.load(params_file)["training"]

# Record the parameters before training so they appear alongside the metrics
log_values(train_args)

model, metrics = train_model(data, train_args)

# Record the metrics returned by training
log_values(metrics)

# Keep a copy of the parameters file in the outputs folder
os.makedirs('outputs', exist_ok=True)
shutil.copy('parameters.json', os.path.join('outputs', 'parameters.json'))

run.complete()

In [None]:
from azureml.train.estimator import Estimator
from azureml.core import Experiment

# Describe the run: the staged folder is the source directory, the generated
# script is the entry point, and scikit-learn is requested as a conda package.
# NOTE(review): Estimator is deprecated in later azureml-sdk releases in favour
# of ScriptRunConfig — confirm against the SDK version pinned for this project.
estimator = Estimator(
    source_directory=training_folder,
    entry_script='diabetes_training.py',
    compute_target='local',
    conda_packages=['scikit-learn'],
)

# Experiment in the workspace under which the run is recorded
experiment_name = 'diabetes-training'
experiment = Experiment(workspace=ws, name=experiment_name)

# Submit the estimator and block until the run finishes, streaming its output
run = experiment.submit(config=estimator)
run.wait_for_completion(show_output=True)

In [None]:
# Print each entry returned by the run's get_metrics(), one per line
metrics = run.get_metrics()
for metric_name, metric_value in metrics.items():
    print(metric_name, metric_value)

In [None]:
# Print the name of every file associated with the run, one per line
run_file_names = run.get_file_names()
for file_name in run_file_names:
    print(file_name)