In [2]:
from azureml.core import Workspace
# Connect to the Azure ML workspace via Workspace.from_config().
ws = Workspace.from_config()

# Report the workspace's identifying details, one per line.
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region ' + ws.location,
    'Subscription ID: ' + ws.subscription_id,
    'Resource Group: ' + ws.resource_group,
]
print('\n'.join(workspace_summary))

Workspace name: labuser17ml
Azure region koreacentral
Subscription ID: f5bc93f2-df0a-4b1a-ab9e-2b0203fc7d26
Resource Group: rg17


In [8]:
from azureml.core import Experiment
# Every run started later in this notebook is recorded under this experiment.
experiment_name = 'diabetes_experiment'
experiment = Experiment(name=experiment_name, workspace=ws)

In [9]:
from azureml.opendatasets import Diabetes
from sklearn.model_selection import train_test_split

# Pull the Diabetes open dataset into pandas and discard rows with missing values.
diabetes_dataset = Diabetes.get_tabular_dataset()
x_df = diabetes_dataset.to_pandas_dataframe().dropna()

# 'Y' is the regression target; pop() removes it from the feature frame in the same step.
y_df = x_df.pop('Y')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    x_df,
    y_df,
    test_size=0.2,
    random_state=6,
)

In [10]:
import math
import os

from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error

# sklearn.externals.joblib was deprecated in scikit-learn 0.21 and removed in 0.23;
# prefer the standalone package and fall back only on very old installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

# Regularisation strengths to sweep; one experiment run is recorded per value.
alphas = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]

# joblib.dump does not create missing directories, so make sure the target exists.
# NOTE(review): the best run's file listing further down contains every
# outputs/model_alpha_*.pkl, so this shared folder appears to be uploaded with each
# run in addition to the explicit upload_file call — consider a per-run directory.
os.makedirs('outputs', exist_ok=True)

for alpha in alphas:
    run = experiment.start_logging()
    try:
        run.log('alpha_value', alpha)

        # Fit a ridge regressor with this alpha on the training split.
        model = Ridge(alpha=alpha)
        model.fit(X=X_train, y=y_train)

        # Score on the held-out split; RMSE is the square root of the MSE.
        y_pred = model.predict(X=X_test)
        rmse = math.sqrt(mean_squared_error(y_true=y_test, y_pred=y_pred))
        run.log('rmse', rmse)

        # Serialise the fitted model locally, then attach it to the run.
        model_name = 'model_alpha_' + str(alpha) + '.pkl'
        filename = 'outputs/' + model_name

        joblib.dump(value=model, filename=filename)
        run.upload_file(name=model_name, path_or_stream=filename)
        run.complete()
    except Exception as exc:
        # Mark the run as failed so it is not left in a running state, then
        # re-raise so the notebook still surfaces the error.
        run.fail(error_details=str(exc))
        raise

    print(f'{alpha} exp completed')

0.1 exp completed
0.2 exp completed
0.3 exp completed
0.4 exp completed
0.5 exp completed
0.6 exp completed
0.7 exp completed
0.8 exp completed
0.9 exp completed
1 exp completed


In [11]:
# Leaving the experiment as the cell's last expression makes the notebook render its summary.
experiment

Name,Workspace,Report Page,Docs Page
diabetes_experiment,labuser17ml,Link to Azure Machine Learning studio,Link to Documentation


In [12]:
# Scan every run in the experiment and remember the one with the lowest logged RMSE.
minimum_rmse_runid = None
minimum_rmse = None

for run in experiment.get_runs():
    # Runs that never logged 'rmse' (e.g. failed or unfinished ones) are skipped
    # instead of aborting the whole search with a KeyError.
    run_rmse = run.get_metrics().get('rmse')
    if run_rmse is None:
        continue

    if minimum_rmse is None or run_rmse < minimum_rmse:
        minimum_rmse = run_rmse
        # run.id is the run's identifier; reading it avoids a get_details() call per run.
        minimum_rmse_runid = run.id

# Both values stay None when no run in the experiment has an 'rmse' metric.
print("Best run_id:", minimum_rmse_runid)
print("Best run_rsme:", str(minimum_rmse))

Best run_id: 1486dbf3-1087-4a6a-80aa-cc5128ff418c
Best run_rsme: 54.205664405276664


In [14]:
from azureml.core import Run
# Re-attach to the winning run by its id and list the files stored with it.
best_run = Run(experiment=experiment, run_id=minimum_rmse_runid)
best_run_files = best_run.get_file_names()
print(best_run_files)

['model_alpha_1.pkl', 'outputs/model_alpha_0.1.pkl', 'outputs/model_alpha_0.2.pkl', 'outputs/model_alpha_0.3.pkl', 'outputs/model_alpha_0.4.pkl', 'outputs/model_alpha_0.5.pkl', 'outputs/model_alpha_0.6.pkl', 'outputs/model_alpha_0.7.pkl', 'outputs/model_alpha_0.8.pkl', 'outputs/model_alpha_0.9.pkl', 'outputs/model_alpha_1.pkl']


In [15]:
# Select the model by name rather than by position: the order of get_file_names()
# is not something this notebook controls, and the listing also contains the
# 'outputs/...' copies of every model.
model_files = [
    file_name
    for file_name in best_run.get_file_names()
    if file_name.startswith('model_alpha_') and file_name.endswith('.pkl')
]
if not model_files:
    raise LookupError('Best run has no uploaded model_alpha_*.pkl file')

# Each run uploads one model under this name pattern, so the first match is the run's model.
best_run.download_file(name=model_files[0])

In [17]:
import numpy as np
from azureml.core import Dataset

# Write the training split to comma-separated text files in the working directory.
for csv_name, split in (('features.csv', X_train), ('labels.csv', y_train)):
    np.savetxt(csv_name, split, delimiter=',')

# Push both files to the workspace's default datastore, replacing any earlier copies.
# The upload call is left as the last expression so its return value is displayed.
local_files = ['./features.csv', './labels.csv']
datastore = ws.get_default_datastore()
datastore.upload_files(
    files=local_files,
    target_path='diabetes-experiment/',
    overwrite=True
)

Uploading an estimated of 2 files
Uploading ./features.csv
Uploaded ./features.csv, 1 files out of an estimated total of 2
Uploading ./labels.csv
Uploaded ./labels.csv, 2 files out of an estimated total of 2
Uploaded 2 files


$AZUREML_DATAREFERENCE_5b475ee528794671928d8cf8349e5ca6