# 1. Preparation

## 1.1. Basic Settings

In [1]:
# Import modules 
from azureml.core import Workspace

In [2]:
# Define stauts of the current settings
ws = Workspace.from_config()

In [3]:
# Check information
print('Workspace Same: ' + ws.name,
        '\nAzure Region: ' + ws.location,
        '\nSubscription ID: ' + ws.subscription_id,
        '\nResource Group: ' + ws.resource_group)

Workspace Same: labmeow94ml 
Azure Region: australiaeast 
Subscription ID: 27db5ec6-d206-4028-b5e1-6004dca5eeef 
Resource Group: rg94


## 1.2. Settings for Experiment

In [4]:
# Prepare 

from azureml.core import Experiment

In [5]:
# Define Experiment
experiment = Experiment(workspace=ws,
                        name='diabetes-experiment')

## 1.3. Prepare Dataset

In [6]:
# Import modules
from azureml.opendatasets import Diabetes
from sklearn.model_selection import train_test_split

In [7]:
# Check the original dataset
Diabetes.get_tabular_dataset().to_pandas_dataframe()
# to_pandas_dataframe(): Bring data as a type of Pandas DataFrame

Unnamed: 0,AGE,SEX,BMI,BP,S1,S2,S3,S4,S5,S6,Y
0,59,2,32.1,101.00,157,93.2,38.0,4.00,4.8598,87,151
1,48,1,21.6,87.00,183,103.2,70.0,3.00,3.8918,69,75
2,72,2,30.5,93.00,156,93.6,41.0,4.00,4.6728,85,141
3,24,1,25.3,84.00,198,131.4,40.0,5.00,4.8903,89,206
4,50,1,23.0,101.00,192,125.4,52.0,4.00,4.2905,80,135
...,...,...,...,...,...,...,...,...,...,...,...
437,60,2,28.2,112.00,185,113.8,42.0,4.00,4.9836,93,178
438,47,2,24.9,75.00,225,166.0,42.0,5.00,4.4427,102,104
439,60,2,24.9,99.67,162,106.6,43.0,3.77,4.1271,95,132
440,36,1,30.0,95.00,201,125.2,42.0,4.79,5.1299,85,220


# 2. Data Preparation

In [8]:
# Create X-axis

x_df = Diabetes.get_tabular_dataset().to_pandas_dataframe().dropna()
# dropna(): Drop 'None' values

x_df

Unnamed: 0,AGE,SEX,BMI,BP,S1,S2,S3,S4,S5,S6,Y
0,59,2,32.1,101.00,157,93.2,38.0,4.00,4.8598,87,151
1,48,1,21.6,87.00,183,103.2,70.0,3.00,3.8918,69,75
2,72,2,30.5,93.00,156,93.6,41.0,4.00,4.6728,85,141
3,24,1,25.3,84.00,198,131.4,40.0,5.00,4.8903,89,206
4,50,1,23.0,101.00,192,125.4,52.0,4.00,4.2905,80,135
...,...,...,...,...,...,...,...,...,...,...,...
437,60,2,28.2,112.00,185,113.8,42.0,4.00,4.9836,93,178
438,47,2,24.9,75.00,225,166.0,42.0,5.00,4.4427,102,104
439,60,2,24.9,99.67,162,106.6,43.0,3.77,4.1271,95,132
440,36,1,30.0,95.00,201,125.2,42.0,4.79,5.1299,85,220


In [9]:
# Create Y-axis as a Label
y_df = x_df.pop('Y')
y_df

0      151
1       75
2      141
3      206
4      135
      ... 
437    178
438    104
439    132
440    220
441     57
Name: Y, Length: 442, dtype: int64

In [10]:
# Check X-Axis again after Popping the label
x_df

Unnamed: 0,AGE,SEX,BMI,BP,S1,S2,S3,S4,S5,S6
0,59,2,32.1,101.00,157,93.2,38.0,4.00,4.8598,87
1,48,1,21.6,87.00,183,103.2,70.0,3.00,3.8918,69
2,72,2,30.5,93.00,156,93.6,41.0,4.00,4.6728,85
3,24,1,25.3,84.00,198,131.4,40.0,5.00,4.8903,89
4,50,1,23.0,101.00,192,125.4,52.0,4.00,4.2905,80
...,...,...,...,...,...,...,...,...,...,...
437,60,2,28.2,112.00,185,113.8,42.0,4.00,4.9836,93
438,47,2,24.9,75.00,225,166.0,42.0,5.00,4.4427,102
439,60,2,24.9,99.67,162,106.6,43.0,3.77,4.1271,95
440,36,1,30.0,95.00,201,125.2,42.0,4.79,5.1299,85


In [11]:
# Devide data into Train and Test Datasets
X_train, X_test, y_train, y_test = train_test_split(x_df,
                                                    y_df,
                                                    test_size=0.2,
                                                    random_state=66)

print(X_train)

     AGE  SEX   BMI     BP   S1     S2    S3    S4      S5   S6
440   36    1  30.0   95.0  201  125.2  42.0  4.79  5.1299   85
389   47    2  26.5   70.0  181  104.8  63.0  3.00  4.1897   70
5     23    1  22.6   89.0  139   64.8  61.0  2.00  4.1897   68
289   28    2  31.5   83.0  228  149.4  38.0  6.00  5.3132   83
101   53    2  22.2  113.0  197  115.2  67.0  3.00  4.3041  100
..   ...  ...   ...    ...  ...    ...   ...   ...     ...  ...
122   62    2  33.9  101.0  221  156.4  35.0  6.00  4.9972  103
51    65    2  27.9  103.0  159   96.8  42.0  4.00  4.6151   86
119   53    1  22.0   94.0  175   88.0  59.0  3.00  4.9416   98
316   53    2  27.7   95.0  190  101.8  41.0  5.00  5.4638  101
20    35    1  21.1   82.0  156   87.8  50.0  3.00  4.5109   95

[353 rows x 10 columns]


# 3. Train Dataset

In [12]:
# Import modules

from sklearn.linear_model import Ridge          # Algorithm
from sklearn.metrics import mean_squared_error  # Test score
from sklearn.externals import joblib            # Save trained models as *.pkl
import math



In [13]:
# Find the best parameter

alphas = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0 ]

for alpha in alphas:

    # Logs - Start value
    run = experiment.start_logging()
    run.log('alpha_value', alpha)

    # Train modles
    model = Ridge(alpha=alpha)
    model.fit(X_train, y_train)

    # Prediction
    y_pred = model.predict(X_test)

    # Mean Squared Error
    #mse = mean_squared_error(y_test, y_pred)
    
    # Squared Mean Squared Error
    rmse = math.sqrt(mean_squared_error(y_test, y_pred))

    # Logs - Result
    run.log('rmse', rmse)

    # Print the result
    print('model_alpha={0}, rmse={1}'.format(alpha, rmse))

    # Set a name of model and path
    model_name = 'model_alpha_' + str(alpha) + '.pkl'  # Set a model name
    filename = 'outputs/' + model_name                 # Set a path
    joblib.dump(value=model, filename=filename)        # Export the model as a file

    # Upload the model file to Azure ML Service
    run.upload_file(name=model_name, path_or_stream=filename)


    #Logs - Complete
    run.complete()

    # Message for end
    print(f'{alpha} experiment completed!')


model_alpha=0.1, rmse=56.605203313391435
0.1 experiment completed!
model_alpha=0.2, rmse=56.61060264545031
0.2 experiment completed!
model_alpha=0.3, rmse=56.61624324548362
0.3 experiment completed!
model_alpha=0.4, rmse=56.62210708871013
0.4 experiment completed!
model_alpha=0.5, rmse=56.628177342751385
0.5 experiment completed!
model_alpha=0.6, rmse=56.63443828302744
0.6 experiment completed!
model_alpha=0.7, rmse=56.64087521475942
0.7 experiment completed!
model_alpha=0.8, rmse=56.64747440101076
0.8 experiment completed!
model_alpha=0.9, rmse=56.65422299625313
0.9 experiment completed!
model_alpha=1.0, rmse=56.661108984990555
1.0 experiment completed!


In [14]:
experiment

Name,Workspace,Report Page,Docs Page
diabetes-experiment,labmeow94ml,Link to Azure Machine Learning studio,Link to Documentation


# 4. Find the Best Model

## 4.1. Find the Best Model

In [15]:
# Define the minimum RSME
minimum_rmse = None
minimum_rmse_runid = None

In [21]:
# Compare models
for exp in experiment.get_runs():   # Get results of experiments
    run_metrics = exp.get_metrics();
    run_details = exp.get_details();

    run_rmse = run_metrics['rmse']
    run_id = run_details['runId']

In [22]:
# Find and Download the Best Model
minimum_rmse = None
minimum_rmse_runid = None

for exp in experiment.get_runs():


    # Find the smallest value of RMSE
    if minimum_rmse is None:
        minimum_rmse = run_rmse
        minimum_rmse_runid = run_id
    else:
        if run_rmse <  minimum_rmse:
            minimum_rmse = run_rmse
            minimum_rmse_runid = run_id

print('Best run_id:  ' + minimum_rmse_runid)
print('Best run_id rmse:  ' + str(minimum_rmse))

Best run_id:  bb1985de-73d7-46bc-b4bf-0d2bc0fb7a57
Best run_id rmse:  56.64087521475942


## 4.2. Download the Best Model

In [23]:
# Import module
from azureml.core import Run 

In [24]:
# Download the model
best_run = Run(experiment=experiment, run_id=minimum_rmse_runid)
print(best_run.get_file_names())

best_run.download_file(name=str(best_run.get_file_names()[0]))

['model_alpha_0.7.pkl', 'outputs/.amlignore', 'outputs/.amlignore.amltmp', 'outputs/Model_alpha_0.1.pkl', 'outputs/Model_alpha_0.2.pkl', 'outputs/Model_alpha_0.3.pkl', 'outputs/Model_alpha_0.4.pkl', 'outputs/Model_alpha_0.5.pkl', 'outputs/Model_alpha_0.6.pkl', 'outputs/Model_alpha_0.7.pkl', 'outputs/Model_alpha_0.8.pkl', 'outputs/Model_alpha_0.9.pkl', 'outputs/Model_alpha_1.0.pkl']
