In [22]:
from azureml.core import Workspace

# Workspace handle obtained through Workspace.from_config(); it carries the
# workspace information printed below.
ws = Workspace.from_config()

# Labelled workspace properties, in the order they are reported.
workspace_summary = [
    'Workspace name: ' + ws.name,
    'Azure region: ' + ws.location,
    'Subscription ID: ' + ws.subscription_id,
    'Resource group: ' + ws.resource_group,
]

# Unpacking keeps print's default single-space separator between the entries.
print(*workspace_summary)

Workspace name: labuser72ml Azure region: eastus Subscription ID: 27db5ec6-d206-4028-b5e1-6004dca5eeef Resource group: rg72


In [24]:
# Prepare the experiment that the training runs will be recorded under.
from azureml.core import Experiment

EXPERIMENT_NAME = 'diabetes-experiment'
experiment = Experiment(workspace=ws, name=EXPERIMENT_NAME)

In [25]:
# Prepare the data.
from azureml.opendatasets import Diabetes
from sklearn.model_selection import train_test_split

# Load the Diabetes open dataset into pandas and discard rows containing NaN.
diabetes_tabular = Diabetes.get_tabular_dataset()
x_df = diabetes_tabular.to_pandas_dataframe().dropna()

# Remove the label column 'Y' from x_df so that only the feature columns remain;
# the labels are kept separately in y_df.
y_df = x_df.pop('Y')

# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    x_df,
    y_df,
    test_size=0.2,
    random_state=66,
)
print(X_train)

     AGE  SEX   BMI     BP   S1     S2    S3    S4      S5   S6
440   36    1  30.0   95.0  201  125.2  42.0  4.79  5.1299   85
389   47    2  26.5   70.0  181  104.8  63.0  3.00  4.1897   70
5     23    1  22.6   89.0  139   64.8  61.0  2.00  4.1897   68
289   28    2  31.5   83.0  228  149.4  38.0  6.00  5.3132   83
101   53    2  22.2  113.0  197  115.2  67.0  3.00  4.3041  100
..   ...  ...   ...    ...  ...    ...   ...   ...     ...  ...
122   62    2  33.9  101.0  221  156.4  35.0  6.00  4.9972  103
51    65    2  27.9  103.0  159   96.8  42.0  4.00  4.6151   86
119   53    1  22.0   94.0  175   88.0  59.0  3.00  4.9416   98
316   53    2  27.7   95.0  190  101.8  41.0  5.00  5.4638  101
20    35    1  21.1   82.0  156   87.8  50.0  3.00  4.5109   95

[353 rows x 10 columns]


In [26]:
# Train the models, log each run, and manage the model files.
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error  # MSE metric
try:
    # joblib persists a fitted scikit-learn model to a .pkl file.
    import joblib
except ImportError:
    # Fallback for old scikit-learn releases that still bundle joblib;
    # the sklearn.externals alias was removed in scikit-learn 0.23.
    from sklearn.externals import joblib
import math
import os

# joblib.dump does not create missing directories, so make sure the target exists.
os.makedirs('outputs', exist_ok=True)

alphas = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
for alpha in alphas:

    # One Azure ML run is recorded per alpha value.
    run = experiment.start_logging()
    try:
        run.log('alpha_value', alpha)

        # Fit a Ridge model with this alpha so the runs can be compared by RMSE.
        model = Ridge(alpha=alpha)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # RMSE between the held-out labels and the predictions.
        rmse = math.sqrt(mean_squared_error(y_test, y_pred))
        run.log('rmse', rmse)

        print('model_alpha={0}, rmse={1}'.format(alpha, rmse))

        # Save the fitted model to a local file.
        model_name = 'model_alpha_' + str(alpha) + '.pkl'
        filename = 'outputs/' + model_name
        joblib.dump(value=model, filename=filename)

        # Upload the model file to the run in Azure ML.
        run.upload_file(name=model_name, path_or_stream=filename)
    except Exception as exc:
        # Mark the run as failed instead of leaving it open, then surface the error.
        run.fail(error_details=exc)
        raise

    run.complete()

    print(f'{alpha} experiment completed')

model_alpha=0.1, rmse=56.605203313391435
0.1 experiment completed
model_alpha=0.2, rmse=56.61060264545031
0.2 experiment completed
model_alpha=0.3, rmse=56.61624324548362
0.3 experiment completed
model_alpha=0.4, rmse=56.62210708871013
0.4 experiment completed
model_alpha=0.5, rmse=56.628177342751385
0.5 experiment completed
model_alpha=0.6, rmse=56.63443828302744
0.6 experiment completed
model_alpha=0.7, rmse=56.64087521475942
0.7 experiment completed
model_alpha=0.8, rmse=56.64747440101076
0.8 experiment completed
model_alpha=0.9, rmse=56.65422299625313
0.9 experiment completed
model_alpha=1, rmse=56.661108984990555
1 experiment completed


In [27]:
# Bare expression: the notebook renders the experiment object as this cell's output.
experiment

Name,Workspace,Report Page,Docs Page
diabetes-experiment,labuser72ml,Link to Azure Machine Learning studio,Link to Documentation


In [28]:
# Search the experiment's runs for the best model (lowest RMSE) before downloading it.
minimum_rmse = None
minimum_rmse_runid = None

for candidate_run in experiment.get_runs():
    run_metrics = candidate_run.get_metrics()

    # Runs that never logged 'rmse' (for example, runs that failed before the
    # metric was recorded) cannot be ranked, so they are skipped.
    run_rmse = run_metrics.get('rmse')
    if run_rmse is None:
        continue

    # Run.id is the run's identifier; reading it avoids fetching the full run details.
    run_id = candidate_run.id

    # Keep the run with the lowest RMSE seen so far.
    if minimum_rmse is None or run_rmse < minimum_rmse:
        minimum_rmse = run_rmse
        minimum_rmse_runid = run_id

if minimum_rmse_runid is None:
    # Fail with a clear message rather than a TypeError from the string concatenation below.
    raise RuntimeError("No run with an 'rmse' metric was found in the experiment")

print('Best run_id: '+ minimum_rmse_runid)
print('Best run_id_rmse: '+ str(minimum_rmse))

Best run_id: 6b8431d8-09ea-40c3-a317-ddd070adb9b1
Best run_id_rmse: 56.605203313391435


In [29]:
from azureml.core import Run

# Re-attach to the best run by its id and list the files stored with it.
best_run = Run(experiment=experiment, run_id=minimum_rmse_runid)
best_run_files = best_run.get_file_names()
print(best_run_files)

# Select the model file explicitly instead of relying on the listing order:
# the training cell uploads it under its bare name (no 'outputs/' prefix),
# while the listing can also contain unrelated entries under 'outputs/'.
model_files = [
    file_name
    for file_name in best_run_files
    if file_name.endswith('.pkl') and not file_name.startswith('outputs/')
]
if not model_files:
    raise FileNotFoundError(
        'No uploaded model (.pkl) file found for run ' + str(minimum_rmse_runid)
    )

best_run.download_file(name=model_files[0])

['model_alpha_0.1.pkl', 'outputs/.amlignore', 'outputs/.amlignore.amltmp', 'outputs/model_alpha_0.1.pkl', 'outputs/model_alpha_0.2.pkl', 'outputs/model_alpha_0.3.pkl', 'outputs/model_alpha_0.4.pkl', 'outputs/model_alpha_0.5.pkl', 'outputs/model_alpha_0.6.pkl', 'outputs/model_alpha_0.7.pkl', 'outputs/model_alpha_0.8.pkl', 'outputs/model_alpha_0.9.pkl', 'outputs/model_alpha_1.pkl']
