In [5]:
from azureml.core import Workspace

# Obtain the workspace handle via Workspace.from_config().
ws = Workspace.from_config()

# One entry per reported field. print() joins its arguments with a single
# space, so each embedded newline is preceded by that separator space.
workspace_summary = [
    'Workspace name : ' + ws.name,
    '\nAzure region : ' + ws.location,
    '\nSubscription ID : ' + ws.subscription_id,
    '\nResource group : ' + ws.resource_group,
]
print(*workspace_summary)

Workspace name : labuser91ml 
Azure region : eastus2 
Subscription ID : 27db5ec6-d206-4028-b5e1-6004dca5eeef 
Resource group : rg91


In [6]:
# Prepare the experiment under which this notebook's runs are recorded.
from azureml.core import Experiment

experiment = Experiment(
    workspace=ws,
    name='diabetes-experiment',
)

In [13]:
# Prepare the data.
from azureml.opendatasets import Diabetes
from sklearn.model_selection import train_test_split

# Load the tabular (table-shaped) dataset into pandas and drop rows that
# contain missing values.
diabetes_dataset = Diabetes.get_tabular_dataset()
x_df = diabetes_dataset.to_pandas_dataframe().dropna()

# Pull the label column 'Y' out of the frame; pop() removes it from x_df,
# which therefore keeps only the feature columns.
y_df = x_df.pop('Y')

# Hold out 20% of the rows for testing; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    x_df,
    y_df,
    test_size=0.2,
    random_state=66,
)

print(X_train)

     AGE  SEX   BMI     BP   S1     S2    S3    S4      S5   S6
440   36    1  30.0   95.0  201  125.2  42.0  4.79  5.1299   85
389   47    2  26.5   70.0  181  104.8  63.0  3.00  4.1897   70
5     23    1  22.6   89.0  139   64.8  61.0  2.00  4.1897   68
289   28    2  31.5   83.0  228  149.4  38.0  6.00  5.3132   83
101   53    2  22.2  113.0  197  115.2  67.0  3.00  4.3041  100
..   ...  ...   ...    ...  ...    ...   ...   ...     ...  ...
122   62    2  33.9  101.0  221  156.4  35.0  6.00  4.9972  103
51    65    2  27.9  103.0  159   96.8  42.0  4.00  4.6151   86
119   53    1  22.0   94.0  175   88.0  59.0  3.00  4.9416   98
316   53    2  27.7   95.0  190  101.8  41.0  5.00  5.4638  101
20    35    1  21.1   82.0  156   87.8  50.0  3.00  4.5109   95

[353 rows x 10 columns]


In [19]:
# Train the models, log the metrics, and manage the model files.
import math
import os

from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error

try:
    # Standalone joblib: scikit-learn deprecated sklearn.externals.joblib in
    # 0.21 and removed it in 0.23, so prefer the real package.
    import joblib  # persists the trained model to a .pkl file
except ImportError:
    # Fallback for old environments that only ship the vendored copy.
    from sklearn.externals import joblib

alphas = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]

# Make sure the target folder exists before joblib.dump writes into it.
os.makedirs('outputs', exist_ok=True)

for alpha in alphas:

    # One Azure ML run per alpha value.
    run = experiment.start_logging()

    try:
        run.log('alpha_value', alpha)  # logged per run

        model = Ridge(alpha=alpha)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

        # Compute the MSE once and derive the RMSE from it.
        mse = mean_squared_error(y_test, y_pred)
        rmse = math.sqrt(mse)

        run.log('rmse', rmse)  # logged per run

        print('model_alpha={0}, rmse={1}, mse={2}'.format(alpha, rmse, mse))

        # Save the model to a local file.
        model_name = 'model_alpha_' + str(alpha) + '.pkl'
        filename = 'outputs/' + model_name  # local path of the pickle

        joblib.dump(value=model, filename=filename)

        # Upload the model file to the Azure ML run.
        # NOTE(review): the recorded file listing of a run shows the model
        # under both 'model_alpha_*.pkl' and 'outputs/model_alpha_*.pkl', so
        # this explicit upload may be redundant - confirm before removing.
        run.upload_file(name=model_name, path_or_stream=filename)
    except Exception as exc:
        # Mark the run as failed instead of leaving it open, then re-raise
        # so the notebook still surfaces the original error.
        run.fail(error_details=exc)
        raise

    run.complete()  # end of this run

    print(f'{alpha} experiment completed')  # the run for this alpha finished

model_alpha=0.1, rmse=56.60520331339142, mse=3204.149042150379
0.1 experiment completed
model_alpha=0.2, rmse=56.61060264545032, mse=3204.7603318810666
0.2 experiment completed
model_alpha=0.3, rmse=56.616243245483616, mse=3205.3989992317693
0.3 experiment completed
model_alpha=0.4, rmse=56.622107088710145, mse=3206.0630111653595
0.4 experiment completed
model_alpha=0.5, rmse=56.62817734275138, mse=3206.7504691621007
0.5 experiment completed
model_alpha=0.6, rmse=56.63443828302745, mse=3207.459599634045
0.6 experiment completed
model_alpha=0.7, rmse=56.64087521475942, mse=3208.188745093948
0.7 experiment completed
model_alpha=0.8, rmse=56.64747440101076, mse=3208.9363560131696
0.8 experiment completed
model_alpha=0.9, rmse=56.654222996253125, mse=3209.7009833091765
0.9 experiment completed
model_alpha=1, rmse=56.66110898499056, mse=3210.4812714089785
1 experiment completed


In [20]:
experiment # Experiment object; bare expression, so the cell displays it

Name,Workspace,Report Page,Docs Page
diabetes-experiment,labuser91ml,Link to Azure Machine Learning studio,Link to Documentation


In [24]:
# Find the best model: the run of the experiment with the lowest logged RMSE.
minimum_rmse = None
minimum_rmse_runid = None

for candidate_run in experiment.get_runs():  # iterate over the experiment's runs
    run_metrics = candidate_run.get_metrics()  # metrics logged by this run

    # Runs without an 'rmse' metric (e.g. runs that stopped before logging it)
    # cannot be ranked, so skip them instead of raising KeyError.
    run_rmse = run_metrics.get('rmse')
    if run_rmse is None:
        continue

    # Strict '<' keeps the first run encountered when several runs tie.
    if minimum_rmse is None or run_rmse < minimum_rmse:
        # Only fetch the run details when this run becomes the new best.
        run_id = candidate_run.get_details()['runId']
        minimum_rmse = run_rmse
        minimum_rmse_runid = run_id

if minimum_rmse_runid is None:
    # Fail with a clear message rather than a str + None TypeError below.
    raise RuntimeError("No run with a logged 'rmse' metric was found in the experiment.")

print("Best run_id: " + minimum_rmse_runid)
print("Best run_id rmse: " + str(minimum_rmse))

Best run_id: a2aa9ee2-5434-4f9b-9850-8fd56c488cbf
Best run_id rmse: 56.60520331339142


In [27]:
from azureml.core import Run

# Re-attach to the best run through its run ID.
best_run = Run(experiment=experiment, run_id=minimum_rmse_runid)

# Query the file listing once and reuse it for both the print and the lookup.
best_run_files = best_run.get_file_names()
print(best_run_files)

# Select the model by its '.pkl' extension instead of trusting that the first
# listed file happens to be the model; the listing order is not controlled by
# this notebook.
model_files = [name for name in best_run_files if name.endswith('.pkl')]
if not model_files:
    raise FileNotFoundError(
        "Run " + str(minimum_rmse_runid) + " has no '.pkl' model file to download."
    )

best_run.download_file(name=model_files[0])

['model_alpha_0.1.pkl', 'outputs/model_alpha_0.1.pkl']
