#### MLflow를 활용한 BPED 과제 적용 예제
##### 1. 클라이언트 및 Experiments 설정
>    * Experiments 조회 & 선택
##### 2. Unseen Data 예측
>    * 데이터 불러오기
>    * 모델 불러오기 & 예측
>    * 예측 결과 확인 및 MLflow UI 업로드

    

<hr/>

***

In [22]:
# 클라이언트 관련 라이브러리
#from mlflow import MlflowClient
from pprint import pprint
import mlflow
import pandas as pd
import numpy as np 
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

#### 클라이언트 & Experiments 설정

In [2]:
# Create the MLflow client bound to the tracking server.
client = mlflow.MlflowClient(
    tracking_uri="http://127.0.0.1:8080",  # tracking server address
)

# If the connection fails, start a tracking server from a shell first.
# The host/port given to `mlflow server` must match the URI above, e.g.:
#   mlflow server --host 127.0.0.1 --port 8080
# NOTE(review): this note previously showed
#   mlflow server --host 192.168.25.154 --port 5000
# which does not match the URI used here — confirm which address is intended.

In [3]:
# Point the module-level (fluent) MLflow API at the tracking server to use.
mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")

In [4]:
# Check the experiments returned by the tracking server (default search
# settings); the bare expression is shown as the notebook cell output.
client.search_experiments()

[<Experiment: artifact_location='mlflow-artifacts:/660539802931161362', creation_time=1708501099510, experiment_id='660539802931161362', last_update_time=1708501099510, lifecycle_stage='active', name='Bped_442T06_Li', tags={'mlflow.note.content': '프로젝트 명 : ML Flow를 활용한 BPED 개발 Tutorial\n'
                         '개발 모델 명 : 442T06_Li Regression 모델\n'
                         '개발자 : 안광혁\n'
                         '개발 기간 : 2024.02.16 ~\n',
  'project_name': 'MLflow Tutorial',
  'project_quarter': 'Q1-2024',
  'store_dept': 'AI사업부',
  'team': 'Data Analyasis'}>,
 <Experiment: artifact_location='mlflow-artifacts:/565309613522282985', creation_time=1707877184950, experiment_id='565309613522282985', last_update_time=1707877184950, lifecycle_stage='active', name='MLflow Quickstart', tags={}>,
 <Experiment: artifact_location='mlflow-artifacts:/0', creation_time=1707877184924, experiment_id='0', last_update_time=1707877184924, lifecycle_stage='active', name='Default', tags={}>]

In [5]:
# Look up experiments with a filter: keep only those whose `project_name`
# tag equals 'MLflow Tutorial'.
tag_filter = "tags.`project_name` = 'MLflow Tutorial'"
search_442T06_Li_experiment = client.search_experiments(filter_string=tag_filter)

pprint(search_442T06_Li_experiment)

[<Experiment: artifact_location='mlflow-artifacts:/660539802931161362', creation_time=1708501099510, experiment_id='660539802931161362', last_update_time=1708501099510, lifecycle_stage='active', name='Bped_442T06_Li', tags={'mlflow.note.content': '프로젝트 명 : ML Flow를 활용한 BPED 개발 Tutorial\n'
                        '개발 모델 명 : 442T06_Li Regression 모델\n'
                        '개발자 : 안광혁\n'
                        '개발 기간 : 2024.02.16 ~\n',
 'project_name': 'MLflow Tutorial',
 'project_quarter': 'Q1-2024',
 'store_dept': 'AI사업부',
 'team': 'Data Analyasis'}>]


In [6]:
# List the run IDs of every run in one specific experiment.

experiment_name = "Bped_442T06_Li"

# get_experiment_by_name returns None for an unknown name; fail with a clear
# message instead of an AttributeError on `.experiment_id`.
experiment = mlflow.get_experiment_by_name(experiment_name)
if experiment is None:
    raise ValueError(
        f"MLflow experiment {experiment_name!r} was not found on the tracking server"
    )
experiment_id = experiment.experiment_id

# search_runs documents `experiment_ids` as a list of IDs.
runs = mlflow.search_runs(experiment_ids=[experiment_id])
run_ids = runs["run_id"].tolist()
print("All Run IDs in the Experiment:", run_ids)

All Run IDs in the Experiment: ['09a065bc849f49fba61a7728fbd37c52', 'd6ac6348d01a4b348ed70d0f78120d1a', '7ef55345a8cb4ec9a507a01f7bf3f372', '3932a8c8e5b3429e9ad427837a332b88', '142fa2ee028e456697da90de1a97d6a0', '700dfe3f00654935bda834dc883f066d', '6597bfe49de44ab7b623731f1a4f353c', '022670ae21664254a255fae7b6d48f92']


In [7]:
# Build the URI of the model to use for prediction.

# Run ID of the model to use; look it up in the MLflow web UI and paste it here.
run_id = "3932a8c8e5b3429e9ad427837a332b88"
# The model_path set when the model was trained; "model" in the usual case.
model_path = "model"
model_uri = "runs:/{}/{}".format(run_id, model_path)

* * *

#### Unseen Data 예측하기

In [15]:
# Load the unseen data.

# Information on the variables (features) to use: rows flagged 'Y' in the
# '442T06_Li' column, keeping the INDEX and FEATURE columns.
# NOTE(review): the original line carried a trailing "(20)23" year tag whose
# meaning is not evident from this notebook — confirm what it refers to.
df_features = pd.read_json('model_features_v1.2.json')
COL_442T06_Li = df_features[df_features['442T06_Li']=='Y'][['INDEX','FEATURE']]

# Features and target come from the same CSV, so parse the file once and
# copy the frame instead of reading it from disk twice. The copy keeps
# unseen_X and unseen_y independent objects, as before.
unseen_X = pd.read_csv('data/first_testdata.csv')
unseen_y = unseen_X.copy()

X = unseen_X[COL_442T06_Li['FEATURE'].to_list()]
Y = unseen_y['442T06_Li']

In [18]:
# Load the logged model as a generic pyfunc model from its run URI.
loaded_model = mlflow.pyfunc.load_model(model_uri=model_uri)

# Run prediction on the input data.
unseen_y_pred = loaded_model.predict(X)

Downloading artifacts: 100%|██████████| 6/6 [00:03<00:00,  1.61it/s]


In [32]:
# Display the predictions as the notebook cell output.
unseen_y_pred

array([22.163313, 22.163313, 22.163313, ..., 22.15953 , 22.15953 ,
       22.15953 ], dtype=float32)

In [29]:
# Calculate error metrics on the unseen data.
# Keep the full-precision MSE for the RMSE calculation and round only the
# reported values, so RMSE is not derived from an already-rounded MSE.
raw_mse = mean_squared_error(Y, unseen_y_pred)

mae = round(mean_absolute_error(Y, unseen_y_pred), 2)
mse = round(raw_mse, 2)
rmse = round(np.sqrt(raw_mse), 2)

metrics = {"mean_absolute_error": mae, "mean_squared_error": mse, "root_mean_squared_error": rmse}

In [30]:
# Display the computed metrics dict as the notebook cell output.
metrics

{'mean_absolute_error': 6.78,
 'mean_squared_error': 46.45,
 'root_mean_squared_error': 6.82}

In [31]:
# Select the MLflow tracking server and the experiment to log into.
mlflow.set_tracking_uri("http://127.0.0.1:8080")
mlflow.set_experiment("Bped_442T06_Li")

# Upload the evaluation results as metrics of a new run.
with mlflow.start_run(run_name="Model Evaluation") as run:
    for metric_name, metric_value in (
        ("mean_absolute_error", mae),
        ("mean_squared_error", mse),
        ("root_mean_squared_error", rmse),
    ):
        mlflow.log_metric(metric_name, metric_value)
