# MLflow 測試

## 載入相關套件

In [6]:
from sklearn import datasets
import os
import warnings
import sys
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
import mlflow
import mlflow.sklearn

## 載入資料集

In [2]:
X, y = datasets.load_diabetes(return_X_y=True)

## 資料分割

In [3]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)

## 模型訓練與評估

In [11]:
# 定義模型參數
alpha = 1
l1_ratio = 1

with mlflow.start_run():
    # 模型訓練
    model = ElasticNet(alpha = alpha,
                       l1_ratio = l1_ratio)
    model.fit(X_train,y_train)
    
    # 模型評估
    pred = model.predict(X_test)
    rmse = mean_squared_error(pred, y_test)
    abs_error = mean_absolute_error(pred, y_test)
    r2 = r2_score(pred, y_test)
    
    # MLflow 記錄
    mlflow.log_param('alpha', alpha)
    mlflow.log_param('l1_ratio', l1_ratio)
    mlflow.log_metric('rmse', rmse)
    mlflow.log_metric('abs_error', abs_error)
    mlflow.log_metric('r2', r2)
    
    # MLflow 記錄模型
    mlflow.sklearn.log_model(model, "model")



## 模型評估

In [13]:
mlflow.sklearn.log_model(lr, "model")

平均分數: 0.9303030303030303, 標準差: 0.08393720596645175


## 使用迴歸模型

In [37]:
from sklearn.linear_model import RidgeCV
from sklearn.svm import LinearSVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import StackingRegressor
from sklearn.preprocessing import StandardScaler

X, y = datasets.load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.2)

scaler = StandardScaler()
X_train_std = scaler.fit_transform(X_train)
X_test_std = scaler.transform(X_test)

estimators = [
    ('lr', RidgeCV()),
    ('svr', LinearSVR(random_state=42))
]

model = StackingRegressor(
    estimators=estimators,
    final_estimator=RandomForestRegressor(n_estimators=10, random_state=42))
model.fit(X_train_std, y_train)
scores = cross_val_score(model, X_test_std, y_test, cv=10)
print(f'平均分數: {np.mean(scores)}, 標準差: {np.std(scores)}')

平均分數: 0.12143159519945441, 標準差: 0.4732757387323812


In [38]:
svc = LinearSVR()
svc.fit(X_train_std, y_train)
scores = cross_val_score(svc, X_test_std, y_test, cv=10)
print(f'平均分數: {np.mean(scores)}, 標準差: {np.std(scores)}')

平均分數: -1.0399780386178537, 標準差: 0.36412901584183494
