# MLflow 机器学习工作流程-notebook

- MLflow 使用教程，https://my.oschina.net/u/2306127/blog/1825690
- MLflow 官方文档，https://www.mlflow.org/docs/latest/quickstart.html
- 快速安装: `pip install mlflow`

In [43]:
#下载代码
#!git clone https://github.com/databricks/mlflow

In [42]:
#%%!
#export https_proxy=http://192.168.199.99:9999
#echo $https_proxy
#pip install mlflow

In [41]:
#!pip install mlflow

In [40]:
#!ls -l mlflow

In [3]:
# The data set used in this example is from http://archive.ics.uci.edu/ml/datasets/Wine+Quality
# P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis.
# Modeling wine preferences by data mining from physicochemical properties. In Decision Support Systems, Elsevier, 47(4):547-553, 2009.

import os
import warnings
import sys

import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet

import mlflow
import mlflow.sklearn


def eval_metrics(actual, pred):
    """Score predictions against the true values.

    Args:
        actual: the observed target values.
        pred: the predicted values, aligned with ``actual``.

    Returns:
        A ``(rmse, mae, r2)`` tuple: the square root of the mean squared
        error, the mean absolute error and the R^2 score, in that order.
    """
    mse = mean_squared_error(actual, pred)
    return np.sqrt(mse), mean_absolute_error(actual, pred), r2_score(actual, pred)

### 准备数据

In [10]:
# Silence all warnings from here on and seed NumPy's global random generator.
warnings.filterwarnings("ignore")
np.random.seed(40)

# Load the wine-quality CSV. The path is relative, so it is resolved against the
# kernel's current working directory.
wine_path = "../mlflow/example/tutorial/wine-quality.csv"
data = pd.read_csv(wine_path)

# Split into training and test sets using train_test_split's default
# proportions (0.75 / 0.25).
train, test = train_test_split(data)

# "quality" (a scalar from [3, 9]) is the predicted column; every other column
# is a feature. The targets are kept as single-column DataFrames.
train_x, train_y = train.drop("quality", axis=1), train[["quality"]]
test_x, test_y = test.drop("quality", axis=1), test[["quality"]]

In [44]:
# Show the first ten rows of the training split below a plain-text caption.
print("Traing dataset:\n")
train.head(10)

Traing dataset:



Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
2213,7.4,0.17,0.29,1.4,0.047,23.0,107.0,0.9939,3.52,0.65,10.4,6
3375,7.8,0.22,0.26,9.0,0.047,38.0,132.0,0.997,3.25,0.53,10.2,6
4596,6.7,0.53,0.29,4.3,0.069,20.0,114.0,0.99014,3.22,0.59,13.4,6
4360,7.3,0.28,0.35,1.6,0.054,31.0,148.0,0.99178,3.18,0.47,10.7,5
269,5.4,0.59,0.07,7.0,0.045,36.0,147.0,0.9944,3.34,0.57,9.7,6
1900,10.0,0.23,0.27,14.1,0.033,45.0,166.0,0.9988,2.72,0.43,9.7,6
4394,6.3,0.2,0.26,12.7,0.046,60.0,143.0,0.99526,3.26,0.35,10.8,6
4002,6.1,0.25,0.48,15.8,0.052,25.0,94.0,0.99782,3.07,0.45,9.2,6
3477,7.6,0.31,0.24,1.8,0.037,39.0,150.0,0.9913,3.05,0.44,11.8,7
971,6.0,0.27,0.19,1.7,0.02,24.0,110.0,0.9898,3.32,0.47,12.6,7


In [45]:
#train_x[0:10]

In [46]:
#train_y[0:10]

In [47]:
#test_x[0:10]

In [48]:
#test_y[0:10]

### 拟合模型，数据预测，精度评估，记录参数。

In [52]:
def learning(alpha=0.5, l1_ratio=0.5):
    """Train one ElasticNet model and record it as an MLflow run.

    Fits on the notebook-level ``train_x`` / ``train_y``, scores the
    predictions for ``test_x`` against ``test_y`` with ``eval_metrics``,
    prints a one-line summary, then logs the two hyper-parameters, the three
    metrics and the fitted model inside a single ``mlflow.start_run()`` block.

    Args:
        alpha: forwarded to ``ElasticNet(alpha=...)``.
        l1_ratio: forwarded to ``ElasticNet(l1_ratio=...)``.

    Returns:
        None. The results are only printed and logged to MLflow.
    """
    with mlflow.start_run():
        model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        model.fit(train_x, train_y)

        rmse, mae, r2 = eval_metrics(test_y, model.predict(test_x))

        summary = "Elasticnet model (alpha=%f, l1_ratio=%f): \tRMSE: %s, \tMAE: %s, \tR2: %s"
        print(summary % (alpha, l1_ratio, rmse, mae, r2))

        # Hyper-parameters first, then metrics, in the same order as before.
        for name, value in (("alpha", alpha), ("l1_ratio", l1_ratio)):
            mlflow.log_param(name, value)

        for name, value in (("rmse", rmse), ("r2", r2), ("mae", mae)):
            mlflow.log_metric(name, value)

        # Store the fitted estimator under the artifact path "model".
        mlflow.sklearn.log_model(model, "model")

### 根据参数计算误差。

In [55]:
# Two example runs: alpha=0.5 / l1_ratio=0.5, then alpha=0.8 / l1_ratio=0.8.
learning(alpha=0.5, l1_ratio=0.5)
learning(alpha=0.8, l1_ratio=0.8)

Elasticnet model (alpha=0.500000, l1_ratio=0.500000): 	RMSE: 0.82224284976, 	MAE: 0.627876141016, 	R2: 0.126787219728
Elasticnet model (alpha=0.800000, l1_ratio=0.800000): 	RMSE: 0.859868563763, 	MAE: 0.647899138083, 	R2: 0.0450425619538


### 多参数批量计算。

In [57]:
# Number of grid points per hyper-parameter; the parameter values below are
# derived from these counts.
steps_alpha = 10
steps_l1_ratio = 10

# Run one MLflow-tracked training per (alpha, l1_ratio) pair.
# Each value is i / steps, so both parameters stay in [0, 1) whatever the step
# counts are set to. The previous hard-coded divisor of 10 would have pushed
# l1_ratio above 1 as soon as steps_l1_ratio was raised past 10. With both
# counts at 10 the generated values are unchanged.
# NOTE(review): the first row uses alpha=0, and the recorded output shows
# identical metrics for every l1_ratio in that row.
for i in range(steps_alpha):
    for j in range(steps_l1_ratio):
        learning(i / steps_alpha, j / steps_l1_ratio)

Elasticnet model (alpha=0.000000, l1_ratio=0.000000): 	RMSE: 0.742416293856, 	MAE: 0.577516890713, 	R2: 0.288106771584
Elasticnet model (alpha=0.000000, l1_ratio=0.100000): 	RMSE: 0.742416293856, 	MAE: 0.577516890713, 	R2: 0.288106771584
Elasticnet model (alpha=0.000000, l1_ratio=0.200000): 	RMSE: 0.742416293856, 	MAE: 0.577516890713, 	R2: 0.288106771584
Elasticnet model (alpha=0.000000, l1_ratio=0.300000): 	RMSE: 0.742416293856, 	MAE: 0.577516890713, 	R2: 0.288106771584
Elasticnet model (alpha=0.000000, l1_ratio=0.400000): 	RMSE: 0.742416293856, 	MAE: 0.577516890713, 	R2: 0.288106771584
Elasticnet model (alpha=0.000000, l1_ratio=0.500000): 	RMSE: 0.742416293856, 	MAE: 0.577516890713, 	R2: 0.288106771584
Elasticnet model (alpha=0.000000, l1_ratio=0.600000): 	RMSE: 0.742416293856, 	MAE: 0.577516890713, 	R2: 0.288106771584
Elasticnet model (alpha=0.000000, l1_ratio=0.700000): 	RMSE: 0.742416293856, 	MAE: 0.577516890713, 	R2: 0.288106771584
Elasticnet model (alpha=0.000000, l1_ratio=0.800