In [1]:
import os
import warnings
import sys

import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet

import mlflow
import mlflow.sklearn

import logging
logging.basicConfig(level=logging.WARN)
logger = logging.getLogger(__name__)

In [2]:
def eval_metrics(actual, pred):
    """Compute regression error metrics for a set of predictions.

    Args:
        actual: Ground-truth target values.
        pred: Predicted values, aligned with ``actual``.

    Returns:
        A ``(rmse, mae, r2)`` tuple: root mean squared error, mean absolute
        error, and the R^2 score, in that order.
    """
    mse = mean_squared_error(actual, pred)
    return np.sqrt(mse), mean_absolute_error(actual, pred), r2_score(actual, pred)

In [3]:
# Silence all library warnings for the rest of the notebook and seed NumPy's
# global RNG. train_test_split below is called without random_state, so the
# split relies on this seed for reproducibility.
warnings.filterwarnings("ignore")
np.random.seed(40)

# Read the wine-quality csv file from the URL (semicolon-delimited).
csv_url ='http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
try:
    data = pd.read_csv(csv_url, sep=';')
except Exception as e:
    logger.exception(
        "Unable to download training & test CSV, check your internet connection. Error: %s", e)
    # Re-raise: without the data nothing below can run, and swallowing the
    # error would only surface later as a confusing NameError on `data`.
    raise

# Split the data into training and test sets. (0.75, 0.25) split.
train, test = train_test_split(data)


In [4]:
# Preview the first rows of the training split as plain text.
print(train.head())

      fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
1316            5.4              0.74         0.00             1.2      0.041   
1507            7.5              0.38         0.57             2.3      0.106   
849             6.4              0.63         0.21             1.6      0.080   
82              7.4              0.50         0.47             2.0      0.086   
644             9.9              0.54         0.45             2.3      0.071   

      free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
1316                 16.0                  46.0  0.99258  4.01       0.59   
1507                  5.0                  12.0  0.99605  3.36       0.55   
849                  12.0                  32.0  0.99689  3.58       0.66   
82                   21.0                  73.0  0.99700  3.36       0.57   
644                  16.0                  40.0  0.99910  3.39       0.62   

      alcohol  quality  
1316     12.5        6  


In [5]:
# Dimensions of the training split as (rows, columns), target column included.
train.shape

(1199, 12)

In [6]:
# Preview the first rows of the test split as plain text.
print(test.head())

      fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
1035           10.1              0.37         0.34             2.4      0.085   
49              5.6              0.31         0.37             1.4      0.074   
799             9.4              0.50         0.34             3.6      0.082   
538            12.9              0.35         0.49             5.8      0.066   
660             7.2              0.52         0.07             1.4      0.074   

      free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
1035                  5.0                  17.0  0.99683  3.17       0.65   
49                   12.0                  96.0  0.99540  3.32       0.58   
799                   5.0                  14.0  0.99870  3.29       0.52   
538                   5.0                  35.0  1.00140  3.20       0.66   
660                   5.0                  20.0  0.99730  3.32       0.81   

      alcohol  quality  
1035     10.6        7  


In [7]:
# Dimensions of the test split as (rows, columns), target column included.
test.shape

(400, 12)

In [8]:
# Feature matrices: every column except the "quality" target.
train_x = train.drop(columns=["quality"])
test_x = test.drop(columns=["quality"])

In [9]:
# Targets are kept as single-column DataFrames (double brackets), not Series.
train_y, test_y = train[["quality"]], test[["quality"]]


In [10]:
# Training feature matrix dimensions (the "quality" column has been dropped).
train_x.shape

(1199, 11)

In [11]:
# Test feature matrix dimensions (the "quality" column has been dropped).
test_x.shape

(400, 11)

In [12]:
# Training target dimensions; train_y is a single-column DataFrame.
train_y.shape

(1199, 1)

In [13]:
# Test target dimensions; test_y is a single-column DataFrame.
test_y.shape

(400, 1)

In [16]:
# ElasticNet hyperparameters, fixed at 0.75 for this notebook run.
# The commented-out expressions show the script form, which read the values
# from sys.argv and fell back to 0.5 when no argument was given.
alpha = 0.75  # was: float(sys.argv[1]) if len(sys.argv) > 1 else 0.5
l1_ratio = 0.75  # was: float(sys.argv[2]) if len(sys.argv) > 2 else 0.5

In [17]:
# Fit an ElasticNet inside a single MLflow run, report its test-set metrics,
# and record the hyperparameters, metrics, and fitted model with MLflow.
with mlflow.start_run():
    lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
    lr.fit(train_x, train_y)

    # Score the fitted model on the held-out split.
    predicted_qualities = lr.predict(test_x)
    rmse, mae, r2 = eval_metrics(test_y, predicted_qualities)

    print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
    print("  RMSE: %s" % rmse)
    print("  MAE: %s" % mae)
    print("  R2: %s" % r2)

    # Log hyperparameters first, then metrics, in the same order as before.
    for param_name, param_value in {"alpha": alpha, "l1_ratio": l1_ratio}.items():
        mlflow.log_param(param_name, param_value)
    for metric_name, metric_value in {"rmse": rmse, "r2": r2, "mae": mae}.items():
        mlflow.log_metric(metric_name, metric_value)

    # Persist the fitted estimator as a run artifact under "model".
    mlflow.sklearn.log_model(lr, "model")

Elasticnet model (alpha=0.750000, l1_ratio=0.750000):
  RMSE: 0.8331799787336064
  MAE: 0.669234506901795
  R2: 0.016416170929074214
