# Experiment

In [None]:
import os, warnings, sys, shutil
import pandas as pd,  numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from sklearn.externals import joblib
from mlflow.sklearn import log_model
import mlflow

In [None]:
# Model hyperparameters: passed to ElasticNet and logged with the run.
hyperparam = {
    'alpha': 0.45,
    'l1_ratio': 0.65,
}

# Evaluation scores; filled in with 'rmse', 'mae' and 'r2' after prediction.
metric = {}

# Settings handed to the project-local Experiment wrapper.
config = {
    'experiment_name': 'mlflow_test3',
    'user_id': 'Septian',
    'tracking_uri': 'http://172.31.45.72:5000',
    # NOTE(review): this path ends in 'mlflow_test2' while experiment_name is
    # 'mlflow_test3' -- confirm the mismatch is intended.
    'artifact_location': 's3://cig-ds-dev/mlflow/mlflow_test2',
    'use_git_version': True,
}

# Local staging directory for files that are later logged as run artifacts.
model_path = os.path.expanduser('~/model')
# exist_ok=True replaces the separate isdir() check, so there is no window
# between "check" and "create" and re-running the cell is harmless.
os.makedirs(model_path, exist_ok=True)

In [None]:
from experiment import Experiment

In [None]:
# Initiate the experiment and run entities.
# Experiment is the project-local wrapper imported from experiment.py; it is
# constructed from the `config` dict defined above.
ex_log = Experiment(config)
# Open a run named 'run_1'; the later log_* and terminate_run calls go through
# this same ex_log object.
# NOTE(review): presumably this starts an MLflow run on the configured
# tracking server -- confirm against experiment.py.
ex_log.create_run('run_1')

In [None]:
%%time
# Silence library warnings and pin NumPy's global seed before splitting.
warnings.filterwarnings("ignore")
np.random.seed(40)

# Load the wine-quality dataset; the CSV is looked up in the current working
# directory, so start the notebook from the folder that contains it.
wine_path = os.path.join(os.getcwd(), "wine-quality.csv")
data = pd.read_csv(wine_path)

# Hold out a test set using train_test_split's default proportions.
train, test = train_test_split(data)

# "quality" is the prediction target; every other column is a feature.
train_x, test_x = (frame.drop(columns=["quality"]) for frame in (train, test))
train_y, test_y = (frame[["quality"]] for frame in (train, test))

In [None]:
def eval_metrics(actual, pred):
    """Score predictions against ground truth.

    Args:
        actual: Observed target values.
        pred: Predicted values to compare with ``actual``.

    Returns:
        A ``(rmse, mae, r2)`` tuple: root of the mean squared error, mean
        absolute error, and R^2 score, in that order.
    """
    squared_error = mean_squared_error(actual, pred)
    return (
        np.sqrt(squared_error),
        mean_absolute_error(actual, pred),
        r2_score(actual, pred),
    )

In [None]:
# Fit an ElasticNet regressor on the training split with the configured
# hyperparameters.
lr = ElasticNet(
    alpha=hyperparam['alpha'],
    l1_ratio=hyperparam['l1_ratio'],
    random_state=42,
)
lr.fit(train_x, train_y)

# Predict on the held-out split and store the scores in `metric`.
predicted_qualities = lr.predict(test_x)
metric.update(zip(('rmse', 'mae', 'r2'), eval_metrics(test_y, predicted_qualities)))

# Report the hyperparameters and scores in the cell output.
print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (hyperparam['alpha'], hyperparam['l1_ratio']))
print("  RMSE: %s" % metric['rmse'])
print("  MAE: %s" % metric['mae'])
print("  R2: %s" %  metric['r2'])

In [None]:
# Save the hyperparameters and metrics on the current run.
# `hyperparam` holds 'alpha' and 'l1_ratio'; `metric` holds 'rmse', 'mae' and
# 'r2' once the training cell has executed.
ex_log.log_params(hyperparam)
ex_log.log_metrics(metric)

In [None]:
# Save artifacts (model, vectorizer, etc.).
# Alternative not used here: mlflow.sklearn.log_model(lr, 'model').
#
# Start from an empty staging directory so files left by an earlier run are
# not logged again. The removal is guarded so a missing directory does not
# raise, while genuine removal errors still surface.
if os.path.isdir(model_path):
    shutil.rmtree(model_path)
os.makedirs(model_path, exist_ok=True)

# NOTE(review): joblib is imported from sklearn.externals at the top of this
# notebook; that module is deprecated in newer scikit-learn releases --
# confirm the installed version still provides it.
joblib.dump(lr, os.path.join(model_path, 'model.pkl'))

# Hand the staging directory to the experiment wrapper under the name 'model'.
# NOTE(review): presumably this uploads to the configured artifact location --
# confirm against experiment.py.
ex_log.log_artifacts(model_path, 'model')

In [None]:
# Terminate the run opened earlier with create_run('run_1').
# NOTE(review): presumably marks the run as finished on the tracking server --
# confirm against experiment.py.
ex_log.terminate_run()