# MLOps: Vista del científico de datos (Jupyter)

**Modelo:** Predicción de la calidad del vino

**Datos:** http://archive.ics.uci.edu/ml/datasets/Wine+Quality

**Algoritmo:** sklearn.linear_model.ElasticNet

**Hiperparámetros:** alpha, l1_ratio

In [9]:
# Calidad del vino
def train(in_alpha, in_l1_ratio):
    """Train an ElasticNet wine-quality model and log the run to MLflow.

    Downloads the red-wine quality CSV, splits it into train/test sets,
    fits ``sklearn.linear_model.ElasticNet`` on the training split, prints
    RMSE / MAE / R2 computed on the test split, and records tags, the
    ``wine-quality.csv`` artifact, parameters, metrics and the fitted model
    in the 'Calidad del Vino' experiment on the tracking server configured
    below.

    Args:
        in_alpha: Value convertible to ``float`` used as ElasticNet
            ``alpha``; ``None`` selects the default of 0.5.
        in_l1_ratio: Value convertible to ``float`` used as ElasticNet
            ``l1_ratio``; ``None`` selects the default of 0.5.

    Returns:
        None. Results are printed and logged to MLflow.

    Raises:
        Exception: Any error raised while downloading/parsing the CSV is
            logged and re-raised unchanged.
    """
    import warnings

    import pandas as pd
    import numpy as np
    from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
    from sklearn.model_selection import train_test_split
    from sklearn.linear_model import ElasticNet

    import mlflow
    import mlflow.sklearn

    import logging
    logging.basicConfig(level=logging.WARNING)
    logger = logging.getLogger(__name__)

    remote_server_uri = 'http://127.0.0.1:5000/'
    mlflow.set_tracking_uri(remote_server_uri)
    # Select the experiment BEFORE starting the run: a run that is already
    # active is not moved by a later set_experiment() call.
    mlflow.set_experiment('Calidad del Vino')

    def eval_metrics(actual, pred):
        """Return (rmse, mae, r2) for the given true and predicted values."""
        rmse = np.sqrt(mean_squared_error(actual, pred))
        mae = mean_absolute_error(actual, pred)
        r2 = r2_score(actual, pred)
        return rmse, mae, r2

    warnings.filterwarnings("ignore")
    # Seeds NumPy's global RNG, which train_test_split falls back on when no
    # random_state is given, so the split is reproducible across calls.
    np.random.seed(40)

    # Fetch the data set.
    csv_url =\
        'http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
    try:
        data = pd.read_csv(csv_url, sep=';')
    except Exception as e:
        logger.exception(
            "No se puede descargar CSV de entrenamiento y prueba. Error: %s", e)
        # Without the data nothing below can run; surface the real error
        # instead of failing later on an unbound `data`.
        raise

    # Split into training and test sets (default 0.75 / 0.25).
    train_df, test_df = train_test_split(data)

    # The prediction target is the "quality" column.
    train_x = train_df.drop(["quality"], axis=1)
    test_x = test_df.drop(["quality"], axis=1)
    train_y = train_df[["quality"]]
    test_y = test_df[["quality"]]

    # Fall back to the defaults when a hyperparameter is not provided.
    # The None check must happen before float(): float(None) raises.
    alpha = 0.5 if in_alpha is None else float(in_alpha)
    l1_ratio = 0.5 if in_l1_ratio is None else float(in_l1_ratio)

    # The context manager ends the run even if training or logging fails,
    # so a failed call cannot leave a run active and block the next call.
    with mlflow.start_run():
        tags = {
            'framework': 'sklearn',
            'model': 'ElasticNet',
        }
        mlflow.set_tags(tags)
        # NOTE: expects wine-quality.csv in the current working directory.
        mlflow.log_artifact("wine-quality.csv", artifact_path=None)

        # Fit ElasticNet.
        lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
        lr.fit(train_x, train_y)

        # Evaluate on the held-out split.
        predicted_qualities = lr.predict(test_x)
        (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)

        # Print the metrics.
        print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
        print("  RMSE: %s" % rmse)
        print("  MAE: %s" % mae)
        print("  R2: %s" % r2)

        # Record parameters, metrics and the model in MLflow.
        mlflow.log_param("alpha", alpha)
        mlflow.log_param("l1_ratio", l1_ratio)
        mlflow.log_metric("rmse", rmse)
        mlflow.log_metric("r2", r2)
        mlflow.log_metric("mae", mae)

        mlflow.sklearn.log_model(lr, "model")

In [10]:
# Run one training experiment with alpha=0.5 and l1_ratio=0.5.
train(in_alpha=0.5, in_l1_ratio=0.5)

Elasticnet model (alpha=0.500000, l1_ratio=0.500000):
  RMSE: 0.7931640229276851
  MAE: 0.6271946374319587
  R2: 0.10862644997792614


In [8]:
# Run one training experiment with alpha=0.2 and l1_ratio=0.2.
train(in_alpha=0.2, in_l1_ratio=0.2)

Elasticnet model (alpha=0.200000, l1_ratio=0.200000):
  RMSE: 0.7336400911821402
  MAE: 0.5643841279275427
  R2: 0.2373946606358417


In [14]:
# Run one training experiment with alpha=0.1 and l1_ratio=0.1.
train(in_alpha=0.1, in_l1_ratio=0.1)

Elasticnet model (alpha=0.100000, l1_ratio=0.100000):
  RMSE: 0.7128829045893679
  MAE: 0.5462202174984665
  R2: 0.2799376066653344
