### Import the `ml_training` folder as a package and import the `train` script from it

In [7]:
import os
import sys
# Resolve the parent directory to an absolute path and register it on the
# import search path. The membership guard keeps a re-run of this cell from
# appending the same entry twice.
module_path = os.path.abspath(os.pardir)
parent_already_registered = module_path in sys.path
if not parent_already_registered:
    sys.path.append(module_path)

In [2]:
from ml_training import train

## Load data

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder

In [4]:
# Load the wine-quality CSV into a DataFrame; the file location is the
# DATASET constant exposed by the train module.
data = pd.read_csv(filepath_or_buffer=train.DATASET)

## Run training

In [9]:

# Partition the data into a training set and a held-out test set using the
# project's own splitter (noted by the original author as a (0.75, 0.25) split).
train_data, test = train.train_test_split(data)

# "quality" is the column being predicted (noted by the original author as a
# scalar from [3, 9]); everything else is used as a feature.
# The double brackets keep the target as a single-column DataFrame.
train_x = train_data.drop(columns=["quality"])
train_y = train_data[["quality"]]
test_x = test.drop(columns=["quality"])
test_y = test[["quality"]]

# Fit the model; the returned object is the MLflow run, whose run id is
# needed later to load the trained model for inference checks.
train_run = train.train_model(
    alpha=0.3,
    l1_ratio=0.5,
    train_x=train_x,
    train_y=train_y,
    test_x=test_x,
    test_y=test_y,
)

Elasticnet model (alpha=0.3, l1_ratio=0.5):
  RMSE: 0.8180482314819962
  MAE: 0.637743823768237
  R2: 0.17476981233924171


Registered model 'wine-quality-prediction' already exists. Creating a new version of this model...
2021/08/27 10:21:34 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: wine-quality-prediction, version 6
Created version '6' of model 'wine-quality-prediction'.


In [6]:
# Build a test handler around the inference code so that the freshly trained
# model can be exercised the same way the service would call it.
# The handler is tied to the MLflow run produced by the training cell.
trained_run_id = train_run.info.run_id
test_handler = train.create_test_handler(
    run_id=trained_run_id,
    model_name=train.MODEL_PREDICTOR_NAME,
    inference_code_location='../ml-service/',
)

# Wine A: query the model through the GraphQL interface of the handler
wine_a_query = '''
    query {
        prediction(
          fixedAcidity: 6.2, volatileAcidity: 0.32, 
          citricAcid: 0.35, residualSugar: 6.1, 
          chlorides: 0.04, freeSulfurDioxide: 50.0, 
          totalSulfurDioxide: 100.0, density: 0.98,
          pH: 3.10, sulphates: 0.2, alcohol: 10.1
        ) {
            quality
        }
    }
    '''
wine_a_response = test_handler.query_graphl(wine_a_query)
# The response is indexed by the query field and then by the requested attribute.
graphql_prediction = wine_a_response['prediction']['quality']
print(f'Quality of Wine A predicted as: {graphql_prediction}')

# Wine B: query the model through the keyword-argument interface, using the
# dataset's column names as keys.
wine_b = {
    'fixed acidity': 7.0,
    'volatile acidity': 0.30,
    'citric acid': 0.14,
    'residual sugar': 1.2,
    'chlorides': 0.02,
    'free sulfur dioxide': 10.0,
    'total sulfur dioxide': 120.0,
    'density': 0.97,
    'pH': 3.2,
    'sulphates': 0.3,
    'alcohol': 11.0,
}
dict_prediction = test_handler.query(**wine_b)
print(f'Quality of Wine B predicted as: {dict_prediction}')

# Check that density is being validated: same wine as above, but with
# density overridden to 1.2, which the handler is expected to reject.
# Overriding an existing key keeps its position, so argument order is unchanged.
wine_b_bad_density = {**wine_b, 'density': 1.2}
try:
    test_handler.query(**wine_b_bad_density)
    # Reaching this line means the invalid input was accepted; fail loudly.
    raise Exception('Exception has not been raised for density = 1.2')
except train.InvalidModelInputException:
    print('Exception has been raised for density = 1.2')

Quality of Wine A predicted as: 6.05694305139275
Quality of Wine B predicted as: 5.835322956062144
Exception has been raised for density = 1.2
