In [1]:
import pickle
import pandas as pd

from flaml import AutoML

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [2]:
# Load the wine-quality CSV; fields in this file are separated by semicolons.
data = pd.read_csv(
    'winequality-red.csv',
    delimiter=";",
)

In [3]:
# Swap spaces for underscores in every column name so they form valid identifiers.
data.columns = data.columns.map(lambda name: name.replace(" ", "_"))

In [4]:
# Separate the predictor columns from the 'quality' target column.
X = data.drop(columns='quality')
y = data['quality']

In [5]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
Xtrain, Xtest, ytrain, ytest = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=20130810,
)

In [13]:
# Keyword arguments forwarded to AutoML.fit() via ** unpacking.
AUTOML_SETTINGS = dict(
    time_budget=360,                 # total running time in seconds
    metric='micro_f1',               # metric name handed to the search
    estimator_list=['lgbm'],         # only this estimator is listed
    task='classification',           # task type
    log_file_name='openmldata.log',  # flaml log file
    seed=20130810,                   # random seed
    verbose=1,
)

In [14]:
# Create the FLAML AutoML object; it is fitted in a later cell.
learner_automl = AutoML()

In [15]:
# Fit on the training split only, passing every entry of AUTOML_SETTINGS as a
# keyword argument (time budget, metric, estimator list, task, log file, seed).
learner_automl.fit(Xtrain, ytrain, **AUTOML_SETTINGS)

In [16]:
# Predict on the held-out split, then print the per-class report.
test_predictions = learner_automl.predict(Xtest)
print(classification_report(ytest, test_predictions))

              precision    recall  f1-score   support

           3       0.00      0.00      0.00         2
           4       0.00      0.00      0.00         9
           5       0.59      0.74      0.66       133
           6       0.55      0.50      0.52       130
           7       0.50      0.27      0.35        41
           8       1.00      0.20      0.33         5

    accuracy                           0.55       320
   macro avg       0.44      0.29      0.31       320
weighted avg       0.55      0.55      0.54       320



In [17]:
# Build a sample input: the row labelled 0, without the target column, as a
# {column name: value} dict.
individual = data.drop(columns='quality').loc[0].to_dict()

In [18]:
# Display the sample input dict built in the previous cell.
individual

{'fixed_acidity': 7.4,
 'volatile_acidity': 0.7,
 'citric_acid': 0.0,
 'residual_sugar': 1.9,
 'chlorides': 0.076,
 'free_sulfur_dioxide': 11.0,
 'total_sulfur_dioxide': 34.0,
 'density': 0.9978,
 'pH': 3.51,
 'sulphates': 0.56,
 'alcohol': 9.4}

In [24]:
def predict_single(individual, model):
    """Return the model's prediction for a single observation.

    Parameters
    ----------
    individual : dict
        Mapping of column name to value describing one observation.
    model : object
        Any object exposing ``predict(frame)`` whose result can be indexed.

    Returns
    -------
    The first element of whatever ``model.predict`` returns.
    """
    # Wrap the mapping in a list so pandas builds a one-row frame from it.
    single_row = pd.DataFrame([individual])
    predictions = model.predict(single_row)
    return predictions[0]

In [25]:
# Check the helper by predicting for the sample input with the fitted AutoML object.
predict_single(individual, learner_automl)

5

In [21]:
# Serialize the fitted AutoML object to disk using the highest pickle protocol
# this interpreter supports.
with open('wine-quality-prediction-model.pkl', mode='wb') as model_file:
    pickle.dump(learner_automl, model_file, protocol=pickle.HIGHEST_PROTOCOL)