# Imports

In [21]:
import pandas as pd
import numpy as np
import pickle

from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Loading Data

In [6]:
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
df_raw = pd.read_csv(url, sep=';')
df_raw.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


# Data Preparation

In [7]:
df1 = df_raw.copy()

In [9]:
train, test = train_test_split(df1)

In [10]:
X_train = train.drop('quality', 1)
y_train = train.quality

X_test = train.drop('quality', 1)
y_test = train.quality

# ML Modelling

In [12]:
model = ElasticNet(alpha=0.5, l1_ratio=0.5, random_state=42)

In [13]:
model.fit(X_train, y_train)

ElasticNet(alpha=0.5, copy_X=True, fit_intercept=True, l1_ratio=0.5,
           max_iter=1000, normalize=False, positive=False, precompute=False,
           random_state=42, selection='cyclic', tol=0.0001, warm_start=False)

In [14]:
preds = model.predict(X_test)

In [18]:
rmse = np.sqrt(mean_squared_error(y_test, preds))
mae = mean_absolute_error(y_test, preds)
r2 = r2_score(y_test, preds)

In [19]:
print('RMSE: {}'.format(rmse))
print('MAE: {}'.format(mae))
print('R2: {}'.format(r2))

RMSE: 0.751955799556155
MAE: 0.6086577687761838
R2: 0.14826162742039872


In [25]:
pickle.dump(model, open('/Users/olavo/Documents/Notebooks/DataScienceWorkshop/model_wine_quality.pkl', 'wb'))