In [1]:
import pandas as pd 
import sklearn
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from sklearn.linear_model import LinearRegression
from urllib.parse import urlparse
import mlflow 
import mlflow.sklearn 
import numpy as np

import matplotlib.pyplot as plt

In [2]:
# Location of the red-wine quality CSV; the file is read with ';' as separator.
csv_url = "http://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv"

# Download the file into a DataFrame and preview the first rows.
data = pd.read_csv(csv_url, sep=';')
data.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [3]:
# Hold out part of the rows for evaluation (train_test_split's default sizes).
# A fixed random_state makes the split -- and every metric computed from it --
# reproducible on Restart & Run All; 42 matches the seed used for ElasticNet
# further down in this notebook.
train, test = train_test_split(data, random_state = 42)
print(f'shapes: \ntrain: {train.shape}, test: {test.shape}')

shapes: 
train: (1199, 12), test: (400, 12)


In [4]:
# Separate predictors from the target for each partition.
# The target is kept as a one-column DataFrame (double brackets), not a Series.
train_x, train_y = train.drop(columns='quality'), train[['quality']]
test_x, test_y = test.drop(columns='quality'), test[['quality']]

# Sanity check: show the shape of every piece.
print(f'''
shapes: 
train_x: {train_x.shape}, 
train_y: {train_y.shape},
test_x: {test_x.shape},
test_y: {test_y.shape}
''')


shapes: 
train_x: (1199, 11), 
train_y: (1199, 1),
test_x: (400, 11),
test_y: (400, 1)



In [35]:
# Baseline model: ordinary least squares on all features.
lr1 = LinearRegression().fit(X = train_x, y = train_y)

# train_y is a one-column DataFrame, so predict() returns an (n, 1) array.
# Flatten it with ravel() instead of reshaping to a hard-coded length (400),
# so the cell keeps working when the test-set size changes.
predicted_qualities = lr1.predict(test_x).ravel()

# Side-by-side view of true vs. predicted quality, indexed like test_y.
df_prediction = pd.DataFrame({'test_y': test_y.quality, 'predicted': predicted_qualities})
df_prediction.head()

Unnamed: 0,test_y,predicted
1006,7,6.554333
552,6,5.732022
410,6,5.333098
920,5,5.753958
1381,5,4.929838


In [9]:
# Error metrics of the current predictions against the held-out targets.
mae = mean_absolute_error(test_y, predicted_qualities)
r2 = r2_score(test_y, predicted_qualities)
mse = mean_squared_error(test_y, predicted_qualities)
rmse = np.sqrt(mse)  # RMSE is derived from MSE

# One line per metric, wrapped in blank lines (r2 is computed but not printed).
print(
    "",
    "Regressão linear simples: só um exemplo",
    f"MSE: {mse}",
    f"RMSE: {rmse}",
    f"MAE: {mae}",
    "",
    sep="\n",
)


Regressão linear simples: só um exemplo
MSE: 0.4840011318469941
RMSE: 0.6957018986944007
MAE: 0.5372627090802302



In [44]:
# Persist the dataset and the predictions so they can be attached to the run.
data.to_csv('winequality_red.csv', sep = ';')
# The predictions are written as CSV, so the file gets a .csv extension
# (it was previously saved as 'results.pkl', which is not a pickle file).
df_prediction.to_csv('results.csv')

with mlflow.start_run(run_name = "regressao_simples"):
    mlflow.set_tag('regressao_linear_simples','0')
    # Evaluation results are numeric metrics, not input parameters: log them
    # with log_metric so MLflow stores them as metrics of the run.
    mlflow.log_metric('mse', mse)
    mlflow.log_metric('rmse', rmse)
    mlflow.log_metric('r2', r2)
    mlflow.log_metric('mae', mae)
    mlflow.log_param('idade do sergio',38)
    # Store the fitted model and the two files written above with the run.
    mlflow.sklearn.log_model(lr1, "linear_regression_model")
    mlflow.log_artifact('winequality_red.csv')
    mlflow.log_artifact('results.csv')

In [45]:
# Hyperparameters passed to ElasticNet below.
alpha, l1_ratio = 0.5, 0.5

# Build and fit the model in one expression (fit returns the estimator).
lr = ElasticNet(alpha = alpha, l1_ratio = l1_ratio, random_state = 42).fit(train_x, train_y)

# Predict on the held-out features; this replaces the earlier predictions.
predicted_qualities = lr.predict(test_x)

# True vs. predicted quality, plus the prediction rounded to an integer score.
df_prediction = pd.DataFrame(
    {'test_y': test_y['quality'], 'predicted': predicted_qualities}
).assign(predicted_rounded = lambda frame: frame['predicted'].map(round))
df_prediction.head()

Unnamed: 0,test_y,predicted,predicted_rounded
1006,7,5.845366,6
552,6,5.601084,6
410,6,5.403951,5
920,5,5.731435,6
1381,5,5.612048,6


In [46]:
# Score the ElasticNet predictions against the held-out targets.
mse = mean_squared_error(test_y, predicted_qualities)
mae = mean_absolute_error(test_y, predicted_qualities)
r2 = r2_score(test_y, predicted_qualities)
rmse = np.sqrt(mse)  # RMSE is derived from MSE

# Assemble the report line by line; the empty first and last entries produce
# the leading and trailing blank lines (r2 is computed but not printed).
print("\n".join([
    "",
    f"Elasticnet model: alpha = {alpha}, l1_ration = {l1_ratio}",
    f"MSE: {mse}",
    f"RMSE: {rmse}",
    f"MAE: {mae}",
    "",
]))


Elasticnet model: alpha = 0.5, l1_ration = 0.5
MSE: 0.628681648690539
RMSE: 0.7928944751292816
MAE: 0.638227158446278



In [47]:
# NOTE(review): this cell recomputes and prints the same ElasticNet metrics as
# the cell directly before it; it looks like an accidental duplicate.
mse, r2, mae = (
    metric(test_y, predicted_qualities)
    for metric in (mean_squared_error, r2_score, mean_absolute_error)
)
rmse = np.sqrt(mse)  # RMSE is derived from MSE

# r2 is computed above but is not part of the printed report.
print(f"""
Elasticnet model: alpha = {alpha}, l1_ration = {l1_ratio}
MSE: {mse}
RMSE: {rmse}
MAE: {mae}
""")


Elasticnet model: alpha = 0.5, l1_ration = 0.5
MSE: 0.628681648690539
RMSE: 0.7928944751292816
MAE: 0.638227158446278



In [48]:
with mlflow.start_run(run_name = "elasticnet"):
    # Hyperparameters of the ElasticNet model are run parameters.
    mlflow.log_param('alpha', alpha)
    # Previously every entry below was logged with the value of `alpha`
    # (copy-paste error); each one now records its own variable.
    mlflow.log_param('l1_ratio', l1_ratio)
    # Evaluation results are numeric metrics, so they go through log_metric.
    # 'mse' is logged too, matching the "regressao_simples" run.
    mlflow.log_metric('mse', mse)
    mlflow.log_metric('rmse', rmse)
    mlflow.log_metric('r2', r2)
    mlflow.log_metric('mae', mae)
    mlflow.log_param('idade do sergio',38)
    # Store the fitted ElasticNet estimator with the run.
    mlflow.sklearn.log_model(lr, "elasticnet_model")