In [1]:
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge

from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from sklearn.metrics import make_scorer

import numpy as np

from matplotlib import pyplot as plt

from sklearn.tree import DecisionTreeRegressor
from catboost import CatBoostRegressor

In [4]:
df = pd.read_csv('winequality-red.csv')

In [5]:
x = df.iloc[:,:-1]
y = df.iloc[:,-1]

X_train, X_test, y_train, y_test = train_test_split(x, y, train_size=0.6, random_state=100500)

In [6]:
mae = GridSearchCV(Lasso(), param_grid = {"alpha" : np.logspace(-3, 3, 200)}, cv = KFold(shuffle = True, random_state = 318, n_splits = 3), scoring = make_scorer(mean_absolute_error))
mae.fit(X_train, y_train)

rmse = GridSearchCV(Lasso(), param_grid = {"alpha" : np.logspace(-3, 3, 200)}, cv = KFold(shuffle = True, random_state = 318, n_splits = 3), scoring = make_scorer(mean_squared_error))
rmse.fit(X_train, y_train)

r2 = GridSearchCV(Lasso(), param_grid = {"alpha" : np.logspace(-3, 3, 200)}, cv = KFold(shuffle = True, random_state = 318, n_splits = 3), scoring = make_scorer(r2_score))
r2.fit(X_train, y_train)

GridSearchCV(cv=KFold(n_splits=3, random_state=318, shuffle=True),
             estimator=Lasso(),
             param_grid={'alpha': array([1.00000000e-03, 1.07189132e-03, 1.14895100e-03, 1.23155060e-03,
       1.32008840e-03, 1.41499130e-03, 1.51671689e-03, 1.62575567e-03,
       1.74263339e-03, 1.86791360e-03, 2.00220037e-03, 2.14614120e-03,
       2.30043012e-03, 2.46581108e-03, 2.64308149e-03, 2.83309610e-03,
       3...
       2.02550194e+02, 2.17111795e+02, 2.32720248e+02, 2.49450814e+02,
       2.67384162e+02, 2.86606762e+02, 3.07211300e+02, 3.29297126e+02,
       3.52970730e+02, 3.78346262e+02, 4.05546074e+02, 4.34701316e+02,
       4.65952567e+02, 4.99450512e+02, 5.35356668e+02, 5.73844165e+02,
       6.15098579e+02, 6.59318827e+02, 7.06718127e+02, 7.57525026e+02,
       8.11984499e+02, 8.70359136e+02, 9.32930403e+02, 1.00000000e+03])},
             scoring=make_scorer(r2_score))

In [7]:
print("Lasso")
print("RMSE : best score = ", rmse.best_score_, ", coef = ", rmse.best_estimator_.alpha)
print("MAE : best score = ", mae.best_score_, ", coef = ", mae.best_estimator_.alpha)
print("R2 : best score = ", r2.best_score_, ", coef = ", r2.best_estimator_.alpha)

Lasso
RMSE : best score =  0.6370979928935858 , coef =  5.478901179593939
MAE : best score =  0.6808180916743932 , coef =  5.478901179593939
R2 : best score =  0.33872056699557235 , coef =  0.001


In [8]:
mae = GridSearchCV(Ridge(), param_grid = {"alpha" : np.logspace(-3, 3, 200)}, cv = KFold(shuffle = True, random_state = 318, n_splits = 3), scoring = make_scorer(mean_absolute_error))
mae.fit(X_train, y_train)

rmse = GridSearchCV(Ridge(), param_grid = {"alpha" : np.logspace(-3, 3, 200)}, cv = KFold(shuffle = True, random_state = 318, n_splits = 3), scoring = make_scorer(mean_squared_error))
rmse.fit(X_train, y_train)

r2 = GridSearchCV(Ridge(), param_grid = {"alpha" : np.logspace(-3, 3, 200)}, cv = KFold(shuffle = True, random_state = 318, n_splits = 3), scoring = make_scorer(r2_score))
r2.fit(X_train, y_train)

GridSearchCV(cv=KFold(n_splits=3, random_state=318, shuffle=True),
             estimator=Ridge(),
             param_grid={'alpha': array([1.00000000e-03, 1.07189132e-03, 1.14895100e-03, 1.23155060e-03,
       1.32008840e-03, 1.41499130e-03, 1.51671689e-03, 1.62575567e-03,
       1.74263339e-03, 1.86791360e-03, 2.00220037e-03, 2.14614120e-03,
       2.30043012e-03, 2.46581108e-03, 2.64308149e-03, 2.83309610e-03,
       3...
       2.02550194e+02, 2.17111795e+02, 2.32720248e+02, 2.49450814e+02,
       2.67384162e+02, 2.86606762e+02, 3.07211300e+02, 3.29297126e+02,
       3.52970730e+02, 3.78346262e+02, 4.05546074e+02, 4.34701316e+02,
       4.65952567e+02, 4.99450512e+02, 5.35356668e+02, 5.73844165e+02,
       6.15098579e+02, 6.59318827e+02, 7.06718127e+02, 7.57525026e+02,
       8.11984499e+02, 8.70359136e+02, 9.32930403e+02, 1.00000000e+03])},
             scoring=make_scorer(r2_score))

In [9]:
print("Ridge")
print("RMSE : best score = ", rmse.best_score_, ", coef = ", rmse.best_estimator_.alpha)
print("MAE : best score = ", mae.best_score_, ", coef = ", mae.best_estimator_.alpha)
print("R2 : best score = ", r2.best_score_, ", coef = ", r2.best_estimator_.alpha)

Ridge
RMSE : best score =  0.5167003882715527 , coef =  1000.0
MAE : best score =  0.5714197922215072 , coef =  1000.0
R2 : best score =  0.34010236724033494 , coef =  0.5542664520663102


In [14]:
tree_model = DecisionTreeRegressor()
tree_model.fit(X_train, y_train)
y_pred = tree_model.predict(X_test)

In [15]:
print("DecisionTreeRegressor")
print("MSE : ",  mean_squared_error(y_test, y_pred))
print("MAE : ",  mean_absolute_error(y_test, y_pred))
print("R2 : ",  r2_score(y_test, y_pred))

DecisionTreeRegressor
MSE :  0.5984375
MAE :  0.4640625
R2 :  0.11022055727374369


In [16]:
tree_model = CatBoostRegressor()
tree_model.fit(X_train, y_train, silent=True)
y_pred = tree_model.predict(X_test)

In [17]:
print("CatBoostRegressor")
print("MSE : ",  mean_squared_error(y_test, y_pred))
print("MAE : ",  mean_absolute_error(y_test, y_pred))
print("R2 : ",  r2_score(y_test, y_pred))

CatBoostRegressor
MSE :  0.3712054142119149
MAE :  0.44792764940224983
R2 :  0.44807779159152494
