### Main imports

In [1]:
import numpy as np
import pandas as pd
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

### Regression sample

In [2]:
# input data reading
df = pd.read_csv("winequality-red.csv")
df.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


Save predictor variables and outcome variable in separate objects

In [3]:
# independent variables and dependent variable
x = df.loc[:, : "alcohol"]
y = df["quality"]

## Transform data
Scale the vairable before modeling

In [4]:
# features have to be scaled before model learning
scaler = StandardScaler()
x = scaler.fit_transform(x)

## Split data
Split data into training and testing parts

In [5]:
# spliting data to train and test parts
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)

# Analysis

## Create Regression Models

LASSO: Least Absolute Shrinkage and Selection Operator 

In [6]:
# create and fit LASSO and SVR models 
lasso = Lasso(alpha=0.001)
lasso.fit(x_train, y_train)

Lasso(alpha=0.001, copy_X=True, fit_intercept=True, max_iter=1000,
      normalize=False, positive=False, precompute=False, random_state=None,
      selection='cyclic', tol=0.0001, warm_start=False)

SVR: Support Vector Regression

In [7]:
svr = SVR(C=8, epsilon=0.2, gamma=0.5)
svr.fit(x_train, y_train)

SVR(C=8, cache_size=200, coef0=0.0, degree=3, epsilon=0.2, gamma=0.5,
    kernel='rbf', max_iter=-1, shrinking=True, tol=0.001, verbose=False)

## Test Models
LASSO Prediction

In [8]:
# make prediction on test data and rounding results
y_pred_lasso = np.round(np.clip(lasso.predict(x_test), 1, 10)).astype(int)

In [9]:
np.round(1 - mean_squared_error(y_test, y_pred_lasso) / y_test.std(), 2)

0.4

SVR Prediction

In [10]:
y_pred_svr = np.round(np.clip(svr.predict(x_test), 1, 10)).astype(int)

In [11]:
np.round(1 - mean_squared_error(y_test, y_pred_svr) / y_test.std(), 2)

0.39

This LASSO model describes just 28% of initial data dispersion. With same data SVR describes 50%. Here SVR build nonlinear decision boundary using so-called "kernel trick"