In [13]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error


In [4]:
# Read the semicolon-separated CSV into a DataFrame.
data = pd.read_csv("winequality-red.csv", sep=";")

# Quick sanity check on the load: prints (n_rows, n_columns).
print(data.shape)

(1599, 12)


In [5]:
# Per-column count of missing values (displayed as the cell's output).
data.isna().sum()

fixed acidity           0
volatile acidity        0
citric acid             0
residual sugar          0
chlorides               0
free sulfur dioxide     0
total sulfur dioxide    0
density                 0
pH                      0
sulphates               0
alcohol                 0
quality                 0
dtype: int64

In [7]:
# Target: the 'quality' column.
y = data['quality']
# Features: every remaining column.
X = data.drop(columns='quality')

In [8]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [20]:
# Candidate regressors, keyed by the display name used when reporting scores.
#
# The tree-based estimators are stochastic (bootstrap sampling, feature
# sub-sampling, tie-breaking between equally good splits), so each one gets a
# fixed random_state. Without it the reported errors change on every
# Restart & Run All even though the train/test split itself is seeded.
# The seed value matches the one passed to train_test_split.
models = {
    'Random Forest Regressor': RandomForestRegressor(n_estimators=100, random_state=42),
    'Gradient Boosting Regressor': GradientBoostingRegressor(
        n_estimators=100, learning_rate=0.1, random_state=42
    ),
    'Linear Regression': LinearRegression(),
    # NOTE(review): RBF-kernel SVR is sensitive to feature scale and the
    # features appear to reach it unscaled — consider wrapping it in a
    # StandardScaler pipeline; left unchanged here.
    'SVR': SVR(C=1, kernel='rbf'),
    'Decision Tree Regressor': DecisionTreeRegressor(max_depth=10, random_state=42)
}

In [21]:
# Fit every candidate on the training split and report its test-set MSE.
for model_name, model in models.items():
    # fit() returns the fitted estimator, so prediction can be chained
    # directly onto it.
    y_pred = model.fit(X_train, y_train).predict(X_test)

    # Mean squared error against the held-out targets.
    mse = mean_squared_error(y_test, y_pred)

    print(f'{model_name} - Test Mean Squared Error: {mse}\n')

Random Forest Regressor - Test Mean Squared Error: 0.31488187500000003

Gradient Boosting Regressor - Test Mean Squared Error: 0.3629826019992989

Linear Regression - Test Mean Squared Error: 0.390025143963954

SVR - Test Mean Squared Error: 0.5325010802600721

Decision Tree Regressor - Test Mean Squared Error: 0.5990230124325099

