In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Load the red wine dataset (the CSV uses ';' as its field separator).
red_wine = pd.read_csv('winequality-red.csv', delimiter=';')

# Prepare the data: every column except 'quality' is a feature, 'quality' is the target.
X_red = red_wine.drop(columns=['quality'])
y_red = red_wine['quality']

# Split the data into training and test sets (20% held out, fixed seed for reproducibility).
X_train_red, X_test_red, y_train_red, y_test_red = train_test_split(
    X_red, y_red, test_size=0.2, random_state=42
)

# Standardize the data. The scaler is fitted on the training split only and then
# re-used on the test split, so test-set statistics never influence the fit.
scaler_red = StandardScaler()
X_train_red = scaler_red.fit_transform(X_train_red)
X_test_red = scaler_red.transform(X_test_red)

# Train the Linear Regression model on the standardized training features.
regression_red = LinearRegression()
regression_red.fit(X_train_red, y_train_red)

# Predict on both splits so train and test metrics can be compared.
y_pred_train_red = regression_red.predict(X_train_red)
y_pred_test_red = regression_red.predict(X_test_red)

# Evaluate. RMSE is taken as sqrt(MSE) instead of passing `squared=False`:
# that keyword was deprecated in scikit-learn 1.4 and removed in 1.6, where it
# raises a TypeError. The explicit square root works on every version.
rmse_train_red = mean_squared_error(y_train_red, y_pred_train_red) ** 0.5
rmse_test_red = mean_squared_error(y_test_red, y_pred_test_red) ** 0.5

print("Red Wine Regression Train RMSE:", rmse_train_red)
print("Red Wine Regression Test RMSE:", rmse_test_red)
print("Red Wine Regression Train R2:", r2_score(y_train_red, y_pred_train_red))
print("Red Wine Regression Test R2:", r2_score(y_test_red, y_pred_test_red))


Red Wine Regression Train RMSE: 0.6512995910592836
Red Wine Regression Test RMSE: 0.624519930798013
Red Wine Regression Train R2: 0.3479926193529862
Red Wine Regression Test R2: 0.4031803412796219


In [2]:
# Count missing values per column; the bare last expression is the cell's output.
red_wine.isna().sum()

fixed acidity           0
volatile acidity        0
citric acid             0
residual sugar          0
chlorides               0
free sulfur dioxide     0
total sulfur dioxide    0
density                 0
pH                      0
sulphates               0
alcohol                 0
quality                 0
dtype: int64