# Regression using Red wine quality dataset

### Import libraries

In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

### Import Red wine dataset

In [None]:
# Read the red wine quality CSV into a DataFrame; the path is relative to the
# notebook's working directory.
data = pd.read_csv('../input/winequality-red.csv')
# Preview the first rows to sanity-check the load.
data.head()

In [None]:
# Show the DataFrame's dimensions as (rows, columns).
data.shape

### Check for null values

In [None]:
# Display every row that has a null in at least one column; an empty result
# means there are no missing values to handle.
data[data.isnull().any(axis=1)]

### Make list of features as regressor inputs

In [None]:
# Dimensions again, for reference while selecting the feature columns.
data.shape

In [None]:
# List the column names available to choose features and target from.
data.columns

In [None]:
# Input columns for the regressors: every measured property of the wine.
# The 'quality' column is left out because it is used as the target.
features = [
    'fixed acidity',
    'volatile acidity',
    'citric acid',
    'residual sugar',
    'chlorides',
    'free sulfur dioxide',
    'total sulfur dioxide',
    'density',
    'pH',
    'sulphates',
    'alcohol',
]

### Make target variable

In [None]:
# Column to predict. Kept as a one-element list, so indexing the DataFrame
# with it yields a single-column DataFrame rather than a Series.
target = ['quality']

### Assign input and output variables

In [None]:
# Feature matrix: the columns named in `features`.
X = data[features]

In [None]:
# Target values. `target` is a list, so this is a one-column DataFrame
# (2-D), not a 1-D Series.
y = data[target]

### Split the data

In [None]:
# Hold out 33% of the rows for testing; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=324)

### Train linear regression model

In [None]:
# Fit an ordinary least squares linear model on the training split.
regressor = LinearRegression()
regressor.fit(X_train,y_train)

In [None]:
# Predict quality for the held-out test rows with the linear model.
pred_quality = regressor.predict(X_test)

In [None]:
# Display the raw linear-model predictions.
pred_quality

In [None]:
# Summary statistics of the true test-set quality values, to put the
# predictions and the error figure in context.
y_test.describe()

### Linear regressor model error

In [None]:
# Root-mean-squared error of the linear model on the held-out set.
# mean_squared_error returns the (squared) MSE, so take the square root to
# report the error in the same units as the quality score.
RMSE = np.sqrt(mean_squared_error(y_true=y_test, y_pred=pred_quality))
print(RMSE)

### Compare with DecisionTreeRegressor model

In [None]:
# Fit a decision tree (depth capped at 30) on the same training split.
# NOTE: this rebinds `regressor`, replacing the linear model fitted earlier.
# random_state is fixed because the tree permutes features at each split;
# without a seed the fitted tree, and therefore the score compared against
# the linear model, can differ from run to run. The value matches the seed
# used for the train/test split.
regressor = DecisionTreeRegressor(max_depth=30, random_state=324)
regressor.fit(X_train, y_train)

In [None]:
# Predict quality for the held-out test rows with the decision tree.
# This overwrites the linear model's predictions stored under the same name.
pred_quality = regressor.predict(X_test)

In [None]:
# Display the raw decision-tree predictions.
pred_quality

In [None]:
# Summary statistics of the true test-set quality values, repeated here for
# side-by-side comparison with the decision-tree predictions.
y_test.describe()

### DecisionTreeRegressor model error

In [None]:
# Root-mean-squared error of the decision tree on the held-out set.
# mean_squared_error returns the (squared) MSE, so take the square root to
# report the error in the same units as the quality score.
RMSE = np.sqrt(mean_squared_error(y_true=y_test, y_pred=pred_quality))
print(RMSE)