In [29]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor



In [5]:
# Load the white-wine quality table; the file is semicolon-delimited, not comma-delimited.
data = pd.read_csv(
    "../input/white-wine-quality/winequality-white.csv",
    sep=";",
)

In [7]:
# Peek at the first five rows to sanity-check the parse.
data.head(n=5)

In [8]:
# Summary statistics per column, with the quartiles spelled out explicitly.
data.describe(percentiles=[0.25, 0.5, 0.75])

In [14]:
# Annotated heatmap of the pairwise column correlations, drawn on a 10x10-inch figure.
fig, ax = plt.subplots(figsize=(10, 10))
corr = data.corr()
sns.heatmap(data=corr, annot=True, ax=ax)

In [17]:
# Separate the target from the predictors by column NAME rather than by position,
# so the target can never end up among the features if the column order changes.
Data_Y = data['quality']
Data_X = data.drop(columns='quality')

In [21]:
# Hold out the test rows BEFORE fitting the scaler, so the held-out data never
# contributes to the mean/std used for standardisation (avoids train/test leakage).
# random_state pins the shuffle so a fresh "Restart & Run All" yields the same split.
Xtrain_raw, Xtest_raw, Ytrain, Ytest = train_test_split(
    Data_X, Data_Y, test_size=0.1, random_state=42
)

# Learn the scaling statistics from the training rows only, then reuse them
# unchanged on the test rows. Data_X itself is left unscaled, which also makes
# this cell safe to re-run.
sc = StandardScaler()
Xtrain = sc.fit_transform(Xtrain_raw)
Xtest = sc.transform(Xtest_raw)

# NOTE(review): cross_val_score is later run on the already-scaled Xtrain, so each
# CV fold still sees scaling statistics from the other folds. Wrapping scaler and
# model in a sklearn Pipeline would make the CV estimate strictly leak-free.


In [30]:
# Baseline model: ordinary least-squares linear regression.
# (LinearRegression is imported in the notebook's top import cell.)
lr = LinearRegression()
lr.fit(Xtrain, Ytrain)

# Hold-out evaluation on the test split.
pred_lr = lr.predict(Xtest)
lr_rmse = (mean_squared_error(Ytest, pred_lr))**0.5  # RMSE = sqrt(MSE)
lr_score = r2_score(Ytest, pred_lr)

# 10-fold cross-validation on the training split. No `scoring` is passed, so the
# estimator's own score method is used, which is R^2 for scikit-learn regressors.
cv_lr = cross_val_score(estimator = lr, X = Xtrain, y = Ytrain, cv = 10)

print('Linear Regression:')
print('R2 Score:', lr_score)
print('Root Mean Squared Error:', lr_rmse)
print('Cross Validation', cv_lr.mean())

In [31]:
# Single regression tree with default hyper-parameters.
# (DecisionTreeRegressor is imported in the notebook's top import cell.)
# random_state pins the tree's internal randomness so the reported metrics
# are reproducible from run to run.
dt = DecisionTreeRegressor(random_state=42)
dt.fit(Xtrain, Ytrain)

# Hold-out evaluation on the test split.
pred_dt = dt.predict(Xtest)
dt_rmse = (mean_squared_error(Ytest, pred_dt))**0.5  # RMSE = sqrt(MSE)
dt_score = r2_score(Ytest, pred_dt)

# 10-fold cross-validation on the training split. No `scoring` is passed, so the
# estimator's own score method is used, which is R^2 for scikit-learn regressors.
cv_dt = cross_val_score(estimator = dt, X = Xtrain, y = Ytrain, cv = 10)

print('Decision Tree:')
print('R2 Score:', dt_score)
print('Root Mean Squared Error:', dt_rmse)
print('Cross Validation', cv_dt.mean())

In [32]:
# Support-vector regression with an RBF kernel; gamma='scale' lets scikit-learn
# derive the kernel width from the training data.
# (SVR is imported in the notebook's top import cell.)
svr = SVR(kernel = 'rbf', gamma = 'scale')
svr.fit(Xtrain, Ytrain)

# Hold-out evaluation on the test split.
pred_svr = svr.predict(Xtest)
svr_rmse = (mean_squared_error(Ytest, pred_svr))**0.5  # RMSE = sqrt(MSE)
svr_score = r2_score(Ytest, pred_svr)

# 10-fold cross-validation on the training split. No `scoring` is passed, so the
# estimator's own score method is used, which is R^2 for scikit-learn regressors.
cv_svr = cross_val_score(estimator = svr, X = Xtrain, y = Ytrain, cv = 10)

print('Support Vector Machine:\n')
print('R2 Score:', svr_score)
print('Root Mean Squared Error:', svr_rmse)
print('Cross Validation', cv_svr.mean())

In [33]:
# Random forest of 60 trees.
# (RandomForestRegressor is imported in the notebook's top import cell.)
# random_state pins the bootstrap sampling and per-tree randomness so the
# reported metrics are reproducible from run to run.
rf = RandomForestRegressor(n_estimators = 60, random_state = 42)
rf.fit(Xtrain, Ytrain)

# Hold-out evaluation on the test split.
pred_rf = rf.predict(Xtest)
rf_rmse = (mean_squared_error(Ytest, pred_rf))**0.5  # RMSE = sqrt(MSE)
rf_score = r2_score(Ytest, pred_rf)

# 10-fold cross-validation on the training split. No `scoring` is passed, so the
# estimator's own score method is used, which is R^2 for scikit-learn regressors.
cv_rf = cross_val_score(estimator = rf, X = Xtrain, y = Ytrain, cv = 10)

print('Random Forest:\n')
print('R2 Score:', rf_score)
print('Root Mean Squared Error:', rf_rmse)
print('Cross Validation', cv_rf.mean())

In [34]:
# One row per model: (display name, hold-out R2, hold-out RMSE, mean CV score).
# Keeping each model's numbers on a single row makes it hard to misalign them.
summary_rows = [
    ('Linear Regression', lr_score, lr_rmse, cv_lr.mean()),
    ('Decision Tree', dt_score, dt_rmse, cv_dt.mean()),
    ('Random Forest', rf_score, rf_rmse, cv_rf.mean()),
    ('Support Vector', svr_score, svr_rmse, cv_svr.mean()),
]

# Transpose the rows into the four parallel lists used by the cells below.
model, scores, rmse, crv = map(list, zip(*summary_rows))

In [35]:
# Assemble the per-model metrics into one table; columns appear in the order listed.
column_names = ['Model', 'R2_Score', 'RMSE', 'Cross Validation']
column_values = [model, scores, rmse, crv]
res_dict = dict(zip(column_names, column_values))

Result = pd.DataFrame(data=res_dict)
Result

In [36]:
# Overlay the three summary metrics per model on one set of axes.
# Each series carries its own label, so the legend cannot drift out of sync
# with the order of the plot calls.
# NOTE: R2 / cross-validation score (higher is better) and RMSE (lower is better)
# share one y-axis here; compare models within a line, not heights across lines.
metrics_fig, metrics_ax = plt.subplots()
metrics_ax.plot(model, scores, marker='o', label='R2 Score')
metrics_ax.plot(model, rmse, marker='o', label='RMSE')
metrics_ax.plot(model, crv, marker='o', label='Cross')
metrics_ax.set_xlabel('Model')
metrics_ax.set_ylabel('Metric value')
metrics_ax.set_title('Model comparison: R2, RMSE and cross-validation score')
metrics_ax.legend();  # trailing ';' suppresses the Legend repr in the cell output

In [37]:
# Bar chart of the hold-out R2 score for each model.
r2_axes = plt.gca()
r2_axes.bar(x=model, height=scores, width=0.3)
r2_axes.set_xlabel('Model')
r2_axes.set_ylabel('R2 Score')


In [38]:
# Bar chart of the hold-out RMSE for each model.
rmse_axes = plt.gca()
rmse_axes.bar(x=model, height=rmse, width=0.3)
rmse_axes.set_xlabel('Model')
rmse_axes.set_ylabel('RMSE')

In [39]:
# Bar chart of the mean cross-validation score for each model.
cv_axes = plt.gca()
cv_axes.bar(x=model, height=crv, width=0.3)
cv_axes.set_xlabel('Model')
cv_axes.set_ylabel('CV Score')
