In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import learning_curve
from sklearn.model_selection import train_test_split

In [None]:
# Load the UCI white-wine quality dataset (semicolon-separated CSV).
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-white.csv"
data = pd.read_csv(url, sep=';')

# DataFrame.info() prints its own report and returns None, so wrapping it in
# print() would only add a stray "None" line to the output.
data.info()

# Preview the first rows instead of dumping the whole frame.
data.head()

In [None]:
# Single predictor column (list selection keeps the matrix 2-D) and the target.
feature_cols = ['alcohol']
x = data[feature_cols].values
y = data['quality'].values

# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

In [None]:
# Fit a plain linear regression on the training split.
LR = LinearRegression()
LR.fit(X=x_train, y=y_train)

In [None]:
# Fit a degree-2 polynomial regression on the training split.
# Whether it fits better than the straight line has to be judged from metrics
# on the held-out test split, not assumed up front.
poly = PolynomialFeatures(degree=2)
x_poly_train = poly.fit_transform(x_train)
# Reuse the transformer fitted on the training data; the test split must only
# be transformed, never used to (re)fit a preprocessing step.
x_poly_test = poly.transform(x_test)
poly_reg = LinearRegression()
poly_reg.fit(x_poly_train, y_train)

In [None]:
# Show the feature-matrix shape before and after the polynomial expansion.
print(x_train.shape, x_poly_train.shape, sep='\n')

In [None]:
# Predict on the held-out test split with each fitted model.
y_pred_lin, y_pred_poly = LR.predict(x_test), poly_reg.predict(x_poly_test)

In [None]:
# Report test-set metrics for the linear model.

print("Linear Regression Metrics : ")
mse_lin = mean_squared_error(y_test, y_pred_lin)
rmse_lin = np.sqrt(mse_lin)  # RMSE is the square root of the MSE
r2_lin = r2_score(y_test, y_pred_lin)

# One line per metric, in the order MSE, RMSE, R2.
for label, value in (
    ("Mean Squared Error : ", mse_lin),
    ("Root Mean Squared Error : ", rmse_lin),
    ("R2 Score : ", r2_lin),
):
    print(label, value)

In [None]:
# Report test-set metrics for the polynomial model.
print("Polynomial Regression Metrics : ")
mse_poly = mean_squared_error(y_test, y_pred_poly)
rmse_poly = np.sqrt(mse_poly)  # RMSE is the square root of the MSE
r2_poly = r2_score(y_test, y_pred_poly)

# One line per metric, in the order MSE, RMSE, R2.
for label, value in (
    ("Mean Squared Error : ", mse_poly),
    ("Root Mean Squared Error : ", rmse_poly),
    ("R2 Score : ", r2_poly),
):
    print(label, value)

In [None]:
# Plot the learning curves of the linear and polynomial models.

def _plot_learning_curve(ax, sizes, train_mean, train_std, cv_mean, cv_std, title):
    """Draw mean training and cross-validation scores on ``ax`` with +/-1 std bands.

    Parameters
    ----------
    ax : matplotlib Axes to draw on.
    sizes : training-set sizes, used as the x-axis.
    train_mean, train_std : per-size mean and std of the training scores.
    cv_mean, cv_std : per-size mean and std of the cross-validation scores.
    title : title shown above the panel.
    """
    ax.plot(sizes, train_mean, label='Training Score', color='red')
    ax.plot(sizes, cv_mean, label='Cross-Validation Score', color='green')
    ax.fill_between(sizes, train_mean - train_std, train_mean + train_std, alpha=0.1, color='yellow')
    ax.fill_between(sizes, cv_mean - cv_std, cv_mean + cv_std, alpha=0.1, color='blue')
    ax.set_title(title)
    ax.set_xlabel('Training Set Size')
    # With no `scoring` argument, learning_curve uses the estimator's own
    # score method, which is R^2 for LinearRegression.
    ax.set_ylabel('R2 Score')
    ax.legend(loc='best')


# Both curves are computed on the training split so the two models are compared
# on the same rows and the test split stays held out. Each call keeps its own
# sizes array so every panel is drawn against the x-axis that belongs to its
# scores (a single shared name would be overwritten by the second call).
train_sizes_lin, train_scores_lin, test_scores_lin = learning_curve(LR, x_train, y_train, cv=5)
train_sizes_poly, train_scores_poly, test_scores_poly = learning_curve(poly_reg, x_poly_train, y_train, cv=5)
train_sizes = train_sizes_poly  # keep the shared name bound to the polynomial run's sizes

# The score arrays have one column per CV fold; aggregate across folds (axis=1).
train_mean_lin = np.mean(train_scores_lin, axis=1)
train_std_lin = np.std(train_scores_lin, axis=1)

test_mean_lin = np.mean(test_scores_lin, axis=1)
test_std_lin = np.std(test_scores_lin, axis=1)

train_mean_poly = np.mean(train_scores_poly, axis=1)
train_std_poly = np.std(train_scores_poly, axis=1)

test_mean_poly = np.mean(test_scores_poly, axis=1)
test_std_poly = np.std(test_scores_poly, axis=1)

# One figure, two side-by-side panels sharing the y-axis for direct comparison.
fig, (ax_lin, ax_poly) = plt.subplots(1, 2, figsize=(12, 6), sharey=True)
_plot_learning_curve(
    ax_lin, train_sizes_lin,
    train_mean_lin, train_std_lin,
    test_mean_lin, test_std_lin,
    title='Linear Regression',
)
_plot_learning_curve(
    ax_poly, train_sizes_poly,
    train_mean_poly, train_std_poly,
    test_mean_poly, test_std_poly,
    title='Polynomial Regression (degree 2)',
)
fig.tight_layout()
plt.show()

