# Import the libraries

In [1]:
# Numerical and tabular data handling.
import numpy as np
import pandas as pd
# Plotting.
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Suppress every warning for the rest of the session. NOTE(review): this also
# hides deprecation notices from the libraries imported above.
warnings.filterwarnings('ignore')

# Load the Dataset

In [2]:
# Read the dataset from a CSV file; the path is relative to the notebook's
# working directory.
df = pd.read_csv("winequality-red.csv")
# Display the loaded frame as the cell output.
df

# Understanding the data

In [3]:
# Summary of the frame: column names, dtypes and non-null counts.
df.info()

In [4]:
# Descriptive statistics for the numeric columns.
df.describe()

In [5]:
# Number of missing values in each column.
df.isna().sum()

In [6]:
# (rows, columns) before duplicate rows are removed.
df.shape

In [7]:
# Number of rows that repeat an earlier row exactly.
df.duplicated().sum()

In [8]:
# Keep the first occurrence of every repeated row and discard the rest.
# The index is not renumbered, so labels may have gaps afterwards.
df = df.drop_duplicates(keep='first')

In [9]:
# (rows, columns) after duplicate rows were removed.
df.shape

# Visualising the Data

In [10]:
# Scatter plots for every pair of columns, with per-column distributions on
# the diagonal.
sns.pairplot(data=df)
plt.show()

In [11]:
# Pairwise correlations between all columns, drawn as an annotated heatmap.
correlations = df.corr()
plt.figure(figsize=(20, 25))
sns.heatmap(data=correlations, annot=True, cmap="YlGnBu")
plt.show()

In [12]:
# One 20-bin histogram per column on a shared 10x10 inch figure.
df.hist(figsize=(10, 10), bins=20)
plt.show()

In [13]:
# Average alcohol content for each quality score.
# Passing the raw columns straight to plt.bar draws one bar per wine, all
# stacked on the same few x positions, so only the tallest bar (the maximum)
# for each score is actually visible. Aggregating first gives one bar per
# score with a well-defined meaning.
mean_alcohol_by_quality = df.groupby('quality')['alcohol'].mean()
plt.bar(mean_alcohol_by_quality.index, mean_alcohol_by_quality.values)
plt.xlabel('quality')
plt.ylabel('mean alcohol')
plt.show()

# Split data into Training and Testing data

In [14]:
# The last column is the target; every column before it is a feature.
n_features = df.shape[1] - 1
X = df.iloc[:, :n_features]
y = df.iloc[:, n_features]

In [15]:
# First rows of the feature frame.
X.head()

In [16]:
# First values of the target column.
y.head()

In [17]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [18]:
# Report the dimensions of the training features and training target.
for label, part in (('X train shape:', X_train), ('y train shape:', y_train)):
    print(label, part.shape)

In [19]:
# First rows of the training features (still a DataFrame at this point).
X_train.head()

In [20]:
# First values of the training target.
y_train.head()

# Scaling the Features: StandardScaler

In [21]:
# Both scaler classes are imported here; the following cell scales the
# features with StandardScaler.
from sklearn.preprocessing import MinMaxScaler,StandardScaler
# NOTE(review): this MinMaxScaler instance is not referenced by any other
# visible cell - confirm whether it can be removed.
scaler = MinMaxScaler()

In [22]:
# Standardise the features. The scaler is fitted on the training split only
# and the same fitted scaler is then applied to the test split.
# Both results are NumPy arrays that replace the original DataFrames.
X_scaler = StandardScaler()
X_train = X_scaler.fit_transform(X_train)
X_test = X_scaler.transform(X_test)

# A scaler for the target is created but never fitted: y_train and y_test
# are left on their original scale.
y_scaler = StandardScaler()

In [23]:
# Show the scaled training features (a NumPy array after fit_transform).
X_train

# Building a linear model

In [24]:
from sklearn.linear_model import LinearRegression

# Linear regression fitted on the scaled training features.
lr = LinearRegression()
lr.fit(X=X_train, y=y_train)

In [25]:
# Fitted coefficient for each (scaled) feature.
lr.coef_

In [26]:
# Fitted intercept term.
lr.intercept_

In [27]:
# Score of the model on the data it was trained on (R^2 for regressors).
lr.score(X_train,y_train)

In [28]:
# Predict the target for the scaled test features.
y_pred = lr.predict(X_test)
# Display the raw (unrounded) predictions.
y_pred

In [29]:
from sklearn.metrics import r2_score

# R^2 of the predictions against the held-out test targets.
r2_score(y_true=y_test, y_pred=y_pred)

In [30]:
# Reshape the test targets into a single column of shape (n_samples, 1).
# np.asarray accepts both the original pandas Series and an already-converted
# ndarray, so re-running this cell does not raise AttributeError the way
# `y_test.values` would on a second execution.
y_test = np.asarray(y_test).reshape(-1, 1)

In [31]:
# Side-by-side table of the true targets and the predictions rounded to the
# nearest whole number.
comparison = {
    'actual': y_test.flatten(),
    'Predicted': y_pred.flatten().round(0),
}
dframe = pd.DataFrame(comparison)
dframe

# Model Evaluation

In [32]:
# Scatter of true targets (x) against model predictions (y).
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred)
ax.set_title('Actual Vs Prediction ')
ax.set_xlabel('Actual')
ax.set_ylabel('Predicted')
plt.show()

# Save the model

In [33]:
import pickle

# Persist the fitted regression model.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(lr, model_file)

# The model was fitted on features standardised by X_scaler, so raw inputs
# must go through the same fitted scaler before predict(). Save it next to
# the model so the pair can be reloaded together.
with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(X_scaler, scaler_file)

## Sample Test Cases

# A single sample with 11 feature values.
# NOTE(review): the values look like already-standardised features, which is
# what the model expects - confirm where this row came from.
inp = [[
    2.10249775, 0.20024074, 1.18265603, 0.08601122,
    -0.12585524, -0.94963251, -0.43326827, 2.14418664,
    -1.25027497, 0.78265071, 0.25793281,
]]
pred = lr.predict(inp)
# Predicted value for the one sample.
pred[0]