# Red Wine Quality Prediction

The classes are ordered and imbalanced (e.g. there are many more normal wines than excellent or poor ones).

Use machine learning to determine which physicochemical properties make a wine 'good'!


### We are going to do both Multiclass Classification and Binary Classification here



In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier


In [None]:
# Load the red-wine quality table from the Kaggle input directory and display it.
data = pd.read_csv(
    filepath_or_buffer="../input/red-wine-quality-cortez-et-al-2009/winequality-red.csv"
)
data

In [None]:
# Print a summary of the frame: columns, non-null counts and dtypes.
data.info()

In [None]:
# Descriptive statistics (count, mean, std, quartiles, ...) for the numeric columns.
data.describe()

In [None]:
# Number of missing values in each column.
data.isna().sum()

In [None]:
# Keep a handle on the column index and display it.
column=data.columns
column

In [None]:
# Horizontal bar chart of how many wines fall into each quality score.
ax = sns.countplot(y="quality", data=data)

In [None]:
# Distinct quality scores present in the data (the multiclass labels).
data["quality"].unique()

# Binary Class Conversion

In [None]:
# Build a binary-label copy of the data: quality >= 7 becomes 1, everything
# else becomes 0. `assign` returns a new frame, so `data` is left untouched.
data_B = data.assign(quality=(data["quality"] >= 7).astype("int64"))

In [None]:
# How many rows fall into each of the two binary classes.
data_B["quality"].value_counts()

In [None]:
# Preview the first rows of the binary-labelled frame.
data_B.head()

# Preprocessing

In [None]:
# Features are shared by both tasks; only the target differs.
#   y -> original quality score (multiclass target)
#   Y -> binarised quality taken from data_B (binary target)
X = data.drop("quality", axis=1).copy()
y = data["quality"].copy()
Y = data_B["quality"].copy()

# Split ONCE, passing both targets together, so the multiclass and binary
# labels are guaranteed to refer to the same train/test rows (70% / 30%).
X_train, X_test, y_train, y_test, Y_train, Y_test = train_test_split(
    X, y, Y, train_size=0.70, random_state=123
)

# Fit the scaler on the training rows only. Fitting it on the full dataset
# before splitting would leak test-set mean/variance into training and make
# the reported test accuracy optimistic.
scaler = StandardScaler()
X_train = pd.DataFrame(
    scaler.fit_transform(X_train), columns=X.columns, index=X_train.index
)
X_test = pd.DataFrame(
    scaler.transform(X_test), columns=X.columns, index=X_test.index
)

# Keep `X` standardised as well (using the train-fitted statistics) so that
# it remains a scaled feature frame, as it was before this change.
X = pd.DataFrame(scaler.transform(X), columns=X.columns, index=X.index)

In [None]:
# Display the scaled training features.
X_train

In [None]:
# Show which label values ended up in the training split for each task.
for caption, target in (
    ("Multiclass label :", y_train),
    ("Binary label     :", Y_train),
):
    print(caption, target.unique())

#  Training and Testing

In [None]:
# Seed the stochastic estimators so the reported accuracies are reproducible
# from run to run, in line with the seeded train/test split.
RANDOM_STATE = 123

# Display name (padded so the printed scores line up) -> estimator.
models = {
    "LogisticRegression        ": LogisticRegression(),
    "DecisionTreeClassifier    ": DecisionTreeClassifier(random_state=RANDOM_STATE),
    "RandomForestClassifier    ": RandomForestClassifier(random_state=RANDOM_STATE),
    "GradientBoostingClassifier": GradientBoostingClassifier(random_state=RANDOM_STATE),
}


def _fit_and_report(title, train_features, train_target, test_features, test_target):
    """Fit every estimator in ``models`` and print its test accuracy.

    Args:
        title: Heading printed above the scores.
        train_features: Feature matrix used for fitting.
        train_target: Labels matching ``train_features``.
        test_features: Feature matrix used for scoring.
        test_target: Labels matching ``test_features``.

    The estimators are fitted in place, so a later call with a different
    target replaces what an earlier call learned.
    """
    print(title + ' \n--------------------------------')
    for name, model in models.items():
        model.fit(train_features, train_target)
        # `score` returns mean accuracy in [0, 1]; report it as a percentage.
        print(name + ": {:.2f}%".format(model.score(test_features, test_target) * 100))


# Multiclass task: predict the original quality score.
_fit_and_report('Multiclass Classification', X_train, y_train, X_test, y_test)

print(' ')

# Binary task: the same estimators are refit on the binary labels, so after
# this call `models` holds the binary-fitted estimators.
_fit_and_report('Binary Classification', X_train, Y_train, X_test, Y_test)
    
