In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os

# Walk the Kaggle input directory and print the full path of every file in it.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score,KFold
from sklearn.ensemble import AdaBoostClassifier,GradientBoostingClassifier,RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression

In [None]:
# Load the red-wine quality CSV from the Kaggle input directory into `df`,
# then display the frame as the cell output.
df=pd.read_csv("/kaggle/input/red-wine-quality-cortez-et-al-2009/winequality-red.csv")
df

In [None]:
# Print the frame summary (columns, dtypes, non-null counts) via DataFrame.info().
df.info()

In [None]:
# Count missing values per column.
df.isnull().sum()

In [None]:
# Frequency of each value in the `quality` column.
df["quality"].value_counts()

In [None]:
# Binarise the target in one vectorised step: rows with quality above 6.5
# become class 1, every other row becomes class 0.
#
# The previous element-wise loop used chained indexing (df["quality"][i] = ...),
# which raises SettingWithCopyWarning and, under pandas copy-on-write, does not
# modify `df` at all. A single column assignment avoids both problems, so the
# blanket warnings.filterwarnings("ignore") that hid the warning is dropped.
#
# NOTE: this overwrites `quality` in place. Run the cell once per kernel:
# re-running it on the already-binarised column would map every row to 0.
df["quality"] = (df["quality"] > 6.5).astype(int)

In [None]:
# Class counts after binarisation (1 = quality above 6.5, 0 = otherwise).
df["quality"].value_counts()

In [None]:
# Draw a 20-bin histogram of every column on a single 16x10 figure.
df.hist(bins=20,figsize=(16,10))
plt.show()

In [None]:
# Compute the pairwise correlation matrix of all columns and show it as an
# annotated heatmap on a 12x8 figure.
plt.figure(figsize=(12,8))
corr=df.corr()
sns.heatmap(corr,annot=True)

In [None]:
# Seaborn pairplot of all columns, coloured by the binary `quality` label.
sns.pairplot(df,hue="quality")
plt.show()

**We need to check whether there are any duplicates. If we find any, we need to drop them.**

In [None]:
# Show the rows flagged by DataFrame.duplicated(), i.e. rows that repeat an earlier row.
df.loc[df.duplicated()]

**The rows above are duplicates. We need to drop them; otherwise they may bias our analysis.**

In [None]:
# Drop exact duplicate rows and keep the result.
# DataFrame.drop_duplicates() returns a new frame; the original call discarded
# it, so `df` still contained every duplicate afterwards.
# ignore_index=True renumbers the remaining rows 0..n-1.
df = df.drop_duplicates(ignore_index=True)
df

**So we have removed all the duplicates, i.e., around 240 rows.**

In [None]:
# Class counts of `quality` at this point in the notebook.
df["quality"].value_counts()

**So, as I mentioned before, this is an imbalanced dataset. We need to apply some kind of resampling to get good predictions.**

In [None]:
# Split the frame into features (every column but the last) and target (the
# last column), then oversample with SMOTE using 4 nearest neighbours.
# random_state pins SMOTE's synthetic-sample generation so a fresh
# "Restart & Run All" reproduces the same resampled data.
#
# NOTE(review): resampling happens before the train/test split further down,
# so synthetic points interpolated from rows that later land in the test set
# can end up in training. Reported scores will be optimistic. Consider
# resampling the training split only.
X = df.iloc[:, :-1]
y = df.iloc[:, -1]
smote = SMOTE(k_neighbors=4, random_state=100)
X, y = smote.fit_resample(X, y)

In [None]:
# Display the feature matrix returned by SMOTE.fit_resample.
X

In [None]:
# Class counts of the resampled target.
y.value_counts()

**Now our dataset is balanced, so our data is ready and we can start the modelling process.**

In [None]:
# Standardise every feature with StandardScaler and overwrite X with the result.
# NOTE(review): the scaler is fitted on all rows before the train/test split
# that follows, so test-set statistics leak into training. Consider fitting on
# the training split only.
sc=StandardScaler()
X=sc.fit_transform(X)

**We are now done with the scaling part. Now we can train the model and start testing.**

In [None]:
# Hold out 30% of the rows as a test set; random_state fixes the shuffle.
# NOTE(review): X and y were already SMOTE-resampled and scaled before this
# split, so the test set is not independent of the training data.
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.3,random_state=100)

In [None]:
# Baseline: logistic regression fitted on the training split and scored on the
# held-out test split.
model = LogisticRegression().fit(X_train, y_train)
pred = model.predict(X_test)  # reused by the confusion-matrix and report cells that follow
print("accuracy_score = ", accuracy_score(y_test, pred))

In [None]:
# Confusion matrix for the logistic-regression predictions held in `pred`.
print(
    "confusion matrix of Logistic Regression model \n",
    confusion_matrix(y_test, pred),
)

In [None]:
# Per-class precision / recall / F1 for the logistic-regression predictions in `pred`.
print("classification_report \n", classification_report(y_test, pred))

In [None]:
# 10-fold shuffled cross-validation of the logistic-regression model on the
# full (SMOTE-resampled, scaled) X and y; prints the mean fold accuracy.
cv_splitter = KFold(n_splits=10, shuffle=True, random_state=20)
fold_scores = cross_val_score(model, X, y, cv=cv_splitter)
print("K-fold of Logistic regression model \n", np.mean(fold_scores))

In [None]:
# AdaBoost: fit on the training split and print accuracy on the held-out test split.
# random_state is fixed so the fitted ensemble, and therefore the printed
# score, is reproducible across "Restart & Run All".
ada = AdaBoostClassifier(random_state=100)
ada.fit(X_train, y_train)
pred = ada.predict(X_test)  # reused by the confusion-matrix and report cells that follow
acc_score = accuracy_score(y_test, pred)
print(acc_score)

In [None]:
# Confusion matrix for the AdaBoost predictions held in `pred`.
# confusion_matrix is already imported with the other sklearn imports near the
# top of the notebook, so the duplicate mid-notebook import has been removed.
cm = confusion_matrix(y_test, pred)
print("confusion matrix of AdaBoostClassifier model \n", cm)

In [None]:
# Per-class precision / recall / F1 for the AdaBoost predictions in `pred`.
print("classification_report \n", classification_report(y_test, pred))

In [None]:
# 10-fold shuffled cross-validation of the AdaBoost model on the full
# (SMOTE-resampled, scaled) X and y; prints the mean fold accuracy.
cv_splitter = KFold(n_splits=10, shuffle=True, random_state=20)
fold_scores = cross_val_score(ada, X, y, cv=cv_splitter)
print("K-Fold of AdaBoostClasifier model \n", np.mean(fold_scores))

In [None]:
# Gradient boosting: fit on the training split and print accuracy on the
# held-out test split.
# random_state is fixed so the fitted ensemble, and therefore the printed
# score, is reproducible across "Restart & Run All".
gbc = GradientBoostingClassifier(random_state=100)
gbc.fit(X_train, y_train)
pred = gbc.predict(X_test)  # reused by the confusion-matrix and report cells that follow
acc_score = accuracy_score(y_test, pred)
print(acc_score)

In [None]:
# Confusion matrix for the gradient-boosting predictions held in `pred`.
print(
    "confusion matrix of GradientBoostingClassifier model \n",
    confusion_matrix(y_test, pred),
)

In [None]:
# Per-class precision / recall / F1 for the gradient-boosting predictions in `pred`.
print("classification_report \n", classification_report(y_test, pred))

In [None]:
# 10-fold shuffled cross-validation of the gradient-boosting model on the full
# (SMOTE-resampled, scaled) X and y; prints the mean fold accuracy.
cv_splitter = KFold(n_splits=10, shuffle=True, random_state=20)
fold_scores = cross_val_score(gbc, X, y, cv=cv_splitter)
print("K-Fold of GradientBoostingClassifier model\n", np.mean(fold_scores))

In [None]:
# Random forest: fit on the training split and print accuracy on the held-out
# test split.
# random_state is fixed because the forest's bootstrap sampling and feature
# selection are random; without a seed the printed score changes on every run.
rfc = RandomForestClassifier(random_state=100)
rfc.fit(X_train, y_train)
pred = rfc.predict(X_test)  # reused by the confusion-matrix and report cells that follow
acc_score = accuracy_score(y_test, pred)
print(acc_score)

In [None]:
# Confusion matrix for the random-forest predictions held in `pred`.
print(
    "confusion matrix of RandomForestClassifier model \n",
    confusion_matrix(y_test, pred),
)

In [None]:
# Per-class precision / recall / F1 for the random-forest predictions in `pred`.
print("classification_report \n", classification_report(y_test, pred))

In [None]:
# 10-fold shuffled cross-validation of the random-forest model on the full
# (SMOTE-resampled, scaled) X and y; prints the mean fold accuracy.
cv_splitter = KFold(n_splits=10, shuffle=True, random_state=20)
fold_scores = cross_val_score(rfc, X, y, cv=cv_splitter)
print("K-Fold of RandomForestClassifier model \n", np.mean(fold_scores))

In [None]:
# Decision tree: fit on the training split and print accuracy on the held-out
# test split.
# random_state is fixed so the fitted tree, and therefore the printed score,
# is reproducible across "Restart & Run All".
dc = DecisionTreeClassifier(random_state=100)
dc.fit(X_train, y_train)
pred = dc.predict(X_test)  # reused by the confusion-matrix and report cells that follow
acc_score = accuracy_score(y_test, pred)
print(acc_score)

In [None]:
# Confusion matrix for the decision-tree predictions held in `pred`.
print(
    "confusion matrix of DecisionTreeClassifier model \n",
    confusion_matrix(y_test, pred),
)

In [None]:
# Per-class precision / recall / F1 for the decision-tree predictions in `pred`.
print("classification_report \n", classification_report(y_test, pred))

In [None]:
# 10-fold shuffled cross-validation of the decision tree on the full
# (SMOTE-resampled, scaled) X and y; prints the mean fold accuracy.
# The original call passed `rfc` (the random forest) here, so the score
# printed under the "DecisionTreeClassifier" label was really the random
# forest's. It now evaluates `dc`.
fold = KFold(n_splits=10, shuffle=True, random_state=20)
score = cross_val_score(dc, X, y, cv=fold)
mean = np.array(score).mean()
print("K-Fold of DecisionTreeClassifier model \n", mean)

**Models  ----------------  Accuracy**
* **1. Logistic Regression------- 80.96%**
* **2. AdaBoostClassifier ------- 86.28%**
* **3. GradientBoostingClassifier ------- 90.15%**
* **4. RandomForestClassifier ------- 94.68%**
* **5. DecisionTreeClassifier ------- 94.60%**

**If you liked this notebook, found something interesting, or learned something new,**
# please upvote my work