In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report,accuracy_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split, cross_val_score

In [None]:
# Load the red-wine quality data set and preview its first rows.
csv_path = '../input/red-wine-quality-cortez-et-al-2009/winequality-red.csv'
df = pd.read_csv(csv_path)
df.head()

In [None]:
# Print a summary of the frame: column names, non-null counts and dtypes.
df.info()

In [None]:
# Draw a 3x3 grid of bar plots, one panel per feature, with the quality score
# on the x-axis and the feature's aggregated value (seaborn's default
# estimator) on the y-axis.
# Each entry is (panel title, feature column plotted on the y-axis); panels
# are filled row by row in this order.
quality_panels = [
    ("Quality vs Fixed Acidity", 'fixed acidity'),
    ("Quality vs Volatile Acidity", 'volatile acidity'),
    ("Quality vs Citric acid", 'citric acid'),
    ("Quality vs Residual sugar", 'residual sugar'),
    ("Quality vs Chlorides", 'chlorides'),
    ("Quality vs free SO2", 'free sulfur dioxide'),
    ("Quality vs total SO2", 'total sulfur dioxide'),
    ("Quality vs Sulphates", 'sulphates'),
    ("Quality vs alcohol", 'alcohol'),
]

plt.figure(figsize=(100,70))
# NOTE: sns.set changes seaborn's global theme, so the enlarged font scale
# also applies to figures drawn later in the notebook.
sns.set(font_scale=5)
for panel_idx, (panel_title, panel_column) in enumerate(quality_panels, start=1):
    plt.subplot(3, 3, panel_idx)
    plt.title(panel_title, fontsize=70)
    sns.barplot(x='quality', y=panel_column, data=df)
plt.show()

In [None]:
# Collapse the numeric quality score into two classes using right-closed
# intervals: (2, 6.5] -> 'bad' and (6.5, 8] -> 'good'.
bins = (2, 6.5, 8)
classes = ['bad', 'good']
df['quality'] = pd.cut(
    x=df['quality'],
    bins=bins,
    labels=classes,
)

In [None]:
# Encoder used to turn the 'bad'/'good' quality labels into integer codes.
label_quality =  LabelEncoder()

In [None]:
# Replace the 'bad'/'good' labels with integer codes. LabelEncoder numbers the
# classes in sorted order, so 'bad' -> 0 and 'good' -> 1.
df['quality'] = label_quality.fit_transform(df['quality'])

In [None]:
# Count the rows in each encoded class to see how balanced the target is.
df['quality'].value_counts()

**After label encoding, 0 indicates a bad wine (quality score of 6 or lower) and 1 indicates a good wine (quality score of 7 or higher).**

In [None]:
# Bar chart of how many wines fall into each encoded quality class.
plt.figure(figsize=(10,7))
plt.title("Quality vs Count")
# Pass the column by keyword: a positional Series is deprecated in seaborn
# 0.11 and is interpreted as `data` (not `x`) in later releases.
sns.countplot(x='quality', data=df)

In [None]:
# Preview the frame again now that 'quality' holds the encoded class.
df.head()

In [None]:
# Separate the predictors (every column except the target) from the target.
target_col = 'quality'
x = df.drop(columns=target_col)
y = df[target_col]

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=21,
)

In [None]:
# Scaler that standardizes each feature (zero mean, unit variance by default).
sc = StandardScaler()

In [None]:
# Learn the scaling statistics from the training split only, then apply those
# same statistics to the test split. Refitting on the test split would scale
# it differently from the data the models are trained on and leak test-set
# information into preprocessing.
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

**Random Forest Classifier**

In [None]:
# Train a 100-tree random forest and predict the test split.
# random_state pins the forest's bootstrap/feature sampling so the reported
# metrics are reproducible; it reuses the seed passed to train_test_split.
rf = RandomForestClassifier(n_estimators=100, random_state=21)
rf.fit(x_train,y_train)
pred_rf = rf.predict(x_test)

In [None]:
# Test-set accuracy of the random forest, reported as a percentage.
# The label mirrors the logistic-regression report so the two printed
# numbers can be told apart.
pred1 = accuracy_score(y_test,pred_rf)
print("random forest accuracy score: ",pred1*100)

In [None]:
# Confusion matrix for the random forest (rows: true class, columns: predicted).
cm1 = confusion_matrix(y_test, pred_rf)
# fmt='d' prints the integer counts as-is; seaborn's default '.2g' format
# would show counts of 100 or more in scientific notation (e.g. 2.7e+02).
ax1 = sns.heatmap(cm1,annot=True,fmt='d')
ax1.set(xlabel='predict',ylabel='true')
plt.show()

**Logistic Regression**

In [None]:
# Fit a logistic-regression classifier (default settings) on the scaled
# training split.
lr = LogisticRegression()
lr.fit(x_train, y_train)

In [None]:
# Evaluate logistic regression on the test split: confusion matrix + accuracy.
pred_lr = lr.predict(x_test)
cm2 = confusion_matrix(y_test,pred_lr)
# fmt='d' prints the integer counts as-is; seaborn's default '.2g' format
# would show counts of 100 or more in scientific notation.
ax2 = sns.heatmap(cm2,annot=True,fmt='d')
ax2.set(xlabel='predict',ylabel='true')
# Render the figure explicitly, matching the random-forest evaluation cell.
plt.show()
pred2 = accuracy_score(y_test,pred_lr)
print("logistic regression accuracy score: ",pred2*100)