In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split

Loading the dataset

In [None]:
# Location of the red-wine CSV inside the Kaggle input directory.
wine_csv_path = '/kaggle/input/red-wine-quality-cortez-et-al-2009/winequality-red.csv'

# Read the raw table and preview its first rows.
wine = pd.read_csv(wine_csv_path)
wine.head()

Dropping duplicates and shuffling the dataset

In [None]:
# Remove exact duplicate rows so the same wine cannot be counted twice.
wine = wine.drop_duplicates()
# Shuffle all rows. The shuffle is seeded: without a fixed random_state the
# row order (and therefore every later train/test split and score) would
# change on each Restart & Run All.
wine = wine.sample(frac=1, random_state=1)


Restricting our 10 quality groups of red wine to 2 - bad (quality of 5 or below) and good (quality above 5).

In [None]:
# Bin edges are right-inclusive: (0, 5] maps to 'bad', (5, 10] maps to 'good'.
quality_edges = [0, 5, 10]
quality_names = ['bad', 'good']
wine['range'] = pd.cut(wine['quality'], bins=quality_edges, labels=quality_names)

In [None]:
# Preview only the first rows (instead of rendering the whole frame) to
# confirm that the new `range` column sits next to `quality`.
wine.head()

We identify the target labels and we split the dataset into training and testing groups. 

In [None]:

# Features: every column except the raw score and the label derived from it
# (keeping either would leak the answer into the model).
x = wine.drop(columns=['quality', 'range'])

# Target as a 1-D Series. Selecting with double brackets (wine[['range']])
# yields a one-column DataFrame, which makes estimators that expect a 1-D
# target emit a DataConversionWarning.
y = wine['range']

# Hold out 20% of the rows for testing; the seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=1)

We train the Decision Tree Classifier using max_depth=3

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Shallow Gini tree. random_state is fixed because scikit-learn permutes the
# candidate features at every split, so equally good splits could otherwise
# be resolved differently from one run to the next.
classifier = DecisionTreeClassifier(criterion='gini', max_depth=3, random_state=1)

# fit() returns the estimator itself, so `model` and `classifier` are the same object.
model = classifier.fit(x_train, y_train)

# Mean accuracy on the training split (an optimistic estimate).
classifier.score(x_train, y_train)

In [None]:
# Mean accuracy of the fitted tree on the held-out test split.
model.score(x_test,y_test)

Confusion matrix for the two classes.

In [None]:
from sklearn.metrics import confusion_matrix

# Predicted labels for the test split; reused by the classification report.
y_predict_test = classifier.predict(x_test)

# Rows are the true classes, columns the predicted classes.
confusion_matrix(y_true=y_test, y_pred=y_predict_test)

Classification report with Precision,Recall and F1-score for each class.

In [None]:
from sklearn.metrics import classification_report

# Text summary of per-class precision, recall and F1 for the decision tree.
tree_report = classification_report(y_true=y_test, y_pred=y_predict_test)
print(tree_report)

Most important features for our model.

In [None]:
# Class labels known to the fitted tree.
model.classes_

In [None]:
# Importance score of each feature, in the same order as the columns of `x`.
model.feature_importances_

In [None]:
# Pair every feature name with its importance score.
[(feature, score) for feature, score in zip(x.columns, model.feature_importances_)]

In [None]:
# Horizontal bar chart of the tree's feature importances.
# `plt` comes from the import cell at the top of the notebook.
fig, ax = plt.subplots()
# height=1 makes the bars fill the whole row, i.e. adjacent bars touch.
ax.barh(x.columns, model.feature_importances_, height=1)
ax.set_title("Feature importances")
ax.set_xlabel("Importance")
ax.set_ylabel("Feature")
# Render the figure without echoing the BarContainer repr.
plt.show()

In [None]:
from sklearn import tree

# Large canvas so the node text stays legible.
plt.figure(figsize=(20, 10))

# plot_tree documents feature_names / class_names as lists of strings; some
# scikit-learn releases reject a pandas Index or a numpy array here, so both
# are converted explicitly. label='root' prints the field captions only on
# the top node.
tree.plot_tree(
    model,
    feature_names=list(x.columns),
    class_names=list(model.classes_),
    label='root',
)
# Show the drawing instead of echoing the returned list of annotations.
plt.show()

Ensemble learning - AdaBoost Classifier

In [None]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost needs *weak* learners. An unconstrained DecisionTreeClassifier()
# fits the training data perfectly, and AdaBoost stops boosting as soon as an
# estimator reaches a perfect fit, so the "ensemble" would collapse to a
# single deep tree. Depth-1 stumps let all boosting rounds contribute.
weak_learner = DecisionTreeClassifier(max_depth=1)
ada = AdaBoostClassifier(weak_learner, n_estimators=100, random_state=0)

# np.ravel guarantees a 1-D target whether y_train is a Series or a
# one-column DataFrame, avoiding a DataConversionWarning.
ada.fit(x_train, np.ravel(y_train))

In [None]:
# Predict the class of every test sample with the boosted ensemble.
predictions = ada.predict(x_test)

In [None]:
from sklearn.metrics import classification_report

# Text summary of per-class precision, recall and F1 for the AdaBoost ensemble.
ada_report = classification_report(y_true=y_test, y_pred=predictions)
print(ada_report)