In [None]:
import pandas as pd
import numpy as np
import random
import numpy as np

from sklearn import linear_model
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, recall_score
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Location of the wine-quality CSV that the following cell loads with pandas.
# The default is an absolute path (presumably a Colab upload location — adjust
# it to wherever the file lives in your environment).
csv_path = "/content/winequality-red.csv"

In [None]:
# Load the raw dataset. Later cells read the "quality" column by name and slice
# the first twelve columns by position, so make sure the file parsed as expected.
df = pd.read_csv(csv_path)

# Fail fast with a readable message instead of an IndexError/KeyError several
# cells further down (e.g. when the CSV uses a delimiter other than a comma).
assert "quality" in df.columns, (
    f"'quality' column not found in {csv_path}; columns parsed: {list(df.columns)}. "
    "If the file is semicolon-delimited, load it with pd.read_csv(csv_path, sep=';')."
)

In [None]:
# Shuffle the rows, then carve out an 80 / 10 / 10 train / validation / test split.
# Every sampling call is seeded: the original shuffle was not, which made the
# partition (and all metrics computed from it) change on each run even though
# the later `sample` calls used random_state=0.
wines = df.sample(frac=1, random_state=0)  # sample() already returns a new frame

train_dataset = wines.sample(frac=0.8, random_state=0)

# From here on `wines` holds only the 20 % of rows that were not picked for training.
wines = wines.drop(train_dataset.index)

# Halve the remainder: one half for validation, the other for the final test.
validation_dataset = wines.sample(frac=0.5, random_state=0)
test_dataset = wines.drop(validation_dataset.index)
# test_dataset = wines.drop(train_dataset.index)

In [None]:
# index_names = wines[wines["quality"] == 3].index
# wines.drop(index_names, inplace=True)

In [None]:
# Class balance of the target over the whole dataset.
# `wines` is reduced to the non-training remainder by the split cell, so the
# histogram is drawn from `df` to show the full distribution instead.
quality_ax = df["quality"].hist(figsize=(8, 6))
quality_ax.set(title="Wine quality distribution", xlabel="quality", ylabel="number of wines");

In [None]:
# Show the three partitions one after another (train, validation, test).
print(train_dataset, validation_dataset, test_dataset, sep="\n")

# The partitions together must account for every row of the source frame.
assert sum(map(len, (train_dataset, validation_dataset, test_dataset))) == len(df)

In [None]:
# Split every partition into a feature matrix (X_*) and a label vector (Y_*).
# The target is selected by its column name instead of the hard-coded positions
# `[:, :11]` / `[:, 11]`, so the cell no longer depends on column order, and the
# labels keep their own dtype instead of being upcast together with the features
# by `DataFrame.values`.

# Train data
X_train = train_dataset.drop(columns="quality").values
Y_train = train_dataset["quality"].values

# Validation data
X_val = validation_dataset.drop(columns="quality").values
Y_val = validation_dataset["quality"].values

# Test data
X_test = test_dataset.drop(columns="quality").values
Y_test = test_dataset["quality"].values

In [None]:
# Fit every candidate classifier on the training split.
# NOTE(review): the features are passed in unscaled; SVC and KNN are
# distance-based, so standardising the inputs is worth evaluating.

# Support-vector classifiers, one per kernel, all with the same C.
rbf = svm.SVC(kernel='rbf', gamma="auto", C=0.8).fit(X_train, Y_train)
poly = svm.SVC(kernel='poly', C=0.8, coef0=2).fit(X_train, Y_train)
linear = svm.SVC(kernel='linear', C=0.8).fit(X_train, Y_train)

# The forest is randomised (bootstrap samples, feature sub-sampling); seed it so
# that re-running the notebook reproduces the same model and metrics.
random_forest_classifier = RandomForestClassifier(n_estimators=100, random_state=0).fit(X_train, Y_train)

knn = KNeighborsClassifier(n_neighbors=5).fit(X_train, Y_train)

In [None]:
# 5-fold cross-validated accuracy of the random forest configuration.
# cross_val_score clones and refits the estimator on every fold, so the data
# passed here is what the clones are trained on. Use the training split: on the
# validation split the clones would learn from only a small fraction of the
# rows and the already-fitted model would never be evaluated.
forrest_cv = cross_val_score(random_forest_classifier, X_train, Y_train, cv=5)

In [None]:
# cross_val_score yields fractions in [0, 1]; scale by 100 so the figures match
# the "%" unit in the message (same convention as the test-metric cells).
print(f"Random forest CV accuracy: {forrest_cv.mean()*100:.2f} % with a standard deviation of: {forrest_cv.std()*100:.2f} %")

In [None]:
# 5-fold cross-validated accuracy of the KNN configuration.
# cross_val_score clones and refits the estimator on every fold, so it is run on
# the training split; on the validation split the clones would be fitted on only
# a small fraction of the rows and the already-fitted `knn` would go unused.
knn_cv = cross_val_score(knn, X_train, Y_train, cv=5)

In [None]:
# cross_val_score yields fractions in [0, 1]; scale by 100 so the figures match
# the "%" unit in the message (same convention as the test-metric cells).
print(f"KNN CV accuracy: {knn_cv.mean()*100:.2f} % with a standard deviation of: {knn_cv.std()*100:.2f} %")

In [None]:
# 5-fold cross-validated accuracy for each SVC kernel configuration.
# cross_val_score clones and refits the estimator on every fold, so it is run on
# the training split; on the validation split the clones would be fitted on only
# a small fraction of the rows and the already-fitted models would go unused.
rbf_cv = cross_val_score(rbf, X_train, Y_train, cv=5)
poly_cv = cross_val_score(poly, X_train, Y_train, cv=5)
linear_cv = cross_val_score(linear, X_train, Y_train, cv=5)

In [None]:
# cross_val_score yields fractions in [0, 1]; scale by 100 so the figures match
# the "%" unit in the messages (same convention as the test-metric cells).
print(f"Rbf CV accuracy: {rbf_cv.mean()*100:.2f} % with a standard deviation of: {rbf_cv.std()*100:.2f} %")
print(f"Polynomial CV accuracy: {poly_cv.mean()*100:.2f} % with a standard deviation of: {poly_cv.std()*100:.2f} %")
print(f"Linear CV accuracy: {linear_cv.mean()*100:.2f} % with a standard deviation of: {linear_cv.std()*100:.2f} %")

In [None]:
# Predict the held-out test set with every fitted model.
# SVC variants first, then the random forest and the nearest-neighbour classifier.
poly_pred, rbf_pred, linear_pred = (
    svc_model.predict(X_test) for svc_model in (poly, rbf, linear)
)
random_forest_pred, knn_pred = (
    estimator.predict(X_test) for estimator in (random_forest_classifier, knn)
)

In [None]:
# Test-set report for the random forest.
# Recall and F1 are support-weighted averages over the quality classes.
random_forest_acc = accuracy_score(Y_test, random_forest_pred)
random_forest_recall, random_forest_f1 = (
    weighted_metric(Y_test, random_forest_pred, average="weighted")
    for weighted_metric in (recall_score, f1_score)
)
random_forest_conf = confusion_matrix(Y_test, random_forest_pred)

print(
    "Random forest:",
    f'Accuracy: {random_forest_acc*100:.2f} %',
    f'Recall: {random_forest_recall*100:.2f} %',
    f'F1: {random_forest_f1*100:.2f} %',
    'Confusion Matrix:',
    random_forest_conf,
    sep="\n",
)

Random forest:
Accuracy: 75.00 %
Recall: 75.00 %
F1: 73.45 %
Confusion Matrix:
[[ 0  1  2  0  0  0]
 [ 0  0  2  1  0  0]
 [ 0  0 61 10  1  0]
 [ 0  0 11 48  5  0]
 [ 0  0  0  3 10  0]
 [ 0  0  0  1  3  1]]


In [None]:
# Test-set report for the K-nearest-neighbours classifier.
# Recall uses the same support-weighted averaging as F1 and as every other
# model's report (this cell previously used average="micro"), so the numbers
# are computed the same way across models.
knn_acc = accuracy_score(Y_test, knn_pred)
knn_recall = recall_score(Y_test, knn_pred, average="weighted")
knn_f1 = f1_score(Y_test, knn_pred, average="weighted")
knn_conf = confusion_matrix(Y_test, knn_pred)
print("K-Nearest neighbors:")
print(f'Accuracy: {knn_acc*100:.2f} %')
print(f'Recall: {knn_recall*100:.2f} %')
print(f'F1: {knn_f1*100:.2f} %')
print('Confusion Matrix:')
print(knn_conf)

K-Nearest neighbors:
Accuracy: 43.75 %
Recall: 43.75 %
F1: 42.70 %
Confusion Matrix:
[[ 0  0  1  2  0  0]
 [ 0  0  2  1  0  0]
 [ 0  2 35 34  1  0]
 [ 0  0 27 28  9  0]
 [ 0  0  2  4  7  0]
 [ 0  0  0  2  3  0]]


In [None]:
# Test-set report for the polynomial-kernel SVC.
# Recall and F1 are support-weighted averages over the quality classes.
poly_accuracy = accuracy_score(Y_test, poly_pred)
poly_recall, poly_f1 = (
    weighted_metric(Y_test, poly_pred, average="weighted")
    for weighted_metric in (recall_score, f1_score)
)
poly_conf = confusion_matrix(Y_test, poly_pred)

print(
    "Polynomial Kernel:",
    f'Accuracy: {poly_accuracy*100:.2f} %',
    f'Recall: {poly_recall*100:.2f} %',
    f'F1: {poly_f1*100:.2f} %',
    'Confusion Matrix:',
    poly_conf,
    sep="\n",
)

Polynomial Kernel:
Accuracy: 56.88 %
Recall: 56.88 %
F1: 52.41 %
Confusion Matrix:
[[ 0  0  0  3  0  0]
 [ 0  0  3  0  0  0]
 [ 0  0 40 32  0  0]
 [ 0  0 13 51  0  0]
 [ 0  0  0 13  0  0]
 [ 0  0  0  5  0  0]]


In [None]:
# Test-set report for the RBF-kernel SVC.
# Recall and F1 are support-weighted averages over the quality classes.
rbf_accuracy = accuracy_score(Y_test, rbf_pred)
rbf_recall = recall_score(Y_test, rbf_pred, average="weighted")
rbf_f1 = f1_score(Y_test, rbf_pred, average='weighted')
# Build the matrix from the RBF predictions; it was previously computed from
# `poly_pred`, which made this report show the polynomial kernel's matrix.
rbf_conf = confusion_matrix(Y_test, rbf_pred)
print("RBF Kernel:")
print(f'Accuracy:  {rbf_accuracy*100:.2f} %')
print(f'Recall: {rbf_recall*100:.2f} %')
print(f'F1: {rbf_f1*100:.2f} %')
print('Confusion Matrix:')
print(rbf_conf)

RBF Kernel:
Accuracy:  57.50 %
Recall: 57.50 %
F1: 55.55 %
Confusion Matrix:
[[ 0  0  0  3  0  0]
 [ 0  0  3  0  0  0]
 [ 0  0 40 32  0  0]
 [ 0  0 13 51  0  0]
 [ 0  0  0 13  0  0]
 [ 0  0  0  5  0  0]]


In [None]:
# Test-set report for the linear-kernel SVC.
# Recall and F1 are support-weighted averages over the quality classes.
linear_acc = accuracy_score(Y_test, linear_pred)
linear_recall, linear_f1 = (
    weighted_metric(Y_test, linear_pred, average="weighted")
    for weighted_metric in (recall_score, f1_score)
)
linear_conf = confusion_matrix(Y_test, linear_pred)

print(
    "Linear Kernel:",
    f'Accuracy: {linear_acc*100:.2f} %',
    f'Recall: {linear_recall*100:.2f} %',
    f'F1: {linear_f1*100:.2f} %',
    'Confusion Matrix:',
    linear_conf,
    sep="\n",
)

Linear Kernel:
Accuracy: 63.12 %
Recall: 63.12 %
F1: 58.12 %
Confusion Matrix:
[[ 0  0  3  0  0  0]
 [ 0  0  3  0  0  0]
 [ 0  0 56 16  0  0]
 [ 0  0 19 45  0  0]
 [ 0  0  0 13  0  0]
 [ 0  0  0  5  0  0]]
