## Importing the "Red Wine Quality" Dataset

https://archive.ics.uci.edu/dataset/186/wine+quality

In [1]:
import pandas as pd
import numpy as np

In [2]:
red_wine_quality_file = "winequality-red.csv"

red_wine_quality_df = pd.read_csv(red_wine_quality_file, sep=';')

In [3]:
print(f'Shape of the dataset : {red_wine_quality_df.shape}')
print('---------------------------------------------------')
red_wine_quality_df.head(5)

Shape of the dataset : (1599, 12)
---------------------------------------------------


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


## Seperating Dataset into "Features" Matrix & "Target" Vector

In [4]:
# X : features matrix
X = red_wine_quality_df.iloc[:, :-1]
X.shape

(1599, 11)

In [5]:
# y : target vector
y = red_wine_quality_df.iloc[:, -1]
y.shape

(1599,)

## Creating our Machine Learning Algorithms

In [6]:
models = []

### Logistic Regression

#### Import Model

In [7]:
from sklearn.linear_model import LogisticRegression

#### Instantiate Model

In [8]:
logistic_regression_model = LogisticRegression(solver='liblinear')

models.append(logistic_regression_model)

logistic_regression_model.get_params()

{'C': 1.0,
 'class_weight': None,
 'dual': False,
 'fit_intercept': True,
 'intercept_scaling': 1,
 'l1_ratio': None,
 'max_iter': 100,
 'multi_class': 'auto',
 'n_jobs': None,
 'penalty': 'l2',
 'random_state': None,
 'solver': 'liblinear',
 'tol': 0.0001,
 'verbose': 0,
 'warm_start': False}

### K-Nearest Neighbors

#### Import Model

In [9]:
from sklearn.neighbors import KNeighborsClassifier

#### Instantiate Model

In [10]:
k = 5
knn_model = KNeighborsClassifier(n_neighbors=k)

models.append(knn_model)

knn_model.get_params()

{'algorithm': 'auto',
 'leaf_size': 30,
 'metric': 'minkowski',
 'metric_params': None,
 'n_jobs': None,
 'n_neighbors': 5,
 'p': 2,
 'weights': 'uniform'}

### Random Forest

#### Import the Model

In [11]:
from sklearn.ensemble import RandomForestClassifier

#### Instantiate the Model

In [12]:
trees = 100
depth = 5

random_forest_model = RandomForestClassifier(n_estimators=trees, max_depth=depth)

models.append(random_forest_model)

random_forest_model.get_params()

{'bootstrap': True,
 'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': 5,
 'max_features': 'sqrt',
 'max_leaf_nodes': None,
 'max_samples': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 100,
 'n_jobs': None,
 'oob_score': False,
 'random_state': None,
 'verbose': 0,
 'warm_start': False}

### Support Vector Classifier (SVM Classifier)

#### Import the Model

In [13]:
from sklearn.svm import SVC

#### Instantiate the Model

In [14]:
svc_model = SVC()

models.append(svc_model)

svc_model.get_params()

{'C': 1.0,
 'break_ties': False,
 'cache_size': 200,
 'class_weight': None,
 'coef0': 0.0,
 'decision_function_shape': 'ovr',
 'degree': 3,
 'gamma': 'scale',
 'kernel': 'rbf',
 'max_iter': -1,
 'probability': False,
 'random_state': None,
 'shrinking': True,
 'tol': 0.001,
 'verbose': False}

### Decision Tree Classifier

#### Import the Model

In [15]:
from sklearn.tree import DecisionTreeClassifier

#### Instantiate the Model

In [16]:
depth = 5

decision_tree_model = DecisionTreeClassifier(max_depth=depth)

models.append(decision_tree_model)

decision_tree_model.get_params()

{'ccp_alpha': 0.0,
 'class_weight': None,
 'criterion': 'gini',
 'max_depth': 5,
 'max_features': None,
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'random_state': None,
 'splitter': 'best'}

## K-Fold Cross Validation

#### 5-Fold Cross Validation

In [17]:
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score

K_FOLDS = 5

evaluations = {}
for model in models:

  # Get the  number of scores as Numpy array...
  model_accuracy_scores = cross_val_score(
      model,
      X,
      y,
      cv=K_FOLDS,
      scoring='accuracy',
  )

  # Average the scores...
  model_mean_score = model_accuracy_scores.mean()

  # Save accuracy score...
  evaluations[model] = model_mean_score

In [18]:
print('********************************************')
print(f"{K_FOLDS}-Fold Cross Validation - Evaluation Results")
print('********************************************')
print()

for model in evaluations.keys():
  print(f'{model} | Accuracy score of : {evaluations[model] * 100 : .2f}%')
  print('------------------------------------------------------')

********************************************
5-Fold Cross Validation - Evaluation Results
********************************************

LogisticRegression(solver='liblinear') | Accuracy score of :  56.91%
------------------------------------------------------
KNeighborsClassifier() | Accuracy score of :  44.21%
------------------------------------------------------
RandomForestClassifier(max_depth=5) | Accuracy score of :  58.41%
------------------------------------------------------
SVC() | Accuracy score of :  50.22%
------------------------------------------------------
DecisionTreeClassifier(max_depth=5) | Accuracy score of :  54.60%
------------------------------------------------------
