# Final Exam 2563 - Support Vector Machine (Wine Quality Problem)

The objective of this exam problem is to develop a support vector machine (SVM) model that predicts the quality of red wine from the following 11 features:

1. fixed acidity
2. volatile acidity
3. citric acid
4. residual sugar
5. chlorides
6. free sulfur dioxide
7. total sulfur dioxide
8. density
9. pH
10. sulphates
11. alcohol

The label is the wine quality: <br>
-- 0 = the wine is of bad quality <br>
-- 1 = the wine is of good quality

In [1]:
# used for manipulating directory paths
import os

# Scientific and vector computation for python
import numpy as np

# Plotting library
from matplotlib import pyplot

# Optimization module in scipy
from scipy import optimize

# library written for this exam
import utilsSVM as utils

# tells matplotlib to embed plots within the notebook
%matplotlib inline

### We start this exam problem by first loading the dataset

In [2]:
# Load the training and cross-validation sets.
# (np.loadtxt with its default delimiter splits each row on whitespace.)
train_path = os.path.join('Data', 'SVMWineData_training.txt')
cv_path = os.path.join('Data', 'SVMWineData_cv.txt')
data_train = np.loadtxt(train_path)
data_cv = np.loadtxt(cv_path)

# Columns 0-10 hold the 11 features; column 11 holds the label.
# X / Xval are the feature matrices, y / yval the label vectors.
X = data_train[:, :11]
y = data_train[:, 11]
Xval = data_cv[:, :11]
yval = data_cv[:, 11]

m = y.size  # number of training examples

In [4]:
def gaussianKernel(x1, x2, sigma):
    """
    Compute the Gaussian (RBF) similarity between two feature vectors.

    The similarity is ``exp(-||x1 - x2||^2 / (2 * sigma^2))``.

    Parameters
    ----------
    x1, x2 : array_like
        Feature vectors of matching (broadcastable) shape. Plain Python
        sequences are accepted; both inputs are converted to float arrays.
    sigma : float
        Bandwidth of the kernel.

    Returns
    -------
    sim : float
        The kernel value; 1.0 when x1 equals x2, decreasing towards 0 as
        the vectors move apart.
    """
    # Convert to float arrays so that lists/tuples work and so that
    # unsigned-integer inputs cannot wrap around during the subtraction.
    x1 = np.asarray(x1, dtype=float)
    x2 = np.asarray(x2, dtype=float)

    squared_distance = np.sum((x1 - x2) ** 2)
    return np.exp(-squared_distance / (2 * (sigma ** 2)))

In [5]:
def Params(X, y, Xval, yval, C_values=None, sigma_values=None):
    """
    Choose C and sigma for a Gaussian-kernel SVM by grid search.

    For every (C, sigma) pair a model is trained on (X, y) with
    utils.svmTrain and evaluated on (Xval, yval) with utils.svmPredict.
    The pair with the lowest fraction of mismatched validation labels is
    returned; ties go to the first pair in row-major (C, then sigma) order.

    Parameters
    ----------
    X, y : array_like
        Training features and labels, passed through to utils.svmTrain.
    Xval, yval : array_like
        Validation features and labels used to score each candidate.
    C_values : array_like, optional
        Candidate values for C. Defaults to [0.3, 0.6, 1].
    sigma_values : array_like, optional
        Candidate values for sigma. Defaults to [0.3, 0.6, 1].

    Returns
    -------
    C, sigma : float
        The candidate pair with the smallest validation error.

    Raises
    ------
    ValueError
        If either candidate grid is empty.
    """
    # Fall back to the default grids when the caller does not supply any.
    if C_values is None:
        C_array = np.array([0.3, 0.6, 1])
    else:
        C_array = np.asarray(C_values, dtype=float).ravel()
    if sigma_values is None:
        sigma_array = np.array([0.3, 0.6, 1])
    else:
        sigma_array = np.asarray(sigma_values, dtype=float).ravel()

    if C_array.size == 0 or sigma_array.size == 0:
        raise ValueError('C_values and sigma_values must each contain at least one candidate')

    # err_array[i, j] is the validation error for (C_array[i], sigma_array[j]).
    err_array = np.zeros([C_array.size, sigma_array.size])

    for i, C_candidate in enumerate(C_array):
        for j, sigma_candidate in enumerate(sigma_array):
            model = utils.svmTrain(X, y, C_candidate, gaussianKernel, args=(sigma_candidate,))
            predictions = utils.svmPredict(model, Xval)
            err_array[i, j] = np.mean(predictions != yval)

    # argmin works on the flattened array; map the flat index back to (i, j).
    best_i, best_j = np.unravel_index(np.argmin(err_array, axis=None), err_array.shape)
    return C_array[best_i], sigma_array[best_j]

In [6]:
# Determine the best SVM parameters (C and sigma) here
C, sigma = Params(X, y, Xval, yval)

# Train the SVM on the training set using the parameters (C and sigma) returned by the Params function
model = utils.svmTrain(X, y, C, gaussianKernel, args=(sigma,))

print(C, sigma)

1.0 1.0


In [9]:
# Run the current `model` on the training features X
# (presumably utils.svmPredict returns one predicted label per row -- confirm in utilsSVM).
p = utils.svmPredict(model, X)

# Percentage of predictions that equal the training labels y
print('Training Accuracy: %.2f' % (np.mean(p == y) * 100))

Training Accuracy: 98.25


In [10]:
# Run the current `model` on the cross-validation features Xval
# (presumably utils.svmPredict returns one predicted label per row -- confirm in utilsSVM).
p = utils.svmPredict(model, Xval)

# Percentage of predictions that equal the cross-validation labels yval
print('Cross Accuracy: %.2f' % (np.mean(p == yval) * 100))

Cross Accuracy: 67.17


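Beware: the cells below reuse the same variable names `model` and `C` for a linear-kernel SVM, which overwrites the Gaussian-kernel model and the value of `C` obtained above.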

In [11]:
# Train a linear-kernel SVM with a fixed, hand-picked C
# (no parameter search is run here, and no sigma is passed for the linear kernel).
# NOTE: this rebinds `C` and `model`, replacing the values assigned for the Gaussian-kernel SVM.
C =0.1


# NOTE(review): 1e-3 and 20 are passed positionally; presumably the tolerance and max passes -- confirm in utilsSVM.
model = utils.svmTrain(X, y, C, utils.linearKernel, 1e-3, 20)

print(C)

0.1


In [15]:
# Run the current `model` on the training features X.
# `model` is whichever model was assigned last; in file order that is the linear-kernel one.
p = utils.svmPredict(model, X)

# Percentage of predictions that equal the training labels y
print('Training Accuracy: %.2f' % (np.mean(p == y) * 100))

Training Accuracy: 75.33


In [16]:
# Run the current `model` on the cross-validation features Xval.
# `model` is whichever model was assigned last; in file order that is the linear-kernel one.
p = utils.svmPredict(model, Xval)

# Percentage of predictions that equal the cross-validation labels yval
print('Cross Accuracy: %.2f' % (np.mean(p == yval) * 100))

Cross Accuracy: 72.18


In [None]:
# Show the current values of C and sigma.
# NOTE: if the cells ran in file order, C is the 0.1 set for the linear-kernel SVM,
# while sigma is still the value returned by Params.
print(C)
print(sigma)

In [7]:
# Re-run the grid search; this rebinds C and sigma to the pair selected by Params.
C, sigma = Params(X, y, Xval, yval)
print(C,sigma)

1.0 1.0
