In [2]:
import numpy as np
from sklearn import svm
from sklearn import metrics
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings('ignore')

Extract Data from CSV files

In [3]:
red_wine_data = np.loadtxt('winequality-red.csv', skiprows=1, delimiter=";", unpack=True)
white_wine_data = np.loadtxt('winequality-white.csv', skiprows=1, delimiter=";", unpack=True)

print(red_wine_data)
red_qual = red_wine_data.T[:, [-1]]
red_wine_data = red_wine_data.T[:, :-1]
print(red_qual)

white_qual = white_wine_data.T[:, [-1]]
white_wine_data = white_wine_data.T[:, :-1]

# normalize inputs
red_wine_data = red_wine_data / red_wine_data.max(axis=0)
white_wine_data = white_wine_data/ white_wine_data.max(axis=0)
print(red_wine_data)

w_input = np.concatenate([red_wine_data, white_wine_data])
w_out = np.concatenate([red_qual,white_qual])

print(len(w_input))
print(len(w_out))

# w_input = merged_data[:, :-1] #all but last column
# w_out = merged_data[:, -1] #last column


[[ 7.4    7.8    7.8   ...  6.3    5.9    6.   ]
 [ 0.7    0.88   0.76  ...  0.51   0.645  0.31 ]
 [ 0.     0.     0.04  ...  0.13   0.12   0.47 ]
 ...
 [ 0.56   0.68   0.65  ...  0.75   0.71   0.66 ]
 [ 9.4    9.8    9.8   ... 11.    10.2   11.   ]
 [ 5.     5.     5.    ...  6.     5.     6.   ]]
[[5.]
 [5.]
 [5.]
 ...
 [6.]
 [5.]
 [6.]]
[[0.46540881 0.44303797 0.         ... 0.87531172 0.28       0.63087248]
 [0.49056604 0.55696203 0.         ... 0.79800499 0.34       0.65771812]
 [0.49056604 0.48101266 0.04       ... 0.81296758 0.325      0.65771812]
 ...
 [0.39622642 0.32278481 0.13       ... 0.85286783 0.375      0.73825503]
 [0.37106918 0.40822785 0.12       ... 0.89027431 0.355      0.68456376]
 [0.37735849 0.19620253 0.47       ... 0.84538653 0.33       0.73825503]]
6497
6497


Split training and testing data

In [4]:

x_train, x_test, y_train, y_test = train_test_split(w_input, w_out, test_size = 0.2)


print(len(x_train))
print(len(x_test))
print(len(y_train))

5197
1300
5197


Build the SVM Model

Linear for red:

In [4]:
C_lin = [1,10,20,30]
lin_acc = []
lin_conf = []

for penalty in C_lin: 
    clf = svm.SVC(kernel='linear', C=penalty)

    clf.fit(x_train, y_train)

    pred = clf.predict(x_test)
    
    lin_acc.append(metrics.accuracy_score(y_test, pred))
    lin_conf.append(metrics.confusion_matrix(y_test, pred))

    print(f"C={penalty}")
    print(f"Accuracy: {metrics.accuracy_score(y_test, pred)}")
    print(metrics.confusion_matrix(y_test, pred))

C=1
Accuracy: 0.5176923076923077
[[  0   0   3   3   0   0   0]
 [  0   0  22  16   0   0   0]
 [  0   0 254 167   0   0   0]
 [  0   0 143 419   0   0   0]
 [  0   0  11 216   0   0   0]
 [  0   0   4  40   0   0   0]
 [  0   0   0   2   0   0   0]]
C=10
Accuracy: 0.5184615384615384
[[  0   0   3   3   0   0   0]
 [  0   0  21  17   0   0   0]
 [  0   0 258 163   0   0   0]
 [  0   0 146 416   0   0   0]
 [  0   0  11 216   0   0   0]
 [  0   0   4  40   0   0   0]
 [  0   0   0   2   0   0   0]]
C=20
Accuracy: 0.5184615384615384
[[  0   0   3   3   0   0   0]
 [  0   0  21  17   0   0   0]
 [  0   0 258 163   0   0   0]
 [  0   0 146 416   0   0   0]
 [  0   0  11 216   0   0   0]
 [  0   0   4  40   0   0   0]
 [  0   0   0   2   0   0   0]]
C=30
Accuracy: 0.5184615384615384
[[  0   0   3   3   0   0   0]
 [  0   0  21  17   0   0   0]
 [  0   0 260 161   0   0   0]
 [  0   0 148 414   0   0   0]
 [  0   0  11 216   0   0   0]
 [  0   0   4  40   0   0   0]
 [  0   0   0   2   0   0

In [5]:
C_poly = [1,10,50,100]

poly_acc = []
poly_conf = []

for penalty in C_poly: 
    clf_p = svm.SVC(kernel='poly', C=penalty)

    clf_p.fit(x_train, y_train)

    pred = clf_p.predict(x_test)
    
    poly_acc.append(metrics.accuracy_score(y_test, pred))
    poly_conf.append(metrics.confusion_matrix(y_test, pred))

    print(f"C={penalty}")
    print(f"Accuracy: {metrics.accuracy_score(y_test, pred)}")
    print(metrics.confusion_matrix(y_test, pred))

C=1
Accuracy: 0.5261538461538462
[[  0   0   3   3   0   0   0]
 [  0   0  21  17   0   0   0]
 [  0   0 258 163   0   0   0]
 [  0   0 138 424   0   0   0]
 [  0   0   5 220   2   0   0]
 [  0   0   0  44   0   0   0]
 [  0   0   0   2   0   0   0]]
C=10
Accuracy: 0.5392307692307692
[[  0   0   3   3   0   0   0]
 [  0   1  20  15   2   0   0]
 [  0   1 254 166   0   0   0]
 [  0   0 132 421   9   0   0]
 [  0   0   3 199  25   0   0]
 [  0   0   0  37   7   0   0]
 [  0   0   0   1   1   0   0]]
C=50
Accuracy: 0.5453846153846154
[[  0   0   3   3   0   0   0]
 [  0   1  22  14   1   0   0]
 [  0   2 254 163   2   0   0]
 [  0   0 131 414  17   0   0]
 [  0   0   2 185  40   0   0]
 [  0   0   0  35   9   0   0]
 [  0   0   0   0   2   0   0]]
C=100
Accuracy: 0.5461538461538461
[[  0   0   3   3   0   0   0]
 [  0   1  23  13   1   0   0]
 [  1   2 259 157   2   0   0]
 [  1   0 131 406  24   0   0]
 [  0   0   6 177  44   0   0]
 [  0   0   0  34  10   0   0]
 [  0   0   0   0   2   

In [45]:
rbf_acc = []
rbf_conf = []

for penalty in C_poly: 
    clf_r = svm.SVC(kernel='rbf', C=penalty)

    clf_r.fit(x_train, y_train)

    pred = clf_r.predict(x_test)
    
    rbf_acc.append(metrics.accuracy_score(y_test, pred))
    rbf_conf.append(metrics.confusion_matrix(y_test, pred))
    
    print(f"C={penalty}")
    print(f"Accuracy: {metrics.accuracy_score(y_test, pred)}")
    print(metrics.confusion_matrix(y_test, pred))



C=1
Accuracy: 0.5269230769230769
[[  0   0   0   2   0   0   0]
 [  0   0  33  24   0   0   0]
 [  0   0 237 177   0   0   0]
 [  0   0 126 448   0   0   0]
 [  0   0   5 206   0   0   0]
 [  0   0   0  41   0   0   0]
 [  0   0   0   1   0   0   0]]
C=10
Accuracy: 0.5430769230769231
[[  0   0   0   2   0   0   0]
 [  0   0  32  25   0   0   0]
 [  0   0 243 171   0   0   0]
 [  0   0 115 454   5   0   0]
 [  0   0   6 196   9   0   0]
 [  0   0   0  38   3   0   0]
 [  0   0   0   1   0   0   0]]
C=50
Accuracy: 0.5607692307692308
[[  0   0   0   2   0   0   0]
 [  0   1  33  22   1   0   0]
 [  3   0 255 153   3   0   0]
 [  0   1 118 441  14   0   0]
 [  0   0   6 173  32   0   0]
 [  0   0   0  29  12   0   0]
 [  0   0   0   1   0   0   0]]
C=100
Accuracy: 0.5615384615384615
[[  0   0   0   2   0   0   0]
 [  0   1  33  22   1   0   0]
 [  3   0 256 152   3   0   0]
 [  0   1 120 436  17   0   0]
 [  0   0   7 167  37   0   0]
 [  0   0   0  29  12   0   0]
 [  0   0   0   1   0   

In [12]:
C_tuning = [75,100,125,150]
gamma_tuning = ['scale', 'auto', 0.1, 0.01]

fin_accuracies = np.zeros((4, 4))

for i in range(len(C_tuning)): 
    for j in range(len(gamma_tuning)):
        clf_r = svm.SVC(kernel='rbf', C=C_tuning[i], gamma=gamma_tuning[j])

        clf_r.fit(x_train, y_train)

        pred = clf_r.predict(x_test)
        
        fin_accuracies[i][j] = metrics.accuracy_score(y_test, pred)
        
        
        print(f"C={C_tuning[i]}, gamma={gamma_tuning[j]}")
        print(f"Accuracy: {metrics.accuracy_score(y_test, pred)}")
        # print(metrics.confusion_matrix(y_test, pred))

C=75, gamma=scale
Accuracy: 0.5507692307692308
C=75, gamma=auto
Accuracy: 0.5338461538461539
C=75, gamma=0.1
Accuracy: 0.5338461538461539
C=75, gamma=0.01
Accuracy: 0.53
C=100, gamma=scale
Accuracy: 0.5515384615384615
C=100, gamma=auto
Accuracy: 0.5323076923076923
C=100, gamma=0.1
Accuracy: 0.5338461538461539
C=100, gamma=0.01
Accuracy: 0.5307692307692308
C=125, gamma=scale
Accuracy: 0.553076923076923
C=125, gamma=auto
Accuracy: 0.5338461538461539
C=125, gamma=0.1
Accuracy: 0.5346153846153846
C=125, gamma=0.01
Accuracy: 0.5307692307692308
C=150, gamma=scale
Accuracy: 0.5523076923076923
C=150, gamma=auto
Accuracy: 0.5346153846153846
C=150, gamma=0.1
Accuracy: 0.5330769230769231
C=150, gamma=0.01
Accuracy: 0.5323076923076923


In [13]:
print(fin_accuracies)

[[0.55076923 0.53384615 0.53384615 0.53      ]
 [0.55153846 0.53230769 0.53384615 0.53076923]
 [0.55307692 0.53384615 0.53461538 0.53076923]
 [0.55230769 0.53461538 0.53307692 0.53230769]]
