In [8]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import cross_val_score
import numpy as np

In [2]:
df = pd.read_csv('wine.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178 entries, 0 to 177
Data columns (total 14 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   alcohol                       178 non-null    float64
 1   malic_acid                    178 non-null    float64
 2   ash                           178 non-null    float64
 3   alcalinity_of_ash             178 non-null    float64
 4   magnesium                     178 non-null    float64
 5   total_phenols                 178 non-null    float64
 6   flavanoids                    178 non-null    float64
 7   nonflavanoid_phenols          178 non-null    float64
 8   proanthocyanins               178 non-null    float64
 9   color_intensity               178 non-null    float64
 10  hue                           178 non-null    float64
 11  od280/od315_of_diluted_wines  178 non-null    float64
 12  proline                       178 non-null    float64
 13  label

In [3]:
df.corr()

Unnamed: 0,alcohol,malic_acid,ash,alcalinity_of_ash,magnesium,total_phenols,flavanoids,nonflavanoid_phenols,proanthocyanins,color_intensity,hue,od280/od315_of_diluted_wines,proline,label
alcohol,1.0,0.094397,0.211545,-0.310235,0.270798,0.289101,0.236815,-0.155929,0.136698,0.546364,-0.071747,0.072343,0.64372,-0.328222
malic_acid,0.094397,1.0,0.164045,0.2885,-0.054575,-0.335167,-0.411007,0.292977,-0.220746,0.248985,-0.561296,-0.36871,-0.192011,0.437776
ash,0.211545,0.164045,1.0,0.443367,0.286587,0.12898,0.115077,0.18623,0.009652,0.258887,-0.074667,0.003911,0.223626,-0.049643
alcalinity_of_ash,-0.310235,0.2885,0.443367,1.0,-0.083333,-0.321113,-0.35137,0.361922,-0.197327,0.018732,-0.273955,-0.276769,-0.440597,0.517859
magnesium,0.270798,-0.054575,0.286587,-0.083333,1.0,0.214401,0.195784,-0.256294,0.236441,0.19995,0.055398,0.066004,0.393351,-0.209179
total_phenols,0.289101,-0.335167,0.12898,-0.321113,0.214401,1.0,0.864564,-0.449935,0.612413,-0.055136,0.433681,0.699949,0.498115,-0.719163
flavanoids,0.236815,-0.411007,0.115077,-0.35137,0.195784,0.864564,1.0,-0.5379,0.652692,-0.172379,0.543479,0.787194,0.494193,-0.847498
nonflavanoid_phenols,-0.155929,0.292977,0.18623,0.361922,-0.256294,-0.449935,-0.5379,1.0,-0.365845,0.139057,-0.26264,-0.50327,-0.311385,0.489109
proanthocyanins,0.136698,-0.220746,0.009652,-0.197327,0.236441,0.612413,0.652692,-0.365845,1.0,-0.02525,0.295544,0.519067,0.330417,-0.49913
color_intensity,0.546364,0.248985,0.258887,0.018732,0.19995,-0.055136,-0.172379,0.139057,-0.02525,1.0,-0.521813,-0.428815,0.3161,0.265668


In [4]:
data = df.copy()[['total_phenols', 'flavanoids', 'od280/od315_of_diluted_wines']]
label = df.copy()['label']

In [32]:
C = [0.01, 0.1, 1, 10, 100]
gamma = [0.01, 0.1, 0.5, 1, 10]
best_score = 0
best_param = []

for c in C:
    for g in gamma:
        clf = SVC(C=c, gamma=g)
        scores = cross_val_score(clf, data, label)
        m_score = scores.mean()
        if m_score >= best_score:
            if m_score == best_score:
                best_param.append({'C':c, 'gamma':g})
            else:
                best_param = [{'C':c, 'gamma':g}]
            best_score = m_score
                
print(f"Best score: {best_score*100:.3f}")
loop_num = len(best_param)
for i in range(loop_num):
    c = best_param[i]["C"]
    g = best_param[i]["gamma"]
    print(f"Parameter{i+1}: C={c:>4}\tgamma={g}")

Best score: 84.302
Parameter1: C=   1	gamma=0.1
Parameter2: C=  10	gamma=0.01


In [33]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

In [34]:
train_d, test_d, train_l, test_l = train_test_split(data, label, test_size=0.3)

In [37]:
c_list = [0.001, 0.01, 0.1, 0.5, 1, 5, 10, 100]
gamma_list = [0.001, 0.01, 0.1, 0.5, 1, 5, 10, 100]

In [38]:
param_dic = {'C':c_list, 'gamma':gamma_list}

In [42]:
clf = GridSearchCV(SVC(), param_dic, cv=4)

In [43]:
clf.fit(train_d, train_l)

GridSearchCV(cv=4, estimator=SVC(),
             param_grid={'C': [0.001, 0.01, 0.1, 0.5, 1, 5, 10, 100],
                         'gamma': [0.001, 0.01, 0.1, 0.5, 1, 5, 10, 100]})

In [44]:
clf.cv_results_

{'mean_fit_time': array([0.00647235, 0.00450838, 0.0028556 , 0.00282621, 0.00274235,
        0.00271237, 0.00285941, 0.0028913 , 0.00354904, 0.00390524,
        0.00296557, 0.00282973, 0.0027135 , 0.0027076 , 0.00279921,
        0.00287122, 0.00275141, 0.00300837, 0.00303727, 0.00282514,
        0.00277865, 0.00278163, 0.00287265, 0.00298047, 0.00286931,
        0.00266081, 0.00260943, 0.00300848, 0.00309026, 0.00288135,
        0.00301516, 0.00307018, 0.00288451, 0.00279725, 0.0025723 ,
        0.00258142, 0.0025906 , 0.00302875, 0.00288373, 0.0028758 ,
        0.00275862, 0.0026601 , 0.00263321, 0.00269848, 0.00275111,
        0.00303662, 0.00302988, 0.00336903, 0.00335717, 0.00268608,
        0.00277579, 0.00286663, 0.00277227, 0.00295472, 0.00300866,
        0.00375414, 0.00327438, 0.00270087, 0.00308365, 0.00305808,
        0.00282961, 0.00307488, 0.00315875, 0.00311702]),
 'std_fit_time': array([3.51170213e-04, 1.85182268e-03, 1.44782553e-04, 4.41965929e-05,
        7.42799913e-0

In [48]:
loop_len = len(clf.cv_results_["mean_test_score"])

In [59]:
for i in range(loop_len):
    print(clf.cv_results_["params"][i], "\t\tscore:{0:.3f}%" .format(clf.cv_results_["mean_test_score"][i]*100))

{'C': 0.001, 'gamma': 0.001} 		score:39.516%
{'C': 0.001, 'gamma': 0.01} 		score:39.516%
{'C': 0.001, 'gamma': 0.1} 		score:39.516%
{'C': 0.001, 'gamma': 0.5} 		score:39.516%
{'C': 0.001, 'gamma': 1} 		score:39.516%
{'C': 0.001, 'gamma': 5} 		score:39.516%
{'C': 0.001, 'gamma': 10} 		score:39.516%
{'C': 0.001, 'gamma': 100} 		score:39.516%
{'C': 0.01, 'gamma': 0.001} 		score:39.516%
{'C': 0.01, 'gamma': 0.01} 		score:39.516%
{'C': 0.01, 'gamma': 0.1} 		score:39.516%
{'C': 0.01, 'gamma': 0.5} 		score:39.516%
{'C': 0.01, 'gamma': 1} 		score:39.516%
{'C': 0.01, 'gamma': 5} 		score:39.516%
{'C': 0.01, 'gamma': 10} 		score:39.516%
{'C': 0.01, 'gamma': 100} 		score:39.516%
{'C': 0.1, 'gamma': 0.001} 		score:39.516%
{'C': 0.1, 'gamma': 0.01} 		score:39.516%
{'C': 0.1, 'gamma': 0.1} 		score:67.742%
{'C': 0.1, 'gamma': 0.5} 		score:81.452%
{'C': 0.1, 'gamma': 1} 		score:81.452%
{'C': 0.1, 'gamma': 5} 		score:53.226%
{'C': 0.1, 'gamma': 10} 		score:39.516%
{'C': 0.1, 'gamma': 100} 		score:39.516

In [62]:
print(f"Best score: {clf.best_score_*100:.3f}%\nBest param: {clf.best_params_}")

Best score: 83.871%
Best param: {'C': 100, 'gamma': 0.5}


In [63]:
clf = SVC(C=100, gamma=0.5)
clf.fit(train_d, train_l)
res = clf.predict(test_d)

In [64]:
from sklearn.metrics import accuracy_score

In [65]:
score = accuracy_score(res, test_l)
print(f"Test score: {score*100:.3f}%")

Test score: 83.333%
