In [24]:
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split, GridSearchCV
import numpy as np
from tqdm.notebook import tqdm

In [2]:
df = pd.read_csv('wine.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 178 entries, 0 to 177
Data columns (total 14 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   alcohol                       178 non-null    float64
 1   malic_acid                    178 non-null    float64
 2   ash                           178 non-null    float64
 3   alcalinity_of_ash             178 non-null    float64
 4   magnesium                     178 non-null    float64
 5   total_phenols                 178 non-null    float64
 6   flavanoids                    178 non-null    float64
 7   nonflavanoid_phenols          178 non-null    float64
 8   proanthocyanins               178 non-null    float64
 9   color_intensity               178 non-null    float64
 10  hue                           178 non-null    float64
 11  od280/od315_of_diluted_wines  178 non-null    float64
 12  proline                       178 non-null    float64
 13  label

In [3]:
data = df.drop('label', axis=1)
label = df['label']

In [4]:
train_d, test_d, train_l, test_l = train_test_split(data, label, test_size=0.3, stratify=label)

In [5]:
param_grid = {'C': [0.1, 1, 5, 10, 50, 100, 500, 1000], 'gamma': [1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1]}

clf = GridSearchCV(SVC(), param_grid, cv=4)
clf.fit(train_d, train_l)

res = pd.DataFrame(clf.cv_results_)

In [6]:
res.head()

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_gamma,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,mean_test_score,std_test_score,rank_test_score
0,0.003842,0.001029,0.002097,0.000425,0.1,1e-09,"{'C': 0.1, 'gamma': 1e-09}",0.387097,0.419355,0.419355,0.387097,0.403226,0.016129,62
1,0.002604,0.000308,0.00154,0.00017,0.1,1e-08,"{'C': 0.1, 'gamma': 1e-08}",0.387097,0.419355,0.419355,0.387097,0.403226,0.016129,62
2,0.002414,0.000245,0.001516,0.000209,0.1,1e-07,"{'C': 0.1, 'gamma': 1e-07}",0.387097,0.419355,0.419355,0.387097,0.403226,0.016129,62
3,0.002201,0.000103,0.001424,0.000102,0.1,1e-06,"{'C': 0.1, 'gamma': 1e-06}",0.645161,0.709677,0.645161,0.645161,0.66129,0.027936,40
4,0.002093,8e-06,0.001388,1.9e-05,0.1,1e-05,"{'C': 0.1, 'gamma': 1e-05}",0.645161,0.741935,0.677419,0.645161,0.677419,0.039508,31


In [11]:
res.sort_values('rank_test_score').head(3)

Unnamed: 0,mean_fit_time,std_fit_time,mean_score_time,std_score_time,param_C,param_gamma,params,split0_test_score,split1_test_score,split2_test_score,split3_test_score,mean_test_score,std_test_score,rank_test_score
74,0.002911,0.000256,0.0014,7.3e-05,1000,1e-05,"{'C': 1000, 'gamma': 1e-05}",0.83871,0.967742,0.903226,0.935484,0.91129,0.04771,1
64,0.002514,0.00018,0.001336,3e-06,500,1e-05,"{'C': 500, 'gamma': 1e-05}",0.83871,0.935484,0.870968,0.935484,0.895161,0.041904,2
65,0.002571,0.000113,0.001336,5e-06,500,0.0001,"{'C': 500, 'gamma': 0.0001}",0.806452,0.903226,0.806452,0.870968,0.846774,0.041904,3


1st: C=1000, gamma=1e-05  
2nd: C=500, gamma=1e-05  
3rd: C=500, gamma=1e-04

In [25]:
for i in tqdm(range(0, 5)):
    train_d, test_d, train_l, test_l = train_test_split(data, label, test_size=0.3, stratify=label, random_state=i)
    param_grid = {'C': [0.1, 1, 5, 10, 50, 100, 500, 1000], 'gamma': [1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1]}
    clf = GridSearchCV(SVC(), param_grid, cv=4)
    clf.fit(train_d, train_l)
    res = pd.DataFrame(clf.cv_results_)
    C = res.sort_values('rank_test_score').head(3)['param_C']
    gamma = res.sort_values('rank_test_score').head(3)['param_gamma']
    print('Iter num: ', i)
    for j in range(0, 3):
        print('\t', j+1, ':\tC=', C[C.index[j]], ', gamma=', gamma[gamma.index[j]])
    print('\n')

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))

Iter num:  0
	 1 :	C= 500 , gamma= 1e-05
	 2 :	C= 1000 , gamma= 1e-05
	 3 :	C= 100 , gamma= 0.0001


Iter num:  1
	 1 :	C= 1000 , gamma= 1e-05
	 2 :	C= 500 , gamma= 1e-05
	 3 :	C= 100 , gamma= 0.0001


Iter num:  2
	 1 :	C= 1000 , gamma= 1e-05
	 2 :	C= 500 , gamma= 1e-05
	 3 :	C= 1000 , gamma= 0.0001


Iter num:  3
	 1 :	C= 500 , gamma= 1e-05
	 2 :	C= 1000 , gamma= 1e-05
	 3 :	C= 100 , gamma= 0.0001


Iter num:  4
	 1 :	C= 1000 , gamma= 1e-05
	 2 :	C= 100 , gamma= 0.0001
	 3 :	C= 500 , gamma= 1e-05



