# Grid Search SVC 1

Explorando `GridSearchCV` com `SVC` para encontrar o melhor valor do parâmetro `C` para o dataset das correntes.

## Importando bibliotecas

In [1]:
import numpy as np
import pandas as pd

from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV

## Carregando dados

In [2]:
# Read the dataset; the 'classe' column is removed from the frame and kept
# separately as the label series, so chainDF holds only the feature columns.
chainDF = pd.read_csv('./corrente.csv')
targetS = chainDF.pop('classe')

# Feature labels are the columns that remain after popping the label column.
feature_names = chainDF.columns

# Class labels (presumably listed in the order of the encoded class values
# -- TODO confirm against the dataset).
target_names = ['Normal', 'HI-1', 'HI-2', 'HI-3', 'LI-1', 'LI-2', 'LI-3']

# Feature matrix and label vector as NumPy arrays.
X = chainDF.to_numpy()
y = targetS.to_numpy()

# Hold out 30% of the samples for testing; the seed is fixed so the split is
# the same on every run.
X_pre_train, X_pre_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Standardise the features: the scaler is fitted on the training split only
# and then applied unchanged to both splits.
sc = StandardScaler()
sc.fit(X_pre_train)

X_train, X_test = sc.transform(X_pre_train), sc.transform(X_pre_test)

## Grid Search

In [3]:
# First search: 20 evenly spaced values of C between 0.1 and 3 (inclusive),
# each evaluated with 15-fold cross-validation on the scaled training data.
# 'decision_function_shape' and 'cache_size' have a single candidate each, so
# they are constant across the whole grid.
cGS = GridSearchCV(
    SVC(),
    {
        'C': np.linspace(0.1, 3, 20),
        'decision_function_shape': ['ovo'],
        'cache_size': [1000],
    },
    cv=15,
)
cGS.fit(X_train, y_train)

# Tabulate the cross-validation results and show the score summary per C,
# best-ranked candidate first.
kernelDF = pd.DataFrame(cGS.cv_results_)
(
    kernelDF
    .loc[:, ['param_C', 'mean_test_score', 'std_test_score', 'rank_test_score']]
    .sort_values('rank_test_score')
)

Unnamed: 0,param_C,mean_test_score,std_test_score,rank_test_score
19,3.0,0.556958,0.021389,1
18,2.84737,0.550741,0.019785,2
17,2.69474,0.544891,0.018745,3
16,2.54211,0.540871,0.019075,4
15,2.38947,0.535384,0.022002,5
14,2.23684,0.52807,0.020036,6
13,2.08421,0.523133,0.021033,7
12,1.93158,0.519839,0.020353,8
11,1.77895,0.510882,0.021881,9
10,1.62632,0.501745,0.021478,10


Como a acurácia média parece aumentar conforme o parâmetro `C` aumenta, resolvi fazer outra busca em outro intervalo, com valores maiores de `C`.

In [4]:
# Second search: C takes the values 4, 8, ..., 40 (step 4), each evaluated
# with 15-fold cross-validation on the scaled training data.
# 'decision_function_shape' and 'cache_size' have a single candidate each, so
# they are constant across the whole grid.
c2GS = GridSearchCV(
    SVC(),
    {
        'C': np.arange(4, 44, 4),
        'decision_function_shape': ['ovo'],
        'cache_size': [1000],
    },
    cv=15,
)
c2GS.fit(X_train, y_train)

# Tabulate the cross-validation results and show the score summary per C,
# best-ranked candidate first. Note that kernelDF is reassigned here.
kernelDF = pd.DataFrame(c2GS.cv_results_)
(
    kernelDF
    .loc[:, ['param_C', 'mean_test_score', 'std_test_score', 'rank_test_score']]
    .sort_values('rank_test_score')
)

Unnamed: 0,param_C,mean_test_score,std_test_score,rank_test_score
9,40,0.724272,0.017083,1
8,36,0.719335,0.017717,2
7,32,0.712937,0.0184,3
6,28,0.709462,0.018613,4
5,24,0.705987,0.02053,5
4,20,0.695748,0.019385,6
3,16,0.683498,0.015444,7
2,12,0.666124,0.016793,8
1,8,0.63832,0.015171,9
0,4,0.582008,0.020961,10


In [5]:
# Third search: C takes the values 50, 60, ..., 100 (step 10), each evaluated
# with 15-fold cross-validation on the scaled training data.
# 'decision_function_shape' and 'cache_size' have a single candidate each, so
# they are constant across the whole grid.
c3GS = GridSearchCV(
    SVC(),
    {
        'C': np.arange(50, 110, 10),
        'decision_function_shape': ['ovo'],
        'cache_size': [1000],
    },
    cv=15,
)
c3GS.fit(X_train, y_train)

# Tabulate the cross-validation results and show the score summary per C,
# best-ranked candidate first. Note that kernelDF is reassigned here.
kernelDF = pd.DataFrame(c3GS.cv_results_)
(
    kernelDF
    .loc[:, ['param_C', 'mean_test_score', 'std_test_score', 'rank_test_score']]
    .sort_values('rank_test_score')
)

Unnamed: 0,param_C,mean_test_score,std_test_score,rank_test_score
5,100,0.749875,0.017695,1
4,90,0.746948,0.016612,2
3,80,0.74512,0.018343,3
2,70,0.744387,0.017141,4
1,60,0.741276,0.01865,5
0,50,0.735059,0.01728,6


In [6]:
# Fourth search: C takes the values 120, 140, ..., 200 (step 20), each
# evaluated with 15-fold cross-validation on the scaled training data.
# 'decision_function_shape' and 'cache_size' have a single candidate each, so
# they are constant across the whole grid.
c4GS = GridSearchCV(
    SVC(),
    {
        'C': np.arange(120, 220, 20),
        'decision_function_shape': ['ovo'],
        'cache_size': [1000],
    },
    cv=15,
)
c4GS.fit(X_train, y_train)

# Tabulate the cross-validation results and show the score summary per C,
# best-ranked candidate first. Note that kernelDF is reassigned here.
kernelDF = pd.DataFrame(c4GS.cv_results_)
(
    kernelDF
    .loc[:, ['param_C', 'mean_test_score', 'std_test_score', 'rank_test_score']]
    .sort_values('rank_test_score')
)

Unnamed: 0,param_C,mean_test_score,std_test_score,rank_test_score
4,200,0.760662,0.017976,1
3,180,0.758284,0.01818,2
2,160,0.75792,0.017856,3
1,140,0.756091,0.017233,4
0,120,0.753346,0.018018,5
