svm.LinearSVC([penalty, loss, dual, tol, C, ...])
Linear Support Vector Classification.

svm.LinearSVR(*[, epsilon, tol, C, loss, ...])
Linear Support Vector Regression.

svm.NuSVC(*[, nu, kernel, degree, gamma, ...])
Nu-Support Vector Classification.

svm.NuSVR(*[, nu, C, kernel, degree, gamma, ...])
Nu Support Vector Regression.

svm.OneClassSVM(*[, kernel, degree, gamma, ...])
Unsupervised Outlier Detection.

svm.SVC(*[, C, kernel, degree, gamma, ...])
C-Support Vector Classification.

svm.SVR(*[, kernel, degree, gamma, coef0, ...])
Epsilon-Support Vector Regression.

### sklearn.svm.SVC
* `class sklearn.svm.SVC(*, C=1.0, kernel='rbf', degree=3, gamma='scale', coef0=0.0, shrinking=True, probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, max_iter=-1, decision_function_shape='ovr', break_ties=False, random_state=None)`
> https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVC.html#sklearn.svm.SVC

In [20]:
# Load the iris dataset and hold out 20% of the samples as a test split.
from sklearn import datasets
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()
X, y = iris.data, iris.target

# An integer random_state makes the shuffle (and so the split) reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=156,
)

In [21]:
# Fit a support vector classifier on the training split, then score it on
# the held-out test split.
from sklearn import svm
from sklearn.metrics import accuracy_score

# gamma and C are example settings; try other values when using this for real.
clf = svm.SVC(C=200.0, gamma=0.01)
clf.fit(X_train, y_train)

predictions = clf.predict(X_test)

# Show the true test labels first, then the fraction predicted correctly.
print(y_test)
print(accuracy_score(y_test, predictions))

[2 0 1 2 0 1 2 2 0 2 2 0 2 1 1 2 0 0 2 1 1 1 1 2 0 1 2 0 0 2]
1.0


In [27]:
# Same workflow as the earlier SVC example, but with a different split seed
# and a degree-2 polynomial kernel.
from sklearn import datasets
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

iris = datasets.load_iris()
X, y = iris.data, iris.target

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=121,
)

# Adjust these hyperparameters to experiment.
clf = svm.SVC(kernel='poly', degree=2, gamma='auto')
clf.fit(X_train, y_train)

predictions = clf.predict(X_test)

# Show the true test labels first, then the fraction predicted correctly.
print(y_test)
print(accuracy_score(y_test, predictions))

[1 2 1 0 0 1 1 1 1 2 1 1 1 0 0 2 1 0 2 0 2 2 1 1 1 1 0 0 2 2]
0.9666666666666667


### sklearn.neighbors.KNeighborsClassifier
* `class sklearn.neighbors.KNeighborsClassifier(n_neighbors=5, *, weights='uniform', algorithm='auto', leaf_size=30, p=2, metric='minkowski', metric_params=None, n_jobs=None)`
> https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html?highlight=kneighbor#sklearn.neighbors.KNeighborsClassifier

In [28]:
# Train a 6-nearest-neighbours classifier on an 80/20 split of iris and
# report its accuracy on the test split.
from sklearn import datasets
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

iris = datasets.load_iris()
X, y = iris.data, iris.target

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=121,
)

knn = KNeighborsClassifier(n_neighbors=6)
knn.fit(X_train, y_train)

y_pred = knn.predict(X_test)
scores = metrics.accuracy_score(y_test, y_pred)

# Bare last expression so the notebook displays the accuracy.
scores

0.9666666666666667

In [29]:
# Refit on the complete dataset (X, y) rather than only the training split,
# then classify two hand-written samples.
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X, y)

# Map each integer class label to its species name.
classes = {
    0: 'setosa',
    1: 'versicolor',
    2: 'virginica',
}

# Present new samples that the model has not seen before.
x_new = [
    [3, 4, 5, 2],
    [5, 4, 2, 2],
]
y_predict = knn.predict(x_new)

# One species name per line, in the same order as the rows of x_new.
print("\n".join(classes[label] for label in y_predict))

versicolor
setosa


##### Hyperparameter tuning with GridSearchCV (grid search + cross-validation)

In [30]:
# Grid-search n_neighbors for a k-NN classifier with 3-fold cross-validation
# on the iris training split, then tabulate the per-candidate scores.
import pandas as pd

from sklearn import metrics
from sklearn.datasets import load_iris
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,
    random_state=121,
)

knn = KNeighborsClassifier()

# param_grid must be a dict (a list of several dicts is also accepted).
parameters = {'n_neighbors': [3, 4, 5, 6]}

grid_knn = GridSearchCV(knn, param_grid=parameters, cv=3, refit=True)

# Train and evaluate each hyperparameter candidate in param_grid in turn
# on the iris training data.
grid_knn.fit(X_train, y_train)

# Pull the cross-validation results out of GridSearchCV into a DataFrame.
scores_df = pd.DataFrame(grid_knn.cv_results_)

# Show only the summary columns and the three per-fold scores.
scores_df[
    [
        'params',
        'mean_test_score',
        'rank_test_score',
        'split0_test_score',
        'split1_test_score',
        'split2_test_score',
    ]
]

Unnamed: 0,params,mean_test_score,rank_test_score,split0_test_score,split1_test_score,split2_test_score
0,{'n_neighbors': 3},0.966667,2,0.95,1.0,0.95
1,{'n_neighbors': 4},0.958333,3,0.925,1.0,0.95
2,{'n_neighbors': 5},0.975,1,0.95,1.0,0.975
3,{'n_neighbors': 6},0.958333,3,0.925,0.975,0.975


In [31]:
# Evaluate the estimator selected by the grid search on the held-out test split.
from sklearn.metrics import accuracy_score

# With refit=True, GridSearchCV exposes an already-fitted estimator that
# uses the best hyperparameters it found.
estimator = grid_knn.best_estimator_

# No further fitting is needed before predicting.
pred = estimator.predict(X_test)
print(
    '테스트 데이터 세트 정확도 : {0:.4f}'.format(
        accuracy_score(y_test, pred)
    )
)

테스트 데이터 세트 정확도 : 0.9667
