<a href="https://colab.research.google.com/github/leepopnamoo/SQL-Study/blob/main/Ch4_Support_Vector_Machine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#**Ch4_서포트 벡터 머신 실습**

##**라이브러리 불러오기**

In [None]:
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt
from scipy import stats
# use seaborn plotting defaults
import seaborn as sns; sns.set()
from sklearn.svm import SVC, LinearSVC
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import StandardScaler

##**iris 데이터 불러오기**

In [None]:
# Load the iris dataset and keep only the two petal measurements as features.
iris = datasets.load_iris()
petal_columns = [2, 3]  # petal length, petal width
X = iris["data"][:, petal_columns]
y = iris["target"]

In [None]:
# Inspect the class labels of the full dataset before any filtering.
print(y)

In [None]:
# Keep only the samples labelled 0 or 1 so the task becomes binary.
setosa_or_versicolor = np.isin(y, [0, 1])
X, y = X[setosa_or_versicolor], y[setosa_or_versicolor]

In [None]:
# Show the boolean mask that selected the class-0 / class-1 samples.
print(setosa_or_versicolor)

In [None]:
# Labels after filtering: only classes 0 and 1 remain.
print(y)

##**서포트 벡터 분류 모델 생성**

###**학습 파라미터 설정**

In [None]:
# Regularisation settings used by the classifiers trained below.
C = 5
n_samples = len(X)
alpha = 1 / (C * n_samples)  # handed to SGDClassifier as its `alpha`

###**입력 데이터 스케일 변환**

In [None]:
# Standardise both features; the fitted scaler is reused later to map the
# decision boundaries back to the original units.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Display the standardised feature matrix.
X_scaled

###**선형 SVM 분류 모델 생성**

In [None]:
# Linear SVM classifier trained with hinge loss and the shared C value.
lin_clf = LinearSVC(loss="hinge", C=C, random_state=42)
lin_clf.fit(X_scaled, y)

###**선형 커널을 사용한 SVM 분류 모델 생성**

In [None]:
# Kernel-based SVC restricted to a linear kernel, using the same C for comparison.
svm_clf = SVC(kernel="linear", C=C)
svm_clf.fit(X_scaled, y)

###**확률적 경사하강법을 이용한 선형모델**

In [None]:
# Linear model with hinge loss fitted by stochastic gradient descent,
# using a constant learning rate and the `alpha` computed above.
sgd_clf = SGDClassifier(
    loss="hinge",
    learning_rate="constant",
    eta0=0.001,
    tol=1e-3,
    alpha=alpha,
    max_iter=100000,
    random_state=42,
)
sgd_clf.fit(X_scaled, y)

##**각 분류기의 결정경계 확인**

In [None]:
# Print the intercept and coefficients of each fitted classifier, one per line.
sgd_label = "SGDClassifier(alpha={:.5f}):".format(sgd_clf.alpha)
for label, clf in (
    ("LinearSVC:                   ", lin_clf),
    ("SVC:                         ", svm_clf),
    (sgd_label, sgd_clf),
):
    print(label, clf.intercept_, clf.coef_)

###**선형 SVM 분류 모델의 기울기와 편향 계산**

In [None]:
# Rewrite the boundary c0*x0 + c1*x1 + intercept = 0 as x1 = w1*x0 + b1.
lin_c0, lin_c1 = lin_clf.coef_[0]
w1 = -lin_c0 / lin_c1  # slope
b1 = -lin_clf.intercept_[0] / lin_c1  # intercept on the x1 axis

###**선형 커널을 사용한 SVM 분류 모델의 기울기와 편향 계산**

In [None]:
# Rewrite the boundary c0*x0 + c1*x1 + intercept = 0 as x1 = w2*x0 + b2.
svm_c0, svm_c1 = svm_clf.coef_[0]
w2 = -svm_c0 / svm_c1  # slope
b2 = -svm_clf.intercept_[0] / svm_c1  # intercept on the x1 axis

###**확률적 경사하강법을 이용한 선형모델의 기울기와 편향 계산**

In [None]:
# Rewrite the boundary c0*x0 + c1*x1 + intercept = 0 as x1 = w3*x0 + b3.
sgd_c0, sgd_c1 = sgd_clf.coef_[0]
w3 = -sgd_c0 / sgd_c1  # slope
b3 = -sgd_clf.intercept_[0] / sgd_c1  # intercept on the x1 axis

###**모든 모델의 결정경계를 원본 스케일로 재변환**

In [None]:
# Two endpoints of the LinearSVC boundary (at scaled x0 = -10 and 10),
# mapped back to the original feature units.
line1 = scaler.inverse_transform([[x0, x0 * w1 + b1] for x0 in (-10, 10)])

In [None]:
# Two endpoints of the SVC boundary (at scaled x0 = -10 and 10),
# mapped back to the original feature units.
line2 = scaler.inverse_transform([[x0, x0 * w2 + b2] for x0 in (-10, 10)])

In [None]:
# Two endpoints of the SGDClassifier boundary (at scaled x0 = -10 and 10),
# mapped back to the original feature units.
line3 = scaler.inverse_transform([[x0, x0 * w3 + b3] for x0 in (-10, 10)])

##**결정경계 시각화**

In [None]:
# 맷플롯립 설정
%matplotlib inline
import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['axes.labelsize'] = 20
plt.rcParams['xtick.labelsize'] = 16
plt.rcParams['ytick.labelsize'] = 16

In [None]:
# Overlay the three decision boundaries (in original feature units) on the
# two iris classes.
plt.figure(figsize=(15, 6))
boundaries = (
    (line1, "k:", {"label": "LinearSVC"}),
    (line2, "b--", {"linewidth": 2, "label": "SVC"}),
    (line3, "r-", {"label": "SGDClassifier"}),
)
for boundary, fmt, line_kwargs in boundaries:
    plt.plot(boundary[:, 0], boundary[:, 1], fmt, **line_kwargs)
is_class_1 = y == 1
is_class_0 = y == 0
plt.plot(X[is_class_1, 0], X[is_class_1, 1], "bs")  # Iris-Versicolor (no legend entry)
plt.plot(X[is_class_0, 0], X[is_class_0, 1], "yo")  # Iris-Setosa (no legend entry)
plt.xlabel("Petal Length", fontsize=14)
plt.ylabel("Petal Width", fontsize=14)
plt.legend(loc="upper center", fontsize=14)
plt.axis([0, 5.5, 0, 2])
plt.show()

##**얼굴 인식**

In [None]:
from sklearn.datasets import fetch_lfw_people
# Load the LFW face dataset, restricted via min_faces_per_person=60.
faces = fetch_lfw_people(min_faces_per_person=60)
print(faces.target_names)  # names of the people that remain
print(faces.images.shape)  # shape of the image array

In [None]:
# Preview the first 15 faces in a 3x5 grid, each captioned with the person's name.
fig, ax = plt.subplots(3, 5, figsize=(16, 6))
for idx, panel in enumerate(ax.flat):
    person = faces.target_names[faces.target[idx]]
    panel.imshow(faces.images[idx], cmap='bone')
    panel.set(xticks=[], yticks=[], xlabel=person)

In [None]:
from sklearn.svm import SVC
from sklearn.decomposition import PCA as RandomizedPCA
from sklearn.pipeline import make_pipeline

# Pipeline: reduce each face to 150 whitened principal components, then
# classify with an RBF-kernel SVC using balanced class weights.
# The alias `RandomizedPCA` only renames PCA; the randomized solver has to be
# requested explicitly, otherwise the solver is chosen by PCA's 'auto'
# heuristic and may vary with data shape and scikit-learn version.
pca = RandomizedPCA(n_components=150, whiten=True,
                    svd_solver='randomized', random_state=42)
svc = SVC(kernel='rbf', class_weight='balanced')
model = make_pipeline(pca, svc)

In [None]:
from sklearn.model_selection import train_test_split
# Split the flattened face vectors and their labels into train and test sets,
# with a fixed seed so the split is repeatable.
Xtrain, Xtest, ytrain, ytest = train_test_split(faces.data, faces.target,
                                                random_state=42)

In [None]:
# Search for the best combination of the SVC's C and gamma.
from sklearn.model_selection import GridSearchCV
param_grid = {
    'svc__C': [5, 10],
    'svc__gamma': [0.001, 0.005],
}
grid = GridSearchCV(estimator=model, param_grid=param_grid)

grid.fit(Xtrain, ytrain)
print(grid.best_params_)

In [None]:
# Replace the pipeline with the best one found by the grid search and
# predict labels for the test set.
model = grid.best_estimator_
yfit = model.predict(Xtest)

In [None]:
# Display the predicted labels for the test set.
yfit

In [None]:
from sklearn.metrics import classification_report
# Per-class metrics on the test set, with rows labelled by person name.
print(classification_report(ytest, yfit,
                            target_names=faces.target_names))

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of test labels vs. predictions, drawn as an annotated
# heatmap with person names on both axes.
mat = confusion_matrix(ytest, yfit)
tick_names = faces.target_names
plt.figure(figsize=(5,5))
sns.heatmap(
    mat,
    square=True,
    annot=True,
    fmt='d',
    cbar=False,
    xticklabels=tick_names,
    yticklabels=tick_names,
)
plt.ylabel('true label')
plt.xlabel('predicted label');

In [None]:
# Show the first 24 test faces labelled with the predicted surname;
# wrong predictions are coloured red.
# Take the per-image shape from the dataset instead of hard-coding 62x47, so
# the cell keeps working if the faces are loaded at a different size.
image_shape = faces.images.shape[1:]
fig, ax = plt.subplots(4, 6,  figsize=(10, 6))
for i, axi in enumerate(ax.flat):
    axi.imshow(Xtest[i].reshape(image_shape), cmap='bone')
    axi.set(xticks=[], yticks=[])
    axi.set_ylabel(faces.target_names[yfit[i]].split()[-1],
                   color='black' if yfit[i] == ytest[i] else 'red')
fig.suptitle('Predicted Names; Incorrect Labels in Red', size=14);