# MLP 실습 : SVM (분류)

## data/library import

In [13]:
from sklearn.svm import SVC

In [14]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
data=pd.read_csv("breast-cancer-wisconsin.csv")
x=data[data.columns[1:10]]
y=data[['Class']]

In [15]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test= train_test_split(x,y,stratify=y,random_state=42)
from sklearn.preprocessing import MinMaxScaler
scaler=MinMaxScaler()
scaler.fit(x_train)
x_scaled_train=scaler.transform(x_train)
x_scaled_test=scaler.transform(x_test)

## model 학습하기

In [16]:
model=SVC()
model.fit(x_scaled_train,y_train)

SVC()

In [17]:
pred_train=model.predict(x_scaled_train)
model.score(x_scaled_train,y_train)

0.984375

## 혼동행렬 확인하기

In [18]:
from sklearn.metrics import confusion_matrix
confusion_train=confusion_matrix(y_train,pred_train)
print(confusion_train)

[[329   4]
 [  4 175]]


In [19]:
from sklearn.metrics import classification_report
cfreport_train=classification_report(y_train,pred_train)
print(cfreport_train)

              precision    recall  f1-score   support

           0       0.99      0.99      0.99       333
           1       0.98      0.98      0.98       179

    accuracy                           0.98       512
   macro avg       0.98      0.98      0.98       512
weighted avg       0.98      0.98      0.98       512



In [20]:
pred_test=model.predict(x_scaled_test)
model.score(x_scaled_test,y_test)

0.9649122807017544

In [21]:
cfreport_test=classification_report(y_test,pred_test)
print(cfreport_test)

              precision    recall  f1-score   support

           0       0.99      0.95      0.97       111
           1       0.92      0.98      0.95        60

    accuracy                           0.96       171
   macro avg       0.96      0.97      0.96       171
weighted avg       0.97      0.96      0.97       171



## Hyper Parameter 최적화

### parameter 설정

#### GridSearch

In [22]:
from sklearn.model_selection import GridSearchCV

In [23]:
param_grid=[{"kernel":['rbf'],"C":[0.001,0.01,0.1,1,10,100],"gamma":[0.001,0.01,0.1,1,10,100]},
            {"kernel":['linear'],"C":[0.001,0.01,0.1,1,10,100],"gamma":[0.001,0.01,0.1,1,10,100]}]

In [24]:

grid_search=GridSearchCV(SVC(),param_grid,cv=5)
# 파라미터 찾기
grid_search.fit(x_scaled_train,y_train)

GridSearchCV(cv=5, estimator=SVC(),
             param_grid=[{'C': [0.001, 0.01, 0.1, 1, 10, 100],
                          'gamma': [0.001, 0.01, 0.1, 1, 10, 100],
                          'kernel': ['rbf']},
                         {'C': [0.001, 0.01, 0.1, 1, 10, 100],
                          'gamma': [0.001, 0.01, 0.1, 1, 10, 100],
                          'kernel': ['linear']}])

In [25]:
print("Best Parameter : {}".format(grid_search.best_params_))
print("Best Cross-validity Score : {:.4f}".format(grid_search.best_score_))
print('Test set Score : {:.4f}'.format(grid_search.score(x_scaled_test,y_test)))

Best Parameter : {'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}
Best Cross-validity Score : 0.9746
Test set Score : 0.9591


#### RandomSearch

In [26]:
from scipy.stats import randint
from sklearn.model_selection import RandomizedSearchCV
param_distribs={"kernel":["rbf"],"C":randint(low=0.001,high=100),"gamma":randint(low=0.001,high=100)}
random_search=RandomizedSearchCV(SVC(),param_distributions=param_distribs,n_iter=100,cv=5)
# 파라미터 찾기
random_search.fit(x_scaled_train,y_train)

RandomizedSearchCV(cv=5, estimator=SVC(), n_iter=100,
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000002221D143820>,
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000002221B3BFE50>,
                                        'kernel': ['rbf']})

In [27]:
print("Best Parameter : {}".format(random_search.best_params_))
print("Best Cross-validity Score : {:.4f}".format(random_search.best_score_))
print('Test set Score : {:.4f}'.format(random_search.score(x_scaled_test,y_test)))

Best Parameter : {'C': 69, 'gamma': 6, 'kernel': 'rbf'}
Best Cross-validity Score : 0.9648
Test set Score : 0.9649


# MLP실습 : SVM(회귀)

## data/library 불러오기

In [28]:
data=pd.read_csv("house_price.csv")
x=data[data.columns[1:5]]
y=data[['house_value']]
x_train,x_test,y_train,y_test= train_test_split(x,y,random_state=42)

## model 학습하기

In [34]:
from sklearn.svm import SVR
scaler=MinMaxScaler()
scaler.fit(x_train)
x_scaled_train=scaler.transform(x_train)
x_scaled_test=scaler.transform(x_test)
model.fit(x_scaled_train,y_train)

model=SVR(kernel="poly")
model.fit(x_scaled_train,y_train)

SVR(kernel='poly')

## 결과확인하기

In [35]:
pred_train=model.predict(x_scaled_train)
model.score(x_scaled_train,y_train)

0.45177025652823866

In [36]:
pred_test=model.predict(x_scaled_test)
model.score(x_scaled_test,y_test)

0.46997708096191393

## RMSE 확인하기

In [37]:
import numpy as np
from sklearn.metrics import mean_squared_error
MSE_train=mean_squared_error(y_train,pred_train)
MSE_test=mean_squared_error(y_test,pred_test)
print(np.sqrt(MSE_train))
print(np.sqrt(MSE_test))

70669.55244251259
69600.08959938577


## Hyper parameter 설정

### GridSearch

In [38]:
from sklearn.model_selection import GridSearchCV

In [40]:
param_grid={"kernel":["poly"],"C":[0.01,0.1,1,10],"gamma":[0.01,0.1,1,10]}

In [41]:
grid_search=GridSearchCV(SVR(kernel="poly"),param_grid,cv=5)
# 파라미터 찾기
grid_search.fit(x_scaled_train,y_train)

GridSearchCV(cv=5, estimator=SVR(kernel='poly'),
             param_grid={'C': [0.01, 0.1, 1, 10], 'gamma': [0.01, 0.1, 1, 10],
                         'kernel': ['poly']})

In [42]:
print("Best Parameter : {}".format(grid_search.best_params_))
print("Best Cross-validity Score : {:.4f}".format(grid_search.best_score_))
print('Test set Score : {:.4f}'.format(grid_search.score(x_scaled_test,y_test)))

Best Parameter : {'C': 10, 'gamma': 10, 'kernel': 'poly'}
Best Cross-validity Score : 0.4888
Test set Score : 0.5092


### RandomSearch

In [43]:
from scipy.stats import randint
from sklearn.model_selection import RandomizedSearchCV
param_distribs={"kernel":["poly"],"C":randint(low=0.01,high=10),"gamma":randint(low=0.01,high=10)}
random_search=RandomizedSearchCV(SVR(kernel="poly"),param_distributions=param_distribs,n_iter=20,cv=5,return_train_score=True)
# 파라미터 찾기
random_search.fit(x_scaled_train,y_train)

RandomizedSearchCV(cv=5, estimator=SVR(kernel='poly'), n_iter=20,
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000002221D16BA00>,
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000002221B2EB130>,
                                        'kernel': ['poly']},
                   return_train_score=True)

In [44]:
print("Best Parameter : {}".format(random_search.best_params_))
print("Best Cross-validity Score : {:.4f}".format(random_search.best_score_))
print('Test set Score : {:.4f}'.format(random_search.score(x_scaled_test,y_test)))

Best Parameter : {'C': 9, 'gamma': 7, 'kernel': 'poly'}
Best Cross-validity Score : 0.4503
Test set Score : 0.4774
