In [1]:
# Mount Google Drive into the Colab filesystem so later cells can read files from it.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
import pandas as pd
import numpy as np

# Location of the input CSV on the mounted Drive.
HOUSE_CSV_PATH = '/content/drive/MyDrive/Orange3_colab/Orange3/Classification_Distance/house-standard.csv'

# Read the whole file into a DataFrame and display its (rows, columns) shape.
df = pd.read_csv(HOUSE_CSV_PATH)
df.shape

(20495, 93)

In [3]:
from sklearn.model_selection import train_test_split

# 'VALP_B1' is the prediction target; every other column is used as a feature.
target = df['VALP_B1']
data = df.drop(columns=['VALP_B1'])

# Hold out half of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    target,
    test_size=0.5,
    random_state=42,
)

# Report the resulting feature-matrix shapes.
print("X_train shape", X_train.shape)
print("X_test shape", X_test.shape)

X_train shape (10247, 92)
X_test shape (10248, 92)


In [4]:
# Support vector machine (baseline model)
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Baseline RBF-kernel SVM with C=1 and gamma='auto'.
# Probability estimates are intentionally left disabled: this cell only calls
# predict()/score(), and probability=True would add an extra internal
# cross-validated calibration fit without changing the predicted labels.
clf_svm = SVC(kernel = 'rbf', C =1, gamma = 'auto', random_state = 0)
clf_svm.fit(X_train, y_train)
pred = clf_svm.predict(X_test)

# Compare train vs. test accuracy to gauge over/under-fitting of the baseline.
print("Accuracy on training set:{:.5f}".format(clf_svm.score(X_train, y_train)))
print("Accuracy on test set:{:.5f}".format(accuracy_score(y_test, pred)))

Accuracy on training set:0.75349
Accuracy on test set:0.73488


In [5]:
from sklearn.model_selection import GridSearchCV, StratifiedKFold

# Base estimator handed to the search; kernel, C and gamma are replaced by the
# grid values for each candidate. probability=True is kept so the refit best
# estimator exposes predict_proba.
clf_svm = SVC(
    kernel='rbf',
    C=1,
    gamma='auto',
    random_state=0,
    probability=True,
)

# Search space: sigmoid kernel only, 8 values of C x 2 gamma settings.
params = {
    'kernel': ['sigmoid'],
    'C': [0.0001, 0.01, 0.1, 0.2, 0.3, 0.5, 1, 10],
    'gamma': ['auto', 'scale'],
}

# Pin StratifiedKFold's random_state to a specific number (e.g. 0).
cross_validation = StratifiedKFold(n_splits=3, shuffle=True, random_state=0)

# Passing the seeded splitter as cv applies the fixed random_state above, so
# GridSearchCV produces the same result every time it is run.
grid_svm = GridSearchCV(
    clf_svm,
    param_grid=params,
    scoring='accuracy',
    cv=cross_validation,
    n_jobs=-1,
    verbose=1,
)
grid_svm.fit(X_train, y_train)

# Best mean cross-validated accuracy and the parameter combination that achieved it.
print("GridSearchCV max accuracy:{:.5f}".format(grid_svm.best_score_))
print("GridSearchCV best parameter:", grid_svm.best_params_)

Fitting 3 folds for each of 16 candidates, totalling 48 fits
GridSearchCV max accuracy:0.73641
GridSearchCV best parameter: {'C': 1, 'gamma': 'auto', 'kernel': 'sigmoid'}


In [6]:
from sklearn.model_selection import GridSearchCV, StratifiedKFold

# Base estimator handed to the search; kernel, C and gamma are replaced by the
# grid values for each candidate. probability=True is kept so the refit best
# estimator exposes predict_proba.
clf_svm = SVC(
    kernel='rbf',
    C=1,
    gamma='auto',
    random_state=0,
    probability=True,
)

# Reduced search space: sigmoid kernel only, 4 values of C x 2 gamma settings.
params = {
    'kernel': ['sigmoid'],
    'C': [0.0001, 0.01, 1, 10],
    'gamma': ['auto', 'scale'],
}

# Pin StratifiedKFold's random_state to a specific number (e.g. 0).
cross_validation = StratifiedKFold(n_splits=3, shuffle=True, random_state=0)

# Passing the seeded splitter as cv applies the fixed random_state above, so
# GridSearchCV produces the same result every time it is run.
grid_svm = GridSearchCV(
    clf_svm,
    param_grid=params,
    scoring='accuracy',
    cv=cross_validation,
    n_jobs=-1,
)
grid_svm.fit(X_train, y_train)

# Best mean cross-validated accuracy and the parameter combination that achieved it.
print("GridSearchCV max accuracy:{:.5f}".format(grid_svm.best_score_))
print("GridSearchCV best parameter:", grid_svm.best_params_)

GridSearchCV max accuracy:0.73641
GridSearchCV best parameter: {'C': 1, 'gamma': 'auto', 'kernel': 'sigmoid'}


In [8]:
from sklearn.metrics import roc_auc_score

# Apply the tuned model to the dataset: take the estimator that GridSearchCV
# refit with the best parameters and score it on the held-out test split.
best_clf = grid_svm.best_estimator_

pred = best_clf.predict(X_test)
print("Accuracy on test set:{:.5f}".format(accuracy_score(y_test, pred)))

# Column 1 of predict_proba is used as the positive-class score.
# NOTE(review): this assumes a binary target whose positive label is the second
# entry of best_clf.classes_ — confirm against the data.
test_proba = best_clf.predict_proba(X_test)
ROC_AUC = roc_auc_score(y_test, test_proba[:, 1])
print("ROC AUC on test set:{:.5f}".format(ROC_AUC))

Accuracy on test set:0.72765
ROC AUC on test set:0.80411
