# Part 1. 분류 (Classification)

## 1. 분석 데이터 준비

In [1]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
data1 = pd.read_csv("breast-cancer-wisconsin.csv",encoding="utf-8")
X=data1[data1.columns[1:10]]
y=data1[['Class']]

In [2]:
from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test = train_test_split(X,y,stratify=y,random_state=42)

In [3]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
scaler.fit(X_train)
X_scaled_train = scaler.transform(X_train)
X_scaled_test = scaler.transform(X_test)

## 2. 기본모델 적용

In [5]:
from sklearn.svm import SVC
model=SVC()
model.fit(X_scaled_train,y_train)
pred_train = model.predict(X_scaled_train)
pred_test = model.predict(X_scaled_test)
model.score(X_scaled_train,y_train)

0.984375

In [6]:
from sklearn.metrics import confusion_matrix,classification_report
confusion_train = confusion_matrix(y_train,pred_train)
confusion_test = confusion_matrix(y_test,pred_test)

cfreport_train = classification_report(y_train,pred_train)
cfreport_test = classification_report(y_test,pred_test)

print("훈련 데이터 오차행렬 : \n",confusion_train)
print("분류 예측 리포트 : \n", cfreport_train)

훈련 데이터 오차행렬 : 
 [[329   4]
 [  4 175]]
분류 예측 리포트 : 
               precision    recall  f1-score   support

           0       0.99      0.99      0.99       333
           1       0.98      0.98      0.98       179

    accuracy                           0.98       512
   macro avg       0.98      0.98      0.98       512
weighted avg       0.98      0.98      0.98       512



In [7]:
model.score(X_scaled_test,y_test)

0.9649122807017544

In [8]:
print("테스트 데이터 오차행렬 : \n",confusion_test)
print("분류 예측 리포트 : \n", cfreport_test)

테스트 데이터 오차행렬 : 
 [[106   5]
 [  1  59]]
분류 예측 리포트 : 
               precision    recall  f1-score   support

           0       0.99      0.95      0.97       111
           1       0.92      0.98      0.95        60

    accuracy                           0.96       171
   macro avg       0.96      0.97      0.96       171
weighted avg       0.97      0.96      0.97       171



## 3.Grid Search

In [9]:
from sklearn.model_selection import GridSearchCV

param_grid = [{'kernel' :['rbf'], 
               'C' : [0.001,0.01,0.1,1,10,100],
              'gamma':[0.001,0.01,0.1,1,10,100]},
             {'kernel' :['linear'], 
               'C' : [0.001,0.01,0.1,1,10,100],
              'gamma':[0.001,0.01,0.1,1,10,100]}]

grid_search = GridSearchCV(SVC(),param_grid,cv=5)
grid_search.fit(X_scaled_train,y_train)

GridSearchCV(cv=5, estimator=SVC(),
             param_grid=[{'C': [0.001, 0.01, 0.1, 1, 10, 100],
                          'gamma': [0.001, 0.01, 0.1, 1, 10, 100],
                          'kernel': ['rbf']},
                         {'C': [0.001, 0.01, 0.1, 1, 10, 100],
                          'gamma': [0.001, 0.01, 0.1, 1, 10, 100],
                          'kernel': ['linear']}])

In [10]:
print("Best Parameter : ", grid_search.best_params_)
print("Best Score : ", grid_search.best_score_)
print("Testset Score : ", grid_search.score(X_scaled_test,y_test))

Best Parameter :  {'C': 100, 'gamma': 0.01, 'kernel': 'rbf'}
Best Score :  0.9745669141442985
Testset Score :  0.9590643274853801


## 4. Random Search

In [12]:
from scipy.stats import randint
from sklearn.model_selection import RandomizedSearchCV

param_distribs = {'kernel' : ['rbf'],
                  'C': randint(low=0.001,high=100),
                 'gamma':randint(low=0.001,high=100)}

random_search = RandomizedSearchCV(SVC(),param_distributions = param_distribs,
                                  n_iter=100,cv=5)
random_search.fit(X_scaled_train,y_train)

RandomizedSearchCV(cv=5, estimator=SVC(), n_iter=100,
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000017674204C10>,
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000017674565490>,
                                        'kernel': ['rbf']})

In [13]:
print("Best Parameter : ", random_search.best_params_)
print("Best Score : ", random_search.best_score_)
print("Testset Score : ", random_search.score(X_scaled_test,y_test))

Best Parameter :  {'C': 43, 'gamma': 6, 'kernel': 'rbf'}
Best Score :  0.9648010660574909
Testset Score :  0.9649122807017544


# Part 2. 회귀 (Regression)

## 1. 분석 데이터 준비

In [14]:
data2=pd.read_csv('house_price.csv',encoding='utf-8')
X=data2[data2.columns[1:5]]
y=data2[['house_value']]

In [15]:
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=42)

scaler = MinMaxScaler()
scaler.fit(X_train)

X_scaled_train = scaler.transform(X_train)
X_scaled_test = scaler.transform(X_test)

## 2. 기본모델 적용

In [22]:
from sklearn.svm import SVR
model = SVR(kernel='poly')
model.fit(X_scaled_train, y_train)
pred_train = model.predict(X_scaled_train)
pred_test = model.predict(X_scaled_test)
model.score(X_scaled_train,y_train)

0.45177025652823866

In [23]:
model.score(X_scaled_test,y_test)

0.46997708096191393

In [24]:
from sklearn.metrics import mean_squared_error
import numpy as np

MSE_train = mean_squared_error(y_train,pred_train)
MSE_test = mean_squared_error(y_test,pred_test)

print("훈련 데이터 RMSE : ",np.sqrt(MSE_train))
print("테스트 데이터 RMSE : ", np.sqrt(MSE_test))

훈련 데이터 RMSE :  70669.55244251259
테스트 데이터 RMSE :  69600.08959938577


## 3. Grid Search

In [25]:
param_grid = {'kernel':['poly'],
              'C':[0.01,0.1,1,10],
              'gamma':[0.01,0.1,1,10]}

grid_search = GridSearchCV(SVR(),param_grid,cv=5)
grid_search.fit(X_scaled_train,y_train)

GridSearchCV(cv=5, estimator=SVR(),
             param_grid={'C': [0.01, 0.1, 1, 10], 'gamma': [0.01, 0.1, 1, 10],
                         'kernel': ['poly']})

In [26]:
print("Best Parameter : ", grid_search.best_params_)
print("Best Score : ", grid_search.best_score_)
print("Testset Score : ", grid_search.score(X_scaled_test,y_test))

Best Parameter :  {'C': 10, 'gamma': 10, 'kernel': 'poly'}
Best Score :  0.4887807602711213
Testset Score :  0.5092229851181338


## 4. Random Search

In [27]:
param_distribs = {'kernel' : ['poly'],
                 'C':randint(low=0.01,high=10),
                 'gamma' : randint(low=0.01,high=10)}
random_search = RandomizedSearchCV(SVR(kernel='poly'),
                                   param_distributions=param_distribs,
                                   n_iter=20,cv=5)

random_search.fit(X_scaled_train,y_train)

RandomizedSearchCV(cv=5, estimator=SVR(kernel='poly'), n_iter=20,
                   param_distributions={'C': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000001767413B9D0>,
                                        'gamma': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000001767413B3A0>,
                                        'kernel': ['poly']})

In [28]:
print("Best Parameter : ", random_search.best_params_)
print("Best Score : ", random_search.best_score_)
print("Testset Score : ", random_search.score(X_scaled_test,y_test))

Best Parameter :  {'C': 5, 'gamma': 9, 'kernel': 'poly'}
Best Score :  0.4564560914614878
Testset Score :  0.48220909115765387
