# Part 1. 분류 (Classification)

# 1. 분석 데이터 준비

In [2]:
import warnings
warnings.filterwarnings("ignore")

import pandas as pd
data1 = pd.read_csv('breast-cancer-wisconsin.csv',encoding='utf-8')
X=data1[data1.columns[1:10]]
y=data1[["Class"]]
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,stratify=y,random_state=42)

In [3]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(X_train)
X_scaled_train = scaler.transform(X_train)
X_scaled_test = scaler.transform(X_test)

# 2. 기본모델 적용


In [7]:
from sklearn.neural_network import MLPClassifier
model=MLPClassifier()
model.fit(X_scaled_train,y_train)
pred_train = model.predict(X_scaled_train)
pred_test = model.predict(X_scaled_test)



In [12]:
from sklearn.metrics import confusion_matrix,classification_report
train_score = model.score(X_scaled_train,y_train)
print("훈련데이터 점수 :\n  ",train_score)
confusion_train = confusion_matrix(y_train,pred_train)
cfreport_train = classification_report(y_train,pred_train)
print("훈련데이터 오차행렬 : \n", confusion_train)
print("분류예측 리포트 \n", cfreport_train)

훈련데이터 점수 :
   0.974609375
훈련데이터 오차행렬 : 
 [[328   5]
 [  8 171]]
분류예측 리포트 
               precision    recall  f1-score   support

           0       0.98      0.98      0.98       333
           1       0.97      0.96      0.96       179

    accuracy                           0.97       512
   macro avg       0.97      0.97      0.97       512
weighted avg       0.97      0.97      0.97       512



In [13]:
test_score = model.score(X_scaled_test,y_test)
print("테스트 데이터 점수 :\n  ",test_score)
confusion_test = confusion_matrix(y_test,pred_test)
cfreport_test = classification_report(y_test,pred_test)
print("테스트데이터 오차행렬 : \n", confusion_test)
print("분류예측 리포트 \n", cfreport_test)

테스트 데이터 점수 :
   0.9590643274853801
테스트데이터 오차행렬 : 
 [[106   5]
 [  2  58]]
분류예측 리포트 
               precision    recall  f1-score   support

           0       0.98      0.95      0.97       111
           1       0.92      0.97      0.94        60

    accuracy                           0.96       171
   macro avg       0.95      0.96      0.96       171
weighted avg       0.96      0.96      0.96       171



# 3.Grid Search

In [15]:
from sklearn. model_selection import GridSearchCV
param_grid = {'hidden_layer_sizes' : [10,30,50,100],
             'solver' : ['sgd','adam'],
             'activation' : ['tanh','relu']}
grid_search = GridSearchCV(MLPClassifier(),param_grid,cv=5)
grid_search.fit(X_scaled_train,y_train)

GridSearchCV(cv=5, estimator=MLPClassifier(),
             param_grid={'activation': ['tanh', 'relu'],
                         'hidden_layer_sizes': [10, 30, 50, 100],
                         'solver': ['sgd', 'adam']})

In [16]:
print("Best Parameter : ", grid_search.best_params_)
print("Best Score : ", grid_search.best_score_)
print("Testset Score : ", grid_search.score(X_scaled_test,y_test))

Best Parameter :  {'activation': 'relu', 'hidden_layer_sizes': 30, 'solver': 'adam'}
Best Score :  0.9745859508852085
Testset Score :  0.9590643274853801


# 4. Random Search

In [18]:
from scipy.stats import randint
from sklearn.model_selection import RandomizedSearchCV

param_distribs = {'hidden_layer_sizes' : randint(low=10,high=100),
                 'solver':['sgd','adam'],
                 'activation' : ['tanh','relu']}

random_search = RandomizedSearchCV(MLPClassifier(),
                                   param_distributions = param_distribs,
                                  n_iter = 10, cv=5)
random_search.fit(X_scaled_train,y_train)

RandomizedSearchCV(cv=5, estimator=MLPClassifier(), n_iter=100,
                   param_distributions={'activation': ['tanh', 'relu'],
                                        'hidden_layer_sizes': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000021B3904A3A0>,
                                        'solver': ['sgd', 'adam']})

In [19]:
print("Best Parameter : ", random_search.best_params_)
print("Best Score : ", random_search.best_score_)
print("Testset Score : ", random_search.score(X_scaled_test,y_test))

Best Parameter :  {'activation': 'tanh', 'hidden_layer_sizes': 11, 'solver': 'adam'}
Best Score :  0.976546735198934
Testset Score :  0.9590643274853801


# Part 2. 회귀(Regression)

## 1. 분석데이터 준비


In [25]:
data2 = pd.read_csv('house_price.csv',encoding='utf-8')
X=data2[data2.columns[1:5]]
y=data2[['house_value']]

X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=42)

scaler=MinMaxScaler()
scaler.fit(X_train)
X_scaled_train = scaler.transform(X_train)
X_scaled_test = scaler.transform(X_test)

## 2. 기본 모델 적용

In [26]:
from sklearn.neural_network import MLPRegressor
model = MLPRegressor()
model.fit(X_scaled_train,y_train)
pred_train = model.predict(X_scaled_train)
pred_test = model.predict(X_scaled_test)
model.score(X_scaled_train,y_train)

-2.821026297139299

In [27]:
model.score(X_scaled_test,y_test)

-2.778429402677678

In [28]:
import numpy as np
from sklearn.metrics import mean_squared_error

MSE_train = mean_squared_error(y_train,pred_train)
MSE_test = mean_squared_error(y_test,pred_test)

print("훈련 데이터 RMSE : ", np.sqrt(MSE_train))
print("테스트 데이터 RMSE : ", np.sqrt(MSE_test))

훈련 데이터 RMSE :  186569.67596174456
테스트 데이터 RMSE :  185830.98546424537


## 3. 튜닝모델

In [29]:
from sklearn.neural_network import MLPRegressor

model = MLPRegressor(hidden_layer_sizes = (64,64,64),
                     activation='relu',random_state=1,
                    max_iter=2000)
model.fit(X_scaled_train,y_train)
pred_train = model.predict(X_scaled_train)
pred_test = model.predict(X_scaled_test)
model.score(X_scaled_train,y_train)

0.566197903746314

In [30]:
model.score(X_scaled_test,y_test)

0.584086684313508

In [31]:
MSE_train = mean_squared_error(y_train,pred_train)
MSE_test = mean_squared_error(y_test,pred_test)

print("훈련 데이터 RMSE : ", np.sqrt(MSE_train))
print("테스트 데이터 RMSE : ", np.sqrt(MSE_test))

훈련 데이터 RMSE :  62863.255358058195
테스트 데이터 RMSE :  61654.37310884089
