# MLP실습 : 인공신경망 (분류)

## data/library 불러오기

In [1]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
data=pd.read_csv("breast-cancer-wisconsin.csv")
x=data[data.columns[1:10]]
y=data[['Class']]

In [10]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test= train_test_split(x,y,stratify=y,random_state=42)
from sklearn.preprocessing import MinMaxScaler
scaler=MinMaxScaler()
scaler.fit(x_train)
x_scaled_train=scaler.transform(x_train)
x_scaled_test=scaler.transform(x_test)

## model 학습하기

In [11]:
from sklearn.neural_network import MLPClassifier
model=MLPClassifier()
model.fit(x_scaled_train,y_train)
pred_train=model.predict(x_scaled_train)
model.score(x_scaled_train,y_train)


0.97265625

## 결과 확인하기

In [12]:
from sklearn.metrics import classification_report
cfreport_train=classification_report(y_train,pred_train)
print(cfreport_train)

              precision    recall  f1-score   support

           0       0.98      0.98      0.98       333
           1       0.97      0.96      0.96       179

    accuracy                           0.97       512
   macro avg       0.97      0.97      0.97       512
weighted avg       0.97      0.97      0.97       512



In [13]:
from sklearn.metrics import confusion_matrix
confusion_train=confusion_matrix(y_train,pred_train)
print(confusion_train)

[[327   6]
 [  8 171]]


## Hyperparameter 최적화

### GridSearch

In [17]:
from sklearn.model_selection import GridSearchCV
param_grid={"hidden_layer_sizes":[10,30,50,100],"solver":['sgd','adam'],'activation':['tanh','relu']}
grid_search=GridSearchCV(MLPClassifier(),param_grid,cv=5)
# 파라미터 찾기
grid_search.fit(x_scaled_train,y_train)

GridSearchCV(cv=5, estimator=MLPClassifier(),
             param_grid={'activation': ['tanh', 'relu'],
                         'hidden_layer_sizes': [10, 30, 50, 100],
                         'solver': ['sgd', 'adam']})

In [18]:
print("Best Parameter : {}".format(grid_search.best_params_))
print("Best Cross-validity Score : {:.4f}".format(grid_search.best_score_))
print('Test set Score : {:.4f}'.format(grid_search.score(x_scaled_test,y_test)))

Best Parameter : {'activation': 'relu', 'hidden_layer_sizes': 50, 'solver': 'adam'}
Best Cross-validity Score : 0.9746
Test set Score : 0.9591


### RandomSearch

In [21]:
from scipy.stats import randint
from sklearn.model_selection import RandomizedSearchCV
param_distribs={"hidden_layer_sizes":randint(low=10,high=100),"solver":['sgd','adam'],'activation':['tanh','relu']}
random_search=RandomizedSearchCV(MLPClassifier(),param_distributions=param_distribs,n_iter=10,cv=5,return_train_score=True)
# 파라미터 찾기
random_search.fit(x_scaled_train,y_train)

RandomizedSearchCV(cv=5, estimator=MLPClassifier(),
                   param_distributions={'activation': ['tanh', 'relu'],
                                        'hidden_layer_sizes': <scipy.stats._distn_infrastructure.rv_frozen object at 0x0000020197F478B0>,
                                        'solver': ['sgd', 'adam']},
                   return_train_score=True)

In [22]:
print("Best Parameter : {}".format(random_search.best_params_))
print("Best Cross-validity Score : {:.4f}".format(random_search.best_score_))
print('Test set Score : {:.4f}'.format(random_search.score(x_scaled_test,y_test)))

Best Parameter : {'activation': 'tanh', 'hidden_layer_sizes': 22, 'solver': 'adam'}
Best Cross-validity Score : 0.9746
Test set Score : 0.9591


# MLP실습 : 인공신경망 (회귀)

## library/data 불러오기

In [23]:
data=pd.read_csv("house_price.csv")
x=data[data.columns[1:5]]
y=data[['house_value']]
x_train,x_test,y_train,y_test= train_test_split(x,y,random_state=42)

In [24]:
from sklearn.neural_network import MLPRegressor

## model 확인하기

In [27]:
model=MLPRegressor()
scaler=MinMaxScaler()
scaler.fit(x_train)
x_scaled_train=scaler.transform(x_train)
x_scaled_test=scaler.transform(x_test)
model.fit(x_scaled_train,y_train)


-2.8277255322216313

## 결과 확인하기

In [28]:
pred_train=model.predict(x_scaled_train)
model.score(x_scaled_train,y_train)

-2.8277255322216313

In [31]:
pred_test=model.predict(x_scaled_test)
model.score(x_scaled_test,y_test)

-2.785095002611092

## RMSE 확인하기

In [32]:
import numpy as np
from sklearn.metrics import mean_squared_error
MSE_train=mean_squared_error(y_train,pred_train)
MSE_test=mean_squared_error(y_test,pred_test)
print(np.sqrt(MSE_train))
print(np.sqrt(MSE_test))

186733.1564855526
185994.8272436095


## hyperparameter tuning

In [33]:
# parameter 설정
model=MLPRegressor(hidden_layer_sizes=(64,64,64),activation="relu",random_state=1,max_iter=2000)
model.fit(x_scaled_train,y_train)
pred_train=model.predict(x_scaled_train)
model.score(x_scaled_train,y_train)

0.566197903746314

In [34]:
pred_test=model.predict(x_scaled_test)
model.score(x_scaled_test,y_test)

0.584086684313508

## RMSE 확인하기

In [35]:
MSE_train=mean_squared_error(y_train,pred_train)
MSE_test=mean_squared_error(y_test,pred_test)
print(np.sqrt(MSE_train))
print(np.sqrt(MSE_test))

62863.255358058195
61654.37310884089
