## Artificial neural network
- 인간의 뉴런 구조와 활성화 작동원리를 근간으로 input과 output과의 연관을 구현한 알고리즘
- 은닉층과 노드 들을 깊고 넓게 두어 분류와 회귀를 더 잘할 수 있도록 특징 추출 및 분류 단계를 확장하는 역할을 할 수 있도록 한 모델 
- Multi-Layer-Perceptron을 수행

### Classification

In [5]:
import warnings
warnings.filterwarnings("ignore")
import pandas as pd 
data = pd.read_csv("../Data/breast-cancer-wisconsin.csv", encoding="utf-8")
X = data.iloc[:, 1:10]
y = data[['Class']]
print(X.shape)
print(y.shape)

(683, 9)
(683, 1)


In [6]:
from sklearn.model_selection import * 
X_train, X_test, y_train, y_test = train_test_split(X,y, stratify=y, random_state=410)

from sklearn.preprocessing import * 
minmax = MinMaxScaler()
minmax.fit(X_train, y_train)
X_scaled_train = minmax.transform(X_train)
X_scaled_test = minmax.transform(X_test)

In [7]:
from sklearn.neural_network import * 
model = MLPClassifier() 
model.fit(X_scaled_train, y_train)
pred_train = model.predict(X_scaled_train)
model.score(X_scaled_train, y_train)


0.970703125

In [8]:
from sklearn.metrics import *
con_train = confusion_matrix(y_train, pred_train)
print(con_train)

[[326   7]
 [  8 171]]


In [9]:
report_train = classification_report(y_train, pred_train)
print(report_train)

              precision    recall  f1-score   support

           0       0.98      0.98      0.98       333
           1       0.96      0.96      0.96       179

    accuracy                           0.97       512
   macro avg       0.97      0.97      0.97       512
weighted avg       0.97      0.97      0.97       512



In [10]:
pred_test = model.predict(X_scaled_test)
model.score(X_scaled_test, y_test)

0.9766081871345029

In [11]:
con_test = confusion_matrix(y_test, pred_test)
print(con_test)

[[108   3]
 [  1  59]]


In [12]:
report_test = classification_report(y_test, pred_test)
print(report_test)

              precision    recall  f1-score   support

           0       0.99      0.97      0.98       111
           1       0.95      0.98      0.97        60

    accuracy                           0.98       171
   macro avg       0.97      0.98      0.97       171
weighted avg       0.98      0.98      0.98       171



In [13]:
help(MLPClassifier)

Help on class MLPClassifier in module sklearn.neural_network._multilayer_perceptron:

class MLPClassifier(sklearn.base.ClassifierMixin, BaseMultilayerPerceptron)
 |  MLPClassifier(hidden_layer_sizes=(100,), activation='relu', *, solver='adam', alpha=0.0001, batch_size='auto', learning_rate='constant', learning_rate_init=0.001, power_t=0.5, max_iter=200, shuffle=True, random_state=None, tol=0.0001, verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08, n_iter_no_change=10, max_fun=15000)
 |  
 |  Multi-layer Perceptron classifier.
 |  
 |  This model optimizes the log-loss function using LBFGS or stochastic
 |  gradient descent.
 |  
 |  .. versionadded:: 0.18
 |  
 |  Parameters
 |  ----------
 |  hidden_layer_sizes : tuple, length = n_layers - 2, default=(100,)
 |      The ith element represents the number of neurons in the ith
 |      hidden layer.
 |  
 |  activation : {'identity

In [14]:
paramGrid = {"hidden_layer_sizes" : [10, 30, 50, 100], "solver" : ['sgd', 'adam'], 
"activation" : ['tanh', 'relu']}
gridSearch = GridSearchCV(MLPClassifier(), paramGrid, cv = 5)
gridSearch.fit(X_scaled_train, y_train)

GridSearchCV(cv=5, estimator=MLPClassifier(),
             param_grid={'activation': ['tanh', 'relu'],
                         'hidden_layer_sizes': [10, 30, 50, 100],
                         'solver': ['sgd', 'adam']})

In [15]:
print("Best Param : {}".format(gridSearch.best_params_))
print("Best Score : {}".format(gridSearch.best_score_))
print("Test score : {}".format(gridSearch.score(X_scaled_test, y_test)))

Best Param : {'activation': 'tanh', 'hidden_layer_sizes': 100, 'solver': 'adam'}
Best Score : 0.968741671425852
Test score : 0.9707602339181286


In [16]:
from scipy.stats import randint
paramDist = {"hidden_layer_sizes" : randint(low = 10, high = 200),
"solver" : ['sgd', 'adam'], "activation" : ['tanh', 'relu']}
randomSearch = RandomizedSearchCV(MLPClassifier(), paramDist, n_iter = 100, cv = 5)
randomSearch.fit(X_scaled_train, y_train)

RandomizedSearchCV(cv=5, estimator=MLPClassifier(), n_iter=100,
                   param_distributions={'activation': ['tanh', 'relu'],
                                        'hidden_layer_sizes': <scipy.stats._distn_infrastructure.rv_frozen object at 0x15bc09d90>,
                                        'solver': ['sgd', 'adam']})

In [17]:
print("Best param : {}".format(randomSearch.best_params_))
print("Best score : {}".format(randomSearch.best_score_))
print("Test score : {}".format(randomSearch.score(X_scaled_test, y_test)))

Best param : {'activation': 'tanh', 'hidden_layer_sizes': 198, 'solver': 'adam'}
Best score : 0.9707405292213973
Test score : 0.9707602339181286


### Regression

In [18]:
import pandas as pd 
data2 = pd.read_csv("../Data/house_price.csv", encoding="utf-8")
X = data2.iloc[:, 1:5]
y = data2[["house_value"]]

In [20]:
from sklearn.model_selection import * 
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=410)

from sklearn.preprocessing import * 
minmax = MinMaxScaler()
minmax.fit(X_train)
X_scaled_train = minmax.transform(X_train)
X_scaled_test = minmax.transform(X_test)


In [22]:
import sklearn 
dir(sklearn)

['__SKLEARN_SETUP__',
 '__all__',
 '__builtins__',
 '__cached__',
 '__check_build',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '__version__',
 '_config',
 '_distributor_init',
 '_loss',
 'base',
 'clone',
 'config_context',
 'exceptions',
 'externals',
 'get_config',
 'logger',
 'logging',
 'metrics',
 'model_selection',
 'neural_network',
 'os',
 'preprocessing',
 'random',
 'set_config',
 'setup_module',
 'show_versions',
 'sys',
 'utils']

In [23]:
from sklearn.neural_network import * 
dir(sklearn.neural_network)

['BernoulliRBM',
 'MLPClassifier',
 'MLPRegressor',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_base',
 '_multilayer_perceptron',
 '_rbm',
 '_stochastic_optimizers']

In [24]:
model = MLPRegressor()
model.fit(X_scaled_train, y_train)
pred_train = model.predict(X_scaled_train)
model.score(X_scaled_train, y_train)

-2.9010256994303107

In [25]:
pred_test = model.predict(X_scaled_test)
model.score(X_scaled_test, y_test)

-2.888364486565458

In [26]:
import numpy as np 
from sklearn.metrics import * 
MSE_train = mean_squared_error(y_train, pred_train)
MSE_test = mean_squared_error(y_test, pred_test)

RMSE_train = np.sqrt(MSE_train)
RMSE_test = np.sqrt(MSE_test)
print("학습 데이터 RMSE : ", RMSE_train)
print("테스트 데이터 RMSE : ", RMSE_test)

학습 데이터 RMSE :  187380.47679168862
테스트 데이터 RMSE :  191758.86967280888


### Tune Model

In [27]:
help(MLPRegressor)

Help on class MLPRegressor in module sklearn.neural_network._multilayer_perceptron:

class MLPRegressor(sklearn.base.RegressorMixin, BaseMultilayerPerceptron)
 |  MLPRegressor(hidden_layer_sizes=(100,), activation='relu', *, solver='adam', alpha=0.0001, batch_size='auto', learning_rate='constant', learning_rate_init=0.001, power_t=0.5, max_iter=200, shuffle=True, random_state=None, tol=0.0001, verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True, early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08, n_iter_no_change=10, max_fun=15000)
 |  
 |  Multi-layer Perceptron regressor.
 |  
 |  This model optimizes the squared error using LBFGS or stochastic gradient
 |  descent.
 |  
 |  .. versionadded:: 0.18
 |  
 |  Parameters
 |  ----------
 |  hidden_layer_sizes : tuple, length = n_layers - 2, default=(100,)
 |      The ith element represents the number of neurons in the ith
 |      hidden layer.
 |  
 |  activation : {'identity', 'logis

In [29]:
model = MLPRegressor(hidden_layer_sizes=(64, 64, 64), activation="relu", random_state=410, max_iter=2000)
model.fit(X_scaled_train, y_train)
pred_train = model.predict(X_scaled_train)
model.score(X_scaled_train, y_train)

0.5825656817084719

In [30]:
pred_test = model.predict(X_scaled_test)
model.score(X_scaled_test, y_test)

0.5959384323159135

In [31]:
import numpy as np 
from sklearn.metrics import * 
MSE_train = mean_squared_error(y_train, pred_train)
MSE_test = mean_squared_error(y_test, pred_test)

RMSE_train = np.sqrt(MSE_train)
RMSE_test = np.sqrt(MSE_test)
print("학습 데이터 RMSE : ", RMSE_train)
print("테스트 데이터 RMSE : ", RMSE_test)

학습 데이터 RMSE :  61295.55889172482
테스트 데이터 RMSE :  61815.266985704046
