In [1]:
import pandas as pd
data = pd.read_csv('breast-cancer-wisconsin.csv', encoding = 'utf-8')
data.head()

Unnamed: 0,code,Clump_Thickness,Cell_Size,Cell_Shape,Marginal_Adhesion,Single_Epithelial_Cell_Size,Bare_Nuclei,Bland_Chromatin,Normal_Nucleoli,Mitoses,Class
0,1000025,5,1,1,1,2,1,3,1,1,0
1,1002945,5,4,4,5,7,10,3,2,1,0
2,1015425,3,1,1,1,2,2,3,1,1,0
3,1016277,6,8,8,1,3,4,3,7,1,0
4,1017023,4,1,1,3,2,1,3,1,1,0


In [2]:
import warnings
warnings.filterwarnings("ignore")

In [3]:
X = data[data.columns[1:10]]
y = data[['Class']]

In [4]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify = y, random_state = 42)

In [5]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
scaler.fit(X_train)
X_scaled_train = scaler.transform(X_train)
X_scaled_test = scaler.transform(X_test)

**분류**

In [6]:
from sklearn.neural_network import MLPClassifier
model = MLPClassifier()
model.fit(X_scaled_train, y_train)
pred_train = model.predict(X_scaled_train)
model.score(X_scaled_train, y_train)

0.974609375

In [7]:
from sklearn.metrics import confusion_matrix
confusion_matrix = confusion_matrix(y_train, pred_train)
print("훈련데이터 오차행렬 : \n", confusion_matrix)

훈련데이터 오차행렬 : 
 [[328   5]
 [  8 171]]


In [8]:
from sklearn.metrics import classification_report
cfreport_train = classification_report(y_train, pred_train)
print("분류예측 보고서 : \n", cfreport_train)

분류예측 보고서 : 
               precision    recall  f1-score   support

           0       0.98      0.98      0.98       333
           1       0.97      0.96      0.96       179

    accuracy                           0.97       512
   macro avg       0.97      0.97      0.97       512
weighted avg       0.97      0.97      0.97       512



In [11]:
pred_test = model.predict(X_scaled_test)
model.score(X_scaled_test, y_test)

0.9590643274853801

In [15]:
confusion_test = confusion_matrix(y_test, pred_test)
print("테스트데이터 오차행렬 : \n", confusion_test)

TypeError: 'numpy.ndarray' object is not callable

In [14]:
from sklearn.metrics import classification_report
cfreport_test= classification_report(y_test, pred_test)
print("분류예측 레포트 : \n", cfreport_test)

분류예측 레포트 : 
               precision    recall  f1-score   support

           0       0.98      0.95      0.97       111
           1       0.92      0.97      0.94        60

    accuracy                           0.96       171
   macro avg       0.95      0.96      0.96       171
weighted avg       0.96      0.96      0.96       171



그리드서치

In [16]:
from sklearn.model_selection import GridSearchCV

param_grid = {'hidden_layer_sizes' : [10, 30, 50, 100], 'solver' : ['sgd', 'adm'], 'activation' : ['tanh', 'relu']}
grid_search = GridSearchCV(MLPClassifier(), param_grid, cv = 5)
grid_search.fit(X_scaled_train, y_train)

GridSearchCV(cv=5, estimator=MLPClassifier(),
             param_grid={'activation': ['tanh', 'relu'],
                         'hidden_layer_sizes': [10, 30, 50, 100],
                         'solver': ['sgd', 'adm']})

In [18]:
print("best parameter : {}".format(grid_search.best_params_))
print("best score : {:.4f}".format(grid_search.best_score_))
print("test set score : {:.4f}".format(grid_search.score(X_scaled_test, y_test)))

best parameter : {'activation': 'tanh', 'hidden_layer_sizes': 100, 'solver': 'sgd'}
best score : 0.9687
test set score : 0.9591


랜덤서치

In [20]:
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint
param_distribs = {'hidden_layer_sizes' : randint(low = 10, high = 100), 'solver' : ['sgd', 'adam'], 'activation' : ['tanh', 'relu']}
random_search = RandomizedSearchCV(MLPClassifier(), param_distribs, n_iter = 10, cv = 5)
random_search.fit(X_scaled_train, y_train)

RandomizedSearchCV(cv=5, estimator=MLPClassifier(),
                   param_distributions={'activation': ['tanh', 'relu'],
                                        'hidden_layer_sizes': <scipy.stats._distn_infrastructure.rv_frozen object at 0x000002671089D730>,
                                        'solver': ['sgd', 'adam']})

In [21]:
print("best parameters : {}".format(random_search.best_params_))
print("best score : {:.4f}".format(random_search.best_score_))
print("test set score : {:.4f}".format(random_search.score(X_scaled_test, y_test)))

best parameters : {'activation': 'relu', 'hidden_layer_sizes': 62, 'solver': 'adam'}
best score : 0.9707
test set score : 0.9591


**회귀**

In [22]:
data2 = pd.read_csv('house_price.csv', encoding = 'utf-8')
data2.head()

Unnamed: 0,housing_age,income,bedrooms,households,rooms,house_value
0,23,6.777,0.141112,2.442244,8.10396,500000
1,49,6.0199,0.160984,2.726688,5.752412,500000
2,35,5.1155,0.249061,1.902676,3.888078,500000
3,32,4.7109,0.231383,1.913669,4.508393,500000
4,21,4.5625,0.255583,3.092664,4.667954,500000


In [27]:
x = data2[data2.columns[0:5]]
y = data2[['house_value']]

In [28]:
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state = 42)

In [29]:
scaler.fit(x_train)
x_scaled_train = scaler.transform(x_train)
x_scaled_test = scaler.transform(x_test)

In [31]:
from sklearn.neural_network import MLPRegressor
model = MLPRegressor()
model.fit(x_scaled_train, y_train)
pred_train = model.predict(x_scaled_train)
model.score(x_scaled_train, y_train)

-2.6914422667536804

In [32]:
pred_test = model.predict(x_scaled_test)
model.score(x_scaled_test, y_test)

-2.650410849837134

In [33]:
import numpy as np
from sklearn.metrics import mean_squared_error
MSE_train = mean_squared_error(y_train, pred_train)
MSE_test = mean_squared_error(y_test, pred_test)
print("훈련 데이터 RMSE : ", np.sqrt(MSE_train))
print("평가 데이터 RMSE : ", np.sqrt(MSE_test))

훈련 데이터 RMSE :  183378.7821648811
평가 데이터 RMSE :  182655.7495155364


In [34]:
model = MLPRegressor(hidden_layer_sizes = (64, 64, 64), activation = "relu", random_state = 1, max_iter = 2000)
model.fit(x_scaled_train, y_train)
pred_train = model.predict(x_scaled_train)
model.score(x_scaled_train, y_train)

0.5930441651564293

In [38]:
pred_test = model.predict(x_scaled_test)
print(model.score(x_scaled_test, y_test))

0.6059090052608731


In [41]:
from sklearn.metrics import mean_squared_error
MSE_train = mean_squared_error(y_train, pred_train)
MSE_test = mean_squared_error(y_test, pred_test)
print("훈련 데이터 RMSE : ", np.sqrt(MSE_train))
print("평가 데이터 RMSE : ", np.sqrt(MSE_test))

훈련 데이터 RMSE :  60887.015082598424
평가 데이터 RMSE :  60015.12699450155
