# MLP 실습 : Boosting (분류)

## data/library 불러오기

In [2]:
import warnings
# NOTE(review): this silences every warning for the rest of the notebook,
# including scikit-learn deprecation/conversion warnings. Consider narrowing it.
warnings.filterwarnings("ignore")
import pandas as pd

# Load the breast-cancer table from the working directory.
data=pd.read_csv("breast-cancer-wisconsin.csv")

# Features: the columns at positions 1..9 (column 0 is skipped -- presumably an
# identifier column; verify against the CSV header).
x=data[data.columns[1:10]]

# Target as a 1-D Series. scikit-learn estimators expect y with shape
# (n_samples,); the previous one-column DataFrame (data[['Class']]) had shape
# (n_samples, 1) and was only accepted via an implicit ravel plus a
# DataConversionWarning that the filter above was hiding.
y=data['Class']

## AdaBoost 적용

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Seeded, class-stratified hold-out split. No test_size is passed, so the
# library default split ratio applies.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, stratify=y, random_state=42
)

# The scaler learns its min/max from the training features only; the same
# fitted scaler is then applied to the test features.
scaler = MinMaxScaler()
x_scaled_train = scaler.fit_transform(x_train)
x_scaled_test = scaler.transform(x_test)

### model 학습하기

In [5]:
from sklearn.ensemble import AdaBoostClassifier

# Build a seeded AdaBoost ensemble with 100 estimators and fit it on the
# scaled training split (fit returns the estimator itself).
model = AdaBoostClassifier(random_state=0, n_estimators=100).fit(
    x_scaled_train, y_train
)

# Predicted labels for both splits; the report cells below read these names.
pred_train = model.predict(X=x_scaled_train)
pred_test = model.predict(X=x_scaled_test)

### 결과 확인하기

In [6]:
# Score of the fitted classifier on the training split.
model.score(X=x_scaled_train, y=y_train)

1.0

In [7]:
# Score of the fitted classifier on the held-out test split.
model.score(X=x_scaled_test, y=y_test)

0.9532163742690059

In [8]:
from sklearn.metrics import classification_report

# Text report comparing training labels with the training predictions.
cfreport_train = classification_report(y_true=y_train, y_pred=pred_train)
print(cfreport_train)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       333
           1       1.00      1.00      1.00       179

    accuracy                           1.00       512
   macro avg       1.00      1.00      1.00       512
weighted avg       1.00      1.00      1.00       512



In [9]:
from sklearn.metrics import classification_report

# Text report comparing test labels with the test predictions.
cfreport_test = classification_report(y_true=y_test, y_pred=pred_test)
print(cfreport_test)

              precision    recall  f1-score   support

           0       0.97      0.95      0.96       111
           1       0.92      0.95      0.93        60

    accuracy                           0.95       171
   macro avg       0.95      0.95      0.95       171
weighted avg       0.95      0.95      0.95       171



In [10]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of true vs. predicted labels on the training split.
confusion_train = confusion_matrix(y_true=y_train, y_pred=pred_train)
print(confusion_train)

[[333   0]
 [  0 179]]


In [11]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of true vs. predicted labels on the test split.
confusion_test = confusion_matrix(y_true=y_test, y_pred=pred_test)
print(confusion_test)

[[106   5]
 [  3  57]]


## GradientBoosting 적용

In [12]:
from sklearn.ensemble import GradientBoostingClassifier

# Seeded gradient-boosting classifier: 100 depth-1 trees, learning rate 1.0.
# Rebinding `model` replaces the AdaBoost classifier fitted earlier.
model = GradientBoostingClassifier(
    random_state=0,
    n_estimators=100,
    max_depth=1,
    learning_rate=1.0,
)

### model 학습하기

In [13]:
# Fit on the scaled training split; the returned estimator is the cell output.
model.fit(X=x_scaled_train, y=y_train)

GradientBoostingClassifier(learning_rate=1.0, max_depth=1, random_state=0)

In [14]:
# Predicted labels for both splits (overwrites the AdaBoost predictions).
pred_train = model.predict(X=x_scaled_train)
pred_test = model.predict(X=x_scaled_test)

### 결과 확인하기

In [16]:
# Score of the gradient-boosting classifier on the training split.
model.score(X=x_scaled_train, y=y_train)

1.0

In [17]:
# Score of the gradient-boosting classifier on the held-out test split.
model.score(X=x_scaled_test, y=y_test)

0.9649122807017544

In [18]:
from sklearn.metrics import classification_report

# Text report for the gradient-boosting predictions on the training split.
cfreport_train = classification_report(y_true=y_train, y_pred=pred_train)
print(cfreport_train)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       333
           1       1.00      1.00      1.00       179

    accuracy                           1.00       512
   macro avg       1.00      1.00      1.00       512
weighted avg       1.00      1.00      1.00       512



In [19]:
from sklearn.metrics import classification_report

# Text report for the gradient-boosting predictions on the test split.
cfreport_test = classification_report(y_true=y_test, y_pred=pred_test)
print(cfreport_test)

              precision    recall  f1-score   support

           0       0.99      0.95      0.97       111
           1       0.92      0.98      0.95        60

    accuracy                           0.96       171
   macro avg       0.96      0.97      0.96       171
weighted avg       0.97      0.96      0.97       171



In [20]:
from sklearn.metrics import confusion_matrix

# Confusion matrix for the gradient-boosting predictions on the training split.
confusion_train = confusion_matrix(y_true=y_train, y_pred=pred_train)
print(confusion_train)

[[333   0]
 [  0 179]]


In [21]:
from sklearn.metrics import confusion_matrix

# Confusion matrix for the gradient-boosting predictions on the test split.
confusion_test = confusion_matrix(y_true=y_test, y_pred=pred_test)
print(confusion_test)

[[106   5]
 [  1  59]]


# MLP 실습 : Boosting(회귀)

## data/library 불러오기

In [22]:
# Load the house-price table. This rebinds `data`, `x`, `y` and the split
# variables used by the classification section above.
data=pd.read_csv("house_price.csv")

# Features: the four columns at positions 1..4 (column 0 is skipped --
# verify against the CSV header that this is intended).
x=data[data.columns[1:5]]

# Target as a 1-D Series. scikit-learn regressors expect y with shape
# (n_samples,); the previous one-column DataFrame (data[['house_value']])
# relied on an implicit ravel that emits a DataConversionWarning.
y=data['house_value']

# Seeded hold-out split; no stratification for a continuous target.
x_train,x_test,y_train,y_test= train_test_split(x,y,random_state=42)

In [23]:
# Fresh scaler fitted on the regression training features only, then applied
# to both splits.
scaler = MinMaxScaler().fit(x_train)
x_scaled_train, x_scaled_test = (
    scaler.transform(x_train),
    scaler.transform(x_test),
)

## AdaBoost 적용

### model 학습하기

In [24]:
from sklearn.ensemble import AdaBoostRegressor

# Seeded AdaBoost regressor with 100 estimators, fitted on the scaled
# training split.
model = AdaBoostRegressor(n_estimators=100, random_state=0)
model.fit(X=x_scaled_train, y=y_train)

# Training-split predictions, used later for the RMSE.
pred_train = model.predict(X=x_scaled_train)

In [28]:
# Recompute the training predictions, then show the training-split score.
pred_train = model.predict(X=x_scaled_train)
model.score(X=x_scaled_train, y=y_train)

0.4353130085971758

In [29]:
# Compute the test predictions, then show the test-split score.
pred_test = model.predict(X=x_scaled_test)
model.score(X=x_scaled_test, y=y_test)

0.43568387094087124

### 결과 확인하기

In [25]:
# Training-split score of the AdaBoost regressor.
model.score(X=x_scaled_train, y=y_train)

0.4353130085971758

In [26]:
# Test-split score of the AdaBoost regressor.
model.score(X=x_scaled_test, y=y_test)

0.43568387094087124

### RMSE 확인하기

In [30]:
import numpy as np
from sklearn.metrics import mean_squared_error

# Mean squared error per split, kept under the original names.
MSE_train = mean_squared_error(y_true=y_train, y_pred=pred_train)
MSE_test = mean_squared_error(y_true=y_test, y_pred=pred_test)

# RMSE = sqrt(MSE); training value on the first line, test value on the second.
print(np.sqrt(MSE_train), np.sqrt(MSE_test), sep="\n")

71722.42012035428
71816.41231019037


## GradientBoosting 적용

### model 학습하기

In [31]:
from sklearn.ensemble import GradientBoostingRegressor

# Seeded gradient-boosting regressor with 100 estimators; fit returns the
# estimator, so construction and training are chained. Rebinding `model`
# replaces the AdaBoost regressor.
model = GradientBoostingRegressor(n_estimators=100, random_state=0).fit(
    x_scaled_train, y_train
)

# Training-split predictions, used later for the RMSE.
pred_train = model.predict(X=x_scaled_train)

### 결과 확인하기

In [32]:
# Recompute the training predictions, then show the training-split score.
pred_train = model.predict(X=x_scaled_train)
model.score(X=x_scaled_train, y=y_train)

0.6178724780500952

In [33]:
# Compute the test predictions, then show the test-split score.
pred_test = model.predict(X=x_scaled_test)
model.score(X=x_scaled_test, y=y_test)

0.5974112241813845

### RMSE 확인하기

In [34]:
import numpy as np
from sklearn.metrics import mean_squared_error

# Mean squared error of the gradient-boosting predictions on each split.
MSE_train = mean_squared_error(y_true=y_train, y_pred=pred_train)
MSE_test = mean_squared_error(y_true=y_test, y_pred=pred_test)

# Print the square roots (RMSE): training first, then test.
print(np.sqrt(MSE_train), np.sqrt(MSE_test), sep="\n")

59000.433545962376
60658.72886338227
