# MLP 실습 : Ensemble Voting (분류)

## data/library 불러오기

In [1]:
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including scikit-learn convergence / data-conversion warnings. Consider
# narrowing the filter once the notebook is stable.
warnings.filterwarnings("ignore")
import pandas as pd

# Load the breast-cancer dataset from the working directory.
data = pd.read_csv("breast-cancer-wisconsin.csv")

# Features: the nine columns at positions 1..9. Column 0 is skipped --
# presumably a sample identifier; verify against the CSV header.
x = data[data.columns[1:10]]

# Target as a 1-D Series (single brackets). The previous `data[['Class']]`
# produced a one-column DataFrame, i.e. a 2-D target that scikit-learn has to
# flatten and that triggers a DataConversionWarning on every fit.
y = data['Class']

In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier


In [8]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Stratified hold-out split: `stratify=y` keeps the class proportions of `y`
# in both partitions; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, stratify=y, random_state=42
)

# Fit the scaler on the training partition only, then reuse those fitted
# parameters for the test partition so no test information leaks into scaling.
scaler = MinMaxScaler()
x_scaled_train = scaler.fit_transform(x_train)
x_scaled_test = scaler.transform(x_test)

In [9]:
# Three base learners for the voting ensemble, all seeded identically so the
# run is repeatable.
svm_model = SVC(random_state=42)
rnf_model = RandomForestClassifier(random_state=42)
logit_model = LogisticRegression(random_state=42)

## model 학습하기 (Hard Voting : 다수결 - 예측 범주 기준)

In [10]:
# Majority-vote ensemble over the three base learners. `voting="hard"` is the
# estimator's default mode; it is spelled out here to contrast with the
# soft-voting ensemble built later in the notebook.
voting_hard = VotingClassifier(
    estimators=[
        ('lr', logit_model),
        ('rf', rnf_model),
        ('svc', svm_model),
    ],
    voting="hard",
)

In [11]:
# Train the hard-voting ensemble on the scaled training partition. The call is
# left as the cell's last expression so the fitted estimator is displayed.
voting_hard.fit(X=x_scaled_train, y=y_train)

VotingClassifier(estimators=[('lr', LogisticRegression(random_state=42)),
                             ('rf', RandomForestClassifier(random_state=42)),
                             ('svc', SVC(random_state=42))])

### 결과 확인하기

In [13]:
from sklearn.metrics import accuracy_score

# Fit each model in turn and report its accuracy on the held-out test set.
# The explicit fit matters for the base learners: VotingClassifier trains
# clones of the estimators it is given, so fitting the ensemble alone does not
# fit `logit_model`, `rnf_model` or `svm_model` themselves.
candidates = (logit_model, rnf_model, svm_model, voting_hard)
for estimator in candidates:
    estimator.fit(x_scaled_train, y_train)
    y_pred = estimator.predict(x_scaled_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    print(estimator.__class__.__name__, test_accuracy)



LogisticRegression 0.9590643274853801
RandomForestClassifier 0.9649122807017544
SVC 0.9649122807017544
VotingClassifier 0.9649122807017544


### 각 model별 결과 확인하기 (혼동행렬)

#### Logistic회귀

In [22]:
from sklearn.metrics import confusion_matrix

# Predictions of the fitted logistic-regression model on both partitions.
log_pred_train = logit_model.predict(x_scaled_train)
log_pred_test = logit_model.predict(x_scaled_test)

# Confusion matrices: rows are true classes, columns are predicted classes.
log_confusion_train = confusion_matrix(y_train, log_pred_train)
log_confusion_test = confusion_matrix(y_test, log_pred_test)

# Report the training matrix first, then a blank gap, then the test matrix.
print("train set")
print(log_confusion_train)
print("\n")
print("test set")
print(log_confusion_test)

train set
[[328   5]
 [  9 170]]


test set
[[106   5]
 [  2  58]]


#### SVM

In [19]:
from sklearn.metrics import confusion_matrix

# Predictions of the fitted SVM on both partitions.
svm_pred_train = svm_model.predict(x_scaled_train)
svm_pred_test = svm_model.predict(x_scaled_test)

# Confusion matrices: rows are true classes, columns are predicted classes.
svm_confusion_train = confusion_matrix(y_train, svm_pred_train)
svm_confusion_test = confusion_matrix(y_test, svm_pred_test)

# Report the training matrix first, then a blank gap, then the test matrix.
print("train set")
print(svm_confusion_train)
print("\n")
print("test set")
print(svm_confusion_test)

[[329   4]
 [  4 175]]


[[106   5]
 [  1  59]]


#### RandomForest

In [23]:
from sklearn.metrics import confusion_matrix

# Predictions of the fitted random forest on both partitions.
rnf_pred_train = rnf_model.predict(x_scaled_train)
rnf_pred_test = rnf_model.predict(x_scaled_test)

# Confusion matrices: rows are true classes, columns are predicted classes.
rnf_confusion_train = confusion_matrix(y_train, rnf_pred_train)
rnf_confusion_test = confusion_matrix(y_test, rnf_pred_test)

# Report the training matrix first, then a blank gap, then the test matrix.
print("train set")
print(rnf_confusion_train)
print("\n")
print("test set")
print(rnf_confusion_test)

train set
[[333   0]
 [  0 179]]


test set
[[106   5]
 [  1  59]]


#### Voting Ensemble

In [27]:
from sklearn.metrics import confusion_matrix

# Predictions of the fitted hard-voting ensemble on both partitions.
voting_pred_train = voting_hard.predict(x_scaled_train)
voting_pred_test = voting_hard.predict(x_scaled_test)

# Confusion matrices: rows are true classes, columns are predicted classes.
voting_confusion_train = confusion_matrix(y_train, voting_pred_train)
voting_confusion_test = confusion_matrix(y_test, voting_pred_test)

# Report the training matrix first, then a blank gap, then the test matrix.
print("train set")
print(voting_confusion_train)
print("\n")
print("test set")
print(voting_confusion_test)

train set
[[329   4]
 [  4 175]]


test set
[[106   5]
 [  1  59]]


## model 학습하기 (Soft Voting : 예측 확률 평균 기준)

In [45]:
# Fresh base learners for the soft-voting ensemble. These rebind the names
# used in the hard-voting section. The SVC is created with `probability=True`
# so that it can supply class probabilities, which soft voting averages.
svm_model = SVC(random_state=42, probability=True)
rnf_model = RandomForestClassifier(random_state=42)
logit_model = LogisticRegression(random_state=42)

In [46]:
# Soft-voting ensemble over the three base learners.
voting_soft = VotingClassifier(
    estimators=[
        ('lr', logit_model),
        ('rf', rnf_model),
        ('svc', svm_model),
    ],
    voting="soft",
)
# Fit as the cell's last expression so the fitted estimator is displayed.
voting_soft.fit(X=x_scaled_train, y=y_train)

VotingClassifier(estimators=[('lr', LogisticRegression(random_state=42)),
                             ('rf', RandomForestClassifier(random_state=42)),
                             ('svc', SVC(probability=True, random_state=42))],
                 voting='soft')

### 결과 확인하기

In [47]:
from sklearn.metrics import accuracy_score

# Fit each model in turn and report its accuracy on the held-out test set.
# The explicit fit matters for the base learners: VotingClassifier trains
# clones of the estimators it is given, so fitting the ensemble alone does not
# fit `logit_model`, `rnf_model` or `svm_model` themselves.
candidates = (logit_model, rnf_model, svm_model, voting_soft)
for estimator in candidates:
    estimator.fit(x_scaled_train, y_train)
    y_pred = estimator.predict(x_scaled_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    print(estimator.__class__.__name__, test_accuracy)

LogisticRegression 0.9590643274853801
RandomForestClassifier 0.9649122807017544
SVC 0.9649122807017544
VotingClassifier 0.9649122807017544


In [48]:
from sklearn.metrics import confusion_matrix

# Predictions of the fitted soft-voting ensemble on both partitions. These
# names were also used for the hard-voting results and are overwritten here.
voting_pred_train = voting_soft.predict(x_scaled_train)
voting_pred_test = voting_soft.predict(x_scaled_test)

# Confusion matrices: rows are true classes, columns are predicted classes.
voting_confusion_train = confusion_matrix(y_train, voting_pred_train)
voting_confusion_test = confusion_matrix(y_test, voting_pred_test)

# Report the training matrix first, then a blank gap, then the test matrix.
print("train set")
print(voting_confusion_train)
print("\n")
print("test set")
print(voting_confusion_test)

train set
[[330   3]
 [  3 176]]


test set
[[106   5]
 [  1  59]]


# MLP 실습 : Ensemble Voting (회귀)

## data/library 불러오기

In [49]:
# Load the house-price dataset from the working directory. This rebinds
# `data`, `x`, `y` and the train/test names used by the classification part.
data = pd.read_csv("house_price.csv")

# Features: the four columns at positions 1..4 (column 0 is skipped).
x = data[data.columns[1:5]]

# Target as a 1-D Series (single brackets). The previous
# `data[['house_value']]` produced a one-column DataFrame, i.e. a 2-D target
# that scikit-learn has to flatten and that triggers a DataConversionWarning.
y = data['house_value']

# Plain (unstratified) hold-out split with a fixed seed for repeatability.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=42)

In [50]:
# Fit a new scaler on the regression training features only, then apply the
# same fitted scaling to both partitions.
scaler = MinMaxScaler().fit(x_train)
x_scaled_train, x_scaled_test = (
    scaler.transform(x_train),
    scaler.transform(x_test),
)

## model 학습하기

In [51]:
from sklearn.ensemble import RandomForestRegressor, VotingRegressor
from sklearn.linear_model import LinearRegression

# Base regressors. Note that `rnf_model` is rebound from the classifier used
# in the classification part to a regressor here.
linear_model = LinearRegression()
rnf_model = RandomForestRegressor(random_state=42)

# Ensemble of the two base regressors.
voting_regressor = VotingRegressor(
    estimators=[
        ('lr', linear_model),
        ('rf', rnf_model),
    ]
)
# Fit as the cell's last expression so the fitted estimator is displayed.
voting_regressor.fit(X=x_scaled_train, y=y_train)

VotingRegressor(estimators=[('lr', LinearRegression()),
                            ('rf', RandomForestRegressor(random_state=42))])

In [55]:
# Fit the stand-alone linear regression and show its score on the training
# data. `fit` returns the estimator, so the two calls can be chained.
linear_model.fit(x_scaled_train, y_train).score(x_scaled_train, y_train)

0.5455724996358273

In [56]:
# Fit the stand-alone random forest regressor and show its score on the
# training data. `fit` returns the estimator, so the two calls can be chained.
rnf_model.fit(x_scaled_train, y_train).score(x_scaled_train, y_train)

0.9376379766388849

In [58]:
# Keep the ensemble's training-set predictions (reused by the RMSE cell) and
# display its score on the training data.
pred_train = voting_regressor.predict(X=x_scaled_train)
voting_regressor.score(X=x_scaled_train, y=y_train)

0.7962532705428835

In [60]:
# Keep the ensemble's test-set predictions (reused by the RMSE cell) and
# display its score on the held-out data.
pred_test = voting_regressor.predict(X=x_scaled_test)
voting_regressor.score(X=x_scaled_test, y=y_test)

0.5936371957936409

## RMSE 확인하기

In [61]:
import numpy as np
from sklearn.metrics import mean_squared_error

# Mean squared error of the ensemble's predictions on each partition.
MSE_train = mean_squared_error(y_train, pred_train)
MSE_test = mean_squared_error(y_test, pred_test)

# RMSE is the square root of the MSE; print train first, then test, one per line.
print(np.sqrt(MSE_train), np.sqrt(MSE_test), sep="\n")

43082.050654857834
60942.38524353489
