# 회귀 트리
- 회귀 함수를 기반으로 하지 않고 결정 트리와 같이 트리를 기반으로 하는 회귀 방식
- 회귀를 위한 트리를 생성하고 이를 기반으로 회귀 예측
- 리프 노드에서 예측 결정 값을 만드는 과정에서 분류 트리가 특정 클래스 레이블을 결정하는 것과 달리, 회귀 트리는 리프 노드에 속한 데이터 값의 평균값을 구해 회귀 예측값을 계산
- 결정 트리, 랜덤 포레스트, GBM, XGBoost, LightGBM 등 트리 기반의 알고리즘은 분류뿐만 아니라 회귀도 가능한데, 이는 트리 생성이 CART(Classification And Regression Trees)에 기반하고 있기 때문이다. CART는 분류뿐만 아니라 회귀도 가능하게 해주는 트리 생성 알고리즘이다.

#### EX

##### module import

In [15]:
from sklearn.datasets import load_boston
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
import pandas as pd
import numpy as np

###### load_dataset

In [2]:
# Load the Boston housing data, wrap the feature matrix in a DataFrame with
# named columns, and append the regression target as the last column, 'Price'.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2 -- confirm the pinned scikit-learn version before re-running this cell.
boston = load_boston()
boston_df = pd.DataFrame(
    data=boston.data,
    columns=boston.feature_names,
).assign(Price=boston.target)
# Last expression of the cell: displays the first five rows.
boston_df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,Price
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [3]:
# Separate features from target: X is every column except the last one
# (positional slice), y is the 'Price' column.
X = boston_df.iloc[:, :-1]
y = boston_df['Price']

In [10]:
# Model: random forest with 1000 trees and a fixed seed.
rf = RandomForestRegressor(
    n_estimators=1000,
    random_state=1,
)

In [11]:
# Validation: 5-fold cross-validation of `rf`, scored with negative MSE.
# The scores are negated MSE values, so flip the sign before taking the
# square root to obtain a per-fold RMSE, then average across folds.
neg_mean_score = cross_val_score(
    rf, X, y,
    cv=5,
    scoring='neg_mean_squared_error',
)
rmse_score = np.sqrt(-neg_mean_score)
avg_rmse = rmse_score.mean()

print('5 folds independent Negative MSE scores:', np.round(neg_mean_score, 2))
print('5 folds in denpendent RMSE scores:', np.round(rmse_score, 2))
print('5 folds Mean RMSE: {:.3f}'.format(avg_rmse))

5 folds independent Negative MSE scores: [ -8.09 -13.41 -21.32 -46.42 -18.84]
5 folds in denpendent RMSE scores: [2.84 3.66 4.62 6.81 4.34]
5 folds Mean RMSE: 4.456


- dt max_depth = 4
- rf n_estimators = 1000
- gb n_estimators = 1000
- xgb n_estimators = 1000
- lgb n_estimators = 1000

In [48]:
# One regressor per tree-based algorithm being compared.
# Single decision tree, depth-limited to 4.
dt = DecisionTreeRegressor(max_depth=4)
# Ensemble models, each built with 1000 estimators.
rf = RandomForestRegressor(n_estimators=1000)
gb = GradientBoostingRegressor(n_estimators=1000)
xgb = XGBRegressor(
    n_estimators=1000,
    validate_parameters=True,
)
lgb = LGBMRegressor(n_estimators=1000)


In [49]:
# Cross_val_score
def Cross_val_score_eval(model_name, X, y, n_cv):
    """Cross-validate a regressor and print its per-fold and mean RMSE.

    Args:
        model_name: Estimator instance handed to ``cross_val_score`` and
            echoed in the report header (the model object itself, not a
            string, despite the parameter name).
        X: Feature matrix passed to ``cross_val_score``.
        y: Target values passed to ``cross_val_score``.
        n_cv: Forwarded as the ``cv`` argument of ``cross_val_score``.

    Returns:
        None. The report is written to stdout.
    """
    neg_mse_score = cross_val_score(
        model_name, X, y, scoring="neg_mean_squared_error", cv=n_cv
    )
    # Scores are negated MSE values; flip the sign before the square root.
    rmse_score = np.sqrt(-1 * neg_mse_score)
    avg_rmse = np.mean(rmse_score)

    # Label the report with the number of folds actually scored instead of a
    # hard-coded "5", so the text stays correct for any n_cv.
    n_folds = len(neg_mse_score)

    print('[model]:', model_name, '\n')
    print('{} folds independent Negative MSE scores:'.format(n_folds),
          np.round(neg_mse_score, 2))
    print('{} folds in denpendent RMSE scores:'.format(n_folds),
          np.round(rmse_score, 2))
    print('{} folds Mean RMSE: {:.3f}'.format(n_folds, avg_rmse))
    print('---' * 30)

In [50]:
# Run the same 5-fold evaluation for every model, in a fixed order.
for model in (dt, rf, gb, xgb, lgb):
    Cross_val_score_eval(model, X, y, 5)

[model]: DecisionTreeRegressor(max_depth=4) 

5 folds independent Negative MSE scores: [-11.94 -36.98 -33.48 -46.05 -58.12]
5 folds in denpendent RMSE scores: [3.46 6.08 5.79 6.79 7.62]
5 folds Mean RMSE: 5.947
------------------------------------------------------------------------------------------
[model]: RandomForestRegressor(n_estimators=1000) 

5 folds independent Negative MSE scores: [ -8.12 -13.25 -21.36 -46.24 -19.16]
5 folds in denpendent RMSE scores: [2.85 3.64 4.62 6.8  4.38]
5 folds Mean RMSE: 4.458
------------------------------------------------------------------------------------------
[model]: GradientBoostingRegressor(n_estimators=1000) 

5 folds independent Negative MSE scores: [ -7.47 -14.95 -18.76 -39.24 -15.51]
5 folds in denpendent RMSE scores: [2.73 3.87 4.33 6.26 3.94]
5 folds Mean RMSE: 4.227
------------------------------------------------------------------------------------------
[model]: XGBRegressor(base_score=None, booster=None, colsample_bylevel=None,
 

In [None]:
# basic module
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# separate (train/test split) module
from sklearn.model_selection import train_test_split

# machine Learning modules (Regression)
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet

# machine Learning modules (Regression-trees)
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor