# **혼자 공부하는 머신러닝 + 딥러닝 CH5**

## **랜덤 포레스트**

### **Import**

In [None]:
import numpy as np
import pandas as pd

from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.model_selection import train_test_split, cross_validate
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, HistGradientBoostingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

### **데이터 전처리**

In [None]:
# Load the wine dataset from the remote CSV file.
wine = pd.read_csv('https://bit.ly/wine-date')

# Three feature columns form the input matrix; the 'class' column is the label.
data = wine.loc[:, ['alcohol', 'sugar', 'pH']].to_numpy()
target = wine['class'].to_numpy()

# No test_size is passed, so the library's default split proportion applies;
# random_state pins the shuffle so the split is reproducible.
train_input, test_input, train_target, test_target = train_test_split(
    data,
    target,
    random_state=42,
)

### **데이터 교차 검증**

In [None]:
# Random forest baseline: cross-validate on the training split and report
# the mean train score followed by the mean validation score.
rf = RandomForestClassifier(random_state=42, n_jobs=-1)
scores = cross_validate(
    rf,
    train_input,
    train_target,
    n_jobs=-1,
    return_train_score=True,
)
print(*(np.mean(scores[key]) for key in ('train_score', 'test_score')))

0.997844759088341 0.8914208392565683


In [None]:
# Fit the forest on the full training split and print its per-feature
# importances (columns are in the order alcohol, sugar, pH).
# fit() returns the estimator itself, so the attribute can be read in one chain.
print(rf.fit(train_input, train_target).feature_importances_)

[0.23155241 0.49706658 0.27138101]


In [None]:
# Rebuild the forest with out-of-bag scoring enabled, fit it on the training
# split, and print the resulting OOB score.
rf = RandomForestClassifier(
    oob_score=True,
    n_jobs=-1,
    random_state=42,
)
rf.fit(train_input, train_target)
print(rf.oob_score_)

0.8981937602627258


In [None]:
# Extra-trees: same cross-validation protocol as the random forest cell.
et = ExtraTreesClassifier(random_state=42, n_jobs=-1)
scores = cross_validate(
    et,
    train_input,
    train_target,
    n_jobs=-1,
    return_train_score=True,
)
# Mean train score first, then mean validation score.
print(scores['train_score'].mean(), scores['test_score'].mean())

0.997844759088341 0.8903937240035804


### **Gradient Boosting**

In [None]:
# Gradient boosting with library-default hyperparameters (only the seed is set).
gb = GradientBoostingClassifier(random_state=42)
scores = cross_validate(
    gb,
    train_input,
    train_target,
    return_train_score=True,
    n_jobs=-1,
)
# Mean train score first, then mean validation score.
print(*(np.mean(scores[key]) for key in ('train_score', 'test_score')))

0.8894704231708938 0.8715107671247301


In [None]:
# Gradient boosting again, this time with 500 estimators and a learning
# rate of 0.2, evaluated with the same cross-validation call.
gb = GradientBoostingClassifier(
    random_state=42,
    learning_rate=0.2,
    n_estimators=500,
)
scores = cross_validate(
    gb,
    train_input,
    train_target,
    return_train_score=True,
    n_jobs=-1,
)
print(scores['train_score'].mean(), scores['test_score'].mean())

0.9512006117505237 0.879719686200179


In [None]:
# Fit the tuned gradient-boosting model on the full training split and print
# its per-feature importances (alcohol, sugar, pH order).
# fit() returns the estimator, so the attribute is read off the same expression.
print(gb.fit(train_input, train_target).feature_importances_)

[0.14799897 0.68877883 0.16322219]


### **Histogram-based Gradient Boosting**

In [None]:
# Histogram-based gradient boosting, cross-validated on the training split.
# No n_jobs is passed to cross_validate here, unlike the other cells.
hgb = HistGradientBoostingClassifier(random_state=42)
scores = cross_validate(
    hgb,
    train_input,
    train_target,
    return_train_score=True,
)
# Mean train score first, then mean validation score.
print(*(np.mean(scores[key]) for key in ('train_score', 'test_score')))

0.9380129799494501 0.8805410414363187


In [None]:
# Fit the histogram-based model and report ITS feature importances.
#
# The previous version printed rf.feature_importances_, i.e. the random
# forest's importances, which is why the recorded output under this cell is
# identical to the random-forest cell's output. That output is stale and must
# be regenerated by re-running the cell.
#
# HistGradientBoostingClassifier does not provide a feature_importances_
# attribute, so permutation importance is used instead: each feature column is
# shuffled n_repeats times and the mean drop in score is reported per feature
# (alcohol, sugar, pH order).
from sklearn.inspection import permutation_importance

hgb.fit(train_input, train_target)
result = permutation_importance(
    hgb,
    train_input,
    train_target,
    n_repeats=10,
    random_state=42,
    n_jobs=-1,
)
print(result.importances_mean)

[0.23155241 0.49706658 0.27138101]


In [None]:
# Score of the fitted histogram-based model on the held-out test split.
# As the last expression of a notebook cell, the value is displayed as output.
hgb.score(X=test_input, y=test_target)

0.8584615384615385

### **XGBoost**

In [None]:
# XGBoost using its histogram tree method, under the same cross-validation
# protocol as the scikit-learn models.
xgb = XGBClassifier(random_state=42, tree_method='hist')
scores = cross_validate(
    xgb,
    train_input,
    train_target,
    return_train_score=True,
    n_jobs=-1,
)
# Mean train score first, then mean validation score.
print(scores['train_score'].mean(), scores['test_score'].mean())

0.8855706320776939 0.8745895856368134


### **LightGBM**

In [None]:
# LightGBM with default hyperparameters (only the seed is set), evaluated with
# the same cross-validation call as the other models.
lgb = LGBMClassifier(random_state=42)
scores = cross_validate(
    lgb,
    train_input,
    train_target,
    return_train_score=True,
    n_jobs=-1,
)
# Mean train score first, then mean validation score.
print(*(np.mean(scores[key]) for key in ('train_score', 'test_score')))

0.9388340849212 0.8807470120570736
