## [範例重點]
了解隨機森林的建模方法及其中超參數的意義

In [2]:
from sklearn import datasets, metrics
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [3]:
# 讀取鳶尾花資料集
iris = datasets.load_iris()

# 切分訓練集/測試集
x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.25, random_state=4)

# 建立模型 (使用 20 顆樹，每棵樹的最大深度為 4)
clf = RandomForestClassifier(n_estimators=20, max_depth=4)

# 訓練模型
clf.fit(x_train, y_train)

# 預測測試集
y_pred = clf.predict(x_test)

In [4]:
acc = metrics.accuracy_score(y_test, y_pred)
print("Accuracy: ", acc)

Accuracy:  0.9736842105263158


In [5]:
print(iris.feature_names)

['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']


In [6]:
print("Feature importance: ", clf.feature_importances_)

Feature importance:  [0.05312764 0.03495905 0.45227944 0.45963386]


## 作業

1. 試著調整 RandomForestClassifier(...) 中的參數，並觀察是否會改變結果？
2. 改用其他資料集 (boston, wine)，並與回歸模型與決策樹的結果進行比較

In [13]:

digits = datasets.load_digits()

x_train, x_test, y_train, y_test = train_test_split(digits.data, digits.target, test_size=0.25, random_state=4)

clf = RandomForestClassifier(n_estimators=100, max_depth=10)

# 訓練模型
clf.fit(x_train, y_train)

# 預測測試集
y_pred = clf.predict(x_test)

In [14]:
acc = metrics.accuracy_score(y_test, y_pred)
print("Accuracy: ", acc)
print(iris.feature_names)
print("Feature importance: ", clf.feature_importances_)

Accuracy:  0.9822222222222222
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
Feature importance:  [0.00000000e+00 2.32764177e-03 2.10346020e-02 1.00465377e-02
 8.78392469e-03 1.84576757e-02 8.90731215e-03 9.06955425e-04
 3.13707878e-05 1.00139255e-02 2.63157292e-02 6.64293066e-03
 1.56251177e-02 3.24429684e-02 4.90043244e-03 8.45124116e-04
 2.95306276e-05 8.16917093e-03 2.03689373e-02 2.75923244e-02
 2.98787043e-02 4.44030550e-02 9.17046573e-03 3.53944510e-04
 3.39930455e-05 1.36758958e-02 4.36736817e-02 2.89559348e-02
 2.55231596e-02 2.19789796e-02 2.69190143e-02 8.12479134e-05
 0.00000000e+00 3.11088135e-02 3.00764151e-02 1.96668059e-02
 4.36981767e-02 1.62790694e-02 2.67757219e-02 0.00000000e+00
 5.54172393e-05 1.04724502e-02 3.72534585e-02 4.25358759e-02
 2.12803615e-02 2.09542315e-02 1.95699605e-02 1.17933885e-04
 4.52191146e-05 1.83791499e-03 1.83592850e-02 2.28579923e-02
 1.35944723e-02 2.32554459e-02 2.53713719e-02 1.97673873e-03
 0.00000000e