## 作業

1. 試著調整 RandomForestClassifier(...) 中的參數，並觀察是否會改變結果？
2. 改用其他資料集 (boston, wine)，並與回歸模型與決策樹的結果進行比較

In [1]:
from sklearn import datasets, metrics, linear_model
from sklearn.tree import DecisionTreeRegressor,DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import train_test_split

  from numpy.core.umath_tests import inner1d


In [2]:
# 未調整前
iris = datasets.load_iris()
x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.25, stratify=iris.target)
rfco = RandomForestClassifier()
rfco.fit(x_train, y_train)
y_pred = rfco.predict(x_test)

In [3]:
print("Acuuracy: ", metrics.accuracy_score(y_test, y_pred))
print(iris.feature_names)
print("Feature importance: ", rfco.feature_importances_)

Acuuracy:  0.9210526315789473
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
Feature importance:  [0.01992087 0.0220629  0.69606725 0.26194898]


In [4]:
### 調整後
iris = datasets.load_iris()
x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.25, stratify=iris.target)
rfc = RandomForestClassifier(max_features=4, n_estimators=1, min_samples_leaf=5)
rfc.fit(x_train, y_train)
y_pred = rfc.predict(x_test)

In [5]:
print("Acuuracy: ", metrics.accuracy_score(y_test, y_pred))
print(iris.feature_names)
print("Feature importance: ", rfc.feature_importances_)

Acuuracy:  0.9473684210526315
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
Feature importance:  [0.         0.00392822 0.64762749 0.34844428]


### Linear

In [6]:
boston = datasets.load_boston()

x_train, x_test, y_train, y_test = train_test_split(boston.data, boston.target, test_size=0.15)

lr = linear_model.LinearRegression()

lr.fit(x_train, y_train)

y_pred = lr.predict(x_test)

In [7]:
print('MAE: {:.4f}'.format(metrics.mean_absolute_error(y_test, y_pred)))
print('MSE: {:.4f}'.format(metrics.mean_squared_error(y_test, y_pred)))
print('R-Squared: {:.4f}'.format(metrics.r2_score(y_test, y_pred)))

MAE: 3.7379
MSE: 28.9053
R-Squared: 0.6718


In [8]:
lasso = linear_model.Lasso(alpha=1.0)
lasso.fit(x_train, y_train)
y_pred = lasso.predict(x_test)
print('MAE: {:.4f}'.format(metrics.mean_absolute_error(y_test, y_pred)))
print("Mean Squared Error: {:.4f}".format(metrics.mean_squared_error(y_test, y_pred)))
print("R-Squared: {:.4f}".format(metrics.r2_score(y_test, y_pred)))

MAE: 3.7870
Mean Squared Error: 30.0381
R-Squared: 0.6590


In [9]:
ridge = linear_model.Ridge(alpha=1.0)
ridge.fit(x_train, y_train)
y_pred = ridge.predict(x_test)
print('MAE: {:.4f}'.format(metrics.mean_absolute_error(y_test, y_pred)))
print("Mean Squared Error: {:.4f}".format(metrics.mean_squared_error(y_test, y_pred)))
print("R-Squared: {:.4f}".format(metrics.r2_score(y_test, y_pred)))

MAE: 3.6701
Mean Squared Error: 28.9079
R-Squared: 0.6718


### Decesion Tree

In [10]:
dtr = DecisionTreeRegressor()
dtr.fit(x_train, y_train)
y_pred = dtr.predict(x_test)
print('MAE: {:.4f}'.format(metrics.mean_absolute_error(y_test, y_pred)))
print('MSE: {:.4f}'.format(metrics.mean_squared_error(y_test, y_pred)))
print('R-Squared: {:.4f}'.format(metrics.r2_score(y_test, y_pred)))
print('\n{}'.format(boston.feature_names))
print()
print('feature importance:\n{}'.format(dtr.feature_importances_))

MAE: 2.8658
MSE: 16.3224
R-Squared: 0.8147

['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'
 'B' 'LSTAT']

feature importance:
[3.48203604e-02 2.93715053e-03 1.08897479e-02 3.15473270e-04
 2.75388341e-02 6.07931555e-01 1.17304033e-02 8.26395659e-02
 3.46920157e-03 7.58570977e-03 2.59158804e-02 9.24596067e-03
 1.74980157e-01]


### Random Forest

In [11]:
rfr = RandomForestRegressor()
rfr.fit(x_train, y_train)
y_pred = rfr.predict(x_test)
print('MAE: {:.4f}'.format(metrics.mean_absolute_error(y_test, y_pred)))
print('\nMSE: {:.4f}'.format(metrics.mean_squared_error(y_test, y_pred)))
print('\nR-Squared: {:.4f}'.format(metrics.r2_score(y_test, y_pred)))
print('\n{}'.format(boston.feature_names))
print()
print('feature importance:\n{}'.format(rfr.feature_importances_))

MAE: 2.2176

MSE: 12.1700

R-Squared: 0.8618

['CRIM' 'ZN' 'INDUS' 'CHAS' 'NOX' 'RM' 'AGE' 'DIS' 'RAD' 'TAX' 'PTRATIO'
 'B' 'LSTAT']

feature importance:
[0.03207106 0.00153415 0.00566306 0.00056231 0.00970542 0.49209248
 0.01353241 0.07034295 0.00574137 0.02063922 0.01836381 0.01560725
 0.3141445 ]
