In [1]:
# 의사결정나무(Decision Tree)
# - 나무 모양에 기인하여 데이터를 분류하거나 예측하는 모델

In [2]:
# 의사결정나무 분류(Decision Tree Classification)
# - sklearn.tree.DecisionTreeClassifier
# - 파라미터
# - criterion : 분할 기준
#               'gini'(default) / 'entropy' / 'log_loss'
# - max_depth : 나무 최대 깊이(default = None)
# - min_samples_leaf : 리프 노드에 있어야 하는 최소 샘플 개수(default = 1)
# - min_samples_split : 내부 노드를 분할하기 위해 필요한 최소 샘플 개수(default = 2)

In [3]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

In [4]:
# Load the iris dataset and split it into training and test sets.
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.2,         # 20% of the samples go to the test set
    random_state=1234,     # fixed seed for the shuffle
    stratify=iris.target,  # stratify the split on the class labels
)

In [5]:
# Baseline classifier: every hyperparameter is left at its default except
# random_state, which is fixed because the tree permutes features at each
# split and can otherwise break ties between equally good splits differently
# from run to run.
model_1 = DecisionTreeClassifier(random_state=1234)  # create the estimator
model_dt_1 = model_1.fit(X_train, y_train)  # fit it to the training data

In [6]:
# Constrained classifier: cap the tree depth and require larger leaves.
model_2 = DecisionTreeClassifier(
    max_depth=4,
    min_samples_leaf=5,
    random_state=1234,
)
model_dt_2 = model_2.fit(X_train, y_train)

In [7]:
from sklearn.metrics import f1_score
# Predict test-set labels with both fitted classifiers, in model order.
y_pred_1, y_pred_2 = (
    fitted.predict(X_test) for fitted in (model_dt_1, model_dt_2)
)

In [8]:
# Macro-averaged F1 on the test set for each classifier's predictions.
macro_f1_1, macro_f1_2 = (
    f1_score(y_test, predicted, average='macro')
    for predicted in (y_pred_1, y_pred_2)
)
print(macro_f1_1, macro_f1_2)

0.9333333333333332 0.9665831244778612


In [9]:
# 의사결정나무 회귀(Decision Tree Regression)
# - sklearn.tree.DecisionTreeRegressor
# - criterion : 분할기준
#               'squared_error' : L2 손실을 최소화(default)
#               'absolute_error' : L1 손실을 최소화
# - max_depth : 나무 최대 깊이(default = None)
# - min_samples_leaf : 리프 노드에 있어야 하는 최소 샘플 개수(default = 1)
# - min_samples_split : 내부 노드를 분할하기 위해 필요한 최소 샘플 개수(default = 2)

In [10]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes

In [11]:
# Load the diabetes dataset and split it into training and test sets.
diabetes = load_diabetes()
X_train, X_test, y_train, y_test = train_test_split(
    diabetes.data,
    diabetes.target,
    test_size=0.2,      # 20% of the samples go to the test set
    random_state=1234,  # fixed seed for the shuffle
)

In [12]:
# Baseline regressor: every hyperparameter is left at its default except
# random_state, which is fixed because the tree permutes features at each
# split and can otherwise break ties between equally good splits differently
# from run to run.
model_1 = DecisionTreeRegressor(random_state=1234)  # create the estimator
model_dtr_1 = model_1.fit(X_train, y_train)  # fit it to the training data

In [13]:
# Constrained regressor: cap the tree depth and require larger leaves.
# Only model_2 is bound here, so model_1 keeps referring to the baseline
# regressor instead of being silently rebound to this estimator.
model_2 = DecisionTreeRegressor(
    max_depth=4,
    min_samples_leaf=5,
    random_state=1234,
)
model_dtr_2 = model_2.fit(X_train, y_train)

In [14]:
from sklearn.metrics import mean_absolute_error
# Predict test-set targets with both fitted regressors, in model order.
y_pred_1, y_pred_2 = (
    fitted.predict(X_test) for fitted in (model_dtr_1, model_dtr_2)
)

In [15]:
# Mean absolute error on the test set for each regressor's predictions.
mae_1, mae_2 = (
    mean_absolute_error(y_test, predicted)
    for predicted in (y_pred_1, y_pred_2)
)
print(mae_1, mae_2)

57.31460674157304 52.47961517371391
